总说
有时候(例如写毕业论文时),你需要把多篇论文各自的bibtex文件合并成一个文件,这样毕业论文中所有的bibtex引用才能正常解析。
存在以下四种情况:
-
标题相同,label不同 -> 二者都保持(这是因为,你直接复制正文时,不需要更改标签了。两种标签都可以用,都引用相同的论文)
-
标题相同,label相同 -> 保留一个
-
标题不同,label相同 -> 二者保留,记录log中,人为后续处理
-
标题不同,label不同 -> 二者都保留(这是两篇互不冲突的论文,无需额外处理)
-
其他情况(这是为了防止不小心略过的论文)-> 检查原本所有bib文件中的论文,进行更新
import os
import re
from collections import defaultdict
def read_bib_file(file_path):
    """Read a BibTeX file and return its entries as a list of strings.

    An entry is considered to start wherever ``@`` is the first non-blank
    character of a line (or of the file). Each returned string is one entry,
    stripped of surrounding whitespace and beginning with exactly one ``@``.
    Text that precedes the first entry (comments, blank lines) is discarded.

    Args:
        file_path: Path of the UTF-8 encoded ``.bib`` file.

    Returns:
        The entry strings in file order; an empty list if the file holds no
        entry.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # A UTF-8 BOM would otherwise sit in front of the first '@' and hide it.
    content = content.lstrip('\ufeff')

    # Consume the opening '@' of *every* entry, including the very first one.
    # Splitting on '\n@' only left the first entry's own '@' in place, so
    # re-adding the marker below produced '@@type{...' for it.
    chunks = re.split(r'(?:\A|\n)[ \t]*@', content)

    # chunks[0] is whatever precedes the first entry; it is never an entry.
    return ['@' + chunk.strip() for chunk in chunks[1:] if chunk.strip()]
def extract_title(entry):
    """Return the value of the ``title`` field of a BibTeX entry.

    Both ``title = {...}`` and ``title = "..."`` are recognised, the field
    name is matched case-insensitively, and braces nested inside a braced
    value are kept (``{{Deep} Learning}`` gives ``{Deep} Learning``). Runs of
    whitespace, including line breaks of a wrapped title, are collapsed to a
    single space so the same title compares equal however it was wrapped.

    Args:
        entry: Text of a single BibTeX entry.

    Returns:
        The title text, or ``None`` when the entry has no ``title`` field,
        the value is empty, or its closing delimiter is missing.
    """
    # \b keeps 'booktitle', 'subtitle', 'shorttitle', ... from being mistaken
    # for the title field.
    field = re.search(r'\btitle\s*=\s*(["{])', entry, re.IGNORECASE)
    if not field:
        return None

    start = field.end()
    end = -1
    if field.group(1) == '"':
        end = entry.find('"', start)
    else:
        # Walk to the brace that closes the value, tracking nesting depth so
        # an inner '{...}' group does not end the title early.
        depth = 1
        for pos in range(start, len(entry)):
            char = entry[pos]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    end = pos
                    break
    if end == -1:
        return None

    title = ' '.join(entry[start:end].split())
    return title or None
def extract_citation_key(entry):
    """Return the citation key (label) of a BibTeX entry.

    The key is the token between the opening brace of ``@type{`` and the
    first comma. Whitespace or line breaks around the brace and around the
    key are tolerated, so ``@article {key ,`` is recognised as well.

    Args:
        entry: Text of a single BibTeX entry.

    Returns:
        The key, or ``None`` when the text has no ``@type{key,`` header. This
        includes ``@string``/``@preamble``/``@comment`` blocks, which carry
        no citation key.
    """
    # The key may not contain commas, whitespace or braces; excluding them
    # stops something like '@string{x = "a, b"}' from yielding a bogus key.
    header = re.search(r'@\w+\s*\{\s*([^,\s{}]+)\s*,', entry)
    if header is None:
        return None
    return header.group(1)
def combine_bib_files(file_paths):
    """Merge the entries of several BibTeX files.

    Entries are classified by title and citation key (label):

    * same title, same label       -> only the first occurrence is kept;
    * same title, different labels -> one entry per label is kept and the
      group is reported in ``duplicates``;
    * different titles, same label -> all are kept and the label is reported
      in ``errors`` for manual resolution.

    Entries that have a label but no recognisable title are grouped by their
    full text, so each distinct one is still written once instead of being
    lost from the merged file. Blocks without a label cannot be classified;
    they are only counted in ``all_entries``.

    Args:
        file_paths: Iterable of paths of the ``.bib`` files to merge.

    Returns:
        A tuple ``(final_entries, duplicates, errors, all_entries)``:

        * ``final_entries``: entry strings to write to the merged file;
        * ``duplicates``: ``(title, entries)`` pairs for titles that occur
          under more than one label;
        * ``errors``: ``(label, entries)`` pairs for labels that are used for
          more than one title;
        * ``all_entries``: every entry read, in reading order.
    """
    entries_by_group = defaultdict(list)   # group id -> every entry seen
    kept_by_group = defaultdict(dict)      # group id -> {label: first entry}
    entries_by_key = defaultdict(list)     # label -> every entry seen
    groups_by_key = defaultdict(set)       # label -> group ids it appears in
    all_entries = []

    for file_path in file_paths:
        entries = read_bib_file(file_path)
        all_entries.extend(entries)  # kept for the caller's summary
        for entry in entries:
            citation_key = extract_citation_key(entry)
            if not citation_key:
                continue
            # An entry without a title is identified by its own text, so it
            # forms its own group and is never merged with a different entry.
            group_id = extract_title(entry) or entry
            entries_by_group[group_id].append(entry)
            # setdefault keeps the first entry per label; later repeats of
            # the same title and label are the ones to drop.
            kept_by_group[group_id].setdefault(citation_key, entry)
            entries_by_key[citation_key].append(entry)
            groups_by_key[citation_key].add(group_id)

    final_entries = []
    duplicates = []
    for group_id, kept in kept_by_group.items():
        final_entries.extend(kept.values())
        if len(kept) > 1:
            duplicates.append((group_id, entries_by_group[group_id]))

    errors = [
        (key, entries_by_key[key])
        for key, group_ids in groups_by_key.items()
        if len(group_ids) > 1
    ]
    return final_entries, duplicates, errors, all_entries
def write_combined_bib_file(entries, output_file):
    """Write the entries to ``output_file``, each followed by a blank line.

    The file is created or overwritten and encoded as UTF-8.

    Args:
        entries: Iterable of entry strings.
        output_file: Path of the file to write.
    """
    with open(output_file, 'w', encoding='utf-8') as out:
        out.writelines(entry + '\n\n' for entry in entries)
def log_duplicates(duplicates, log_file):
    """Write the duplicate-title groups to ``log_file`` for manual review.

    For every ``(title, entries)`` pair the log holds a ``Title:`` line, the
    entries (each followed by a blank line) and an 80-dash separator line.
    The file is created or overwritten and encoded as UTF-8.

    Args:
        duplicates: Iterable of ``(title, entries)`` pairs.
        log_file: Path of the log file to write.
    """
    separator = '-' * 80 + '\n'
    with open(log_file, 'w', encoding='utf-8') as log:
        for title, group in duplicates:
            log.write(f"Title: {title}\n")
            log.writelines(entry + '\n\n' for entry in group)
            log.write(separator)
def log_errors(errors, log_file):
    """Write the conflicting-label groups to ``log_file`` for manual review.

    For every ``(key, entries)`` pair the log holds a ``Citation Key:`` line,
    the entries (each followed by a blank line) and an 80-dash separator
    line. The file is created or overwritten and encoded as UTF-8.

    Args:
        errors: Iterable of ``(key, entries)`` pairs.
        log_file: Path of the log file to write.
    """
    separator = '-' * 80 + '\n'
    with open(log_file, 'w', encoding='utf-8') as log:
        for key, group in errors:
            log.write(f"Citation Key: {key}\n")
            log.writelines(entry + '\n\n' for entry in group)
            log.write(separator)
def main(bib_files=('ref1.bib', 'ref2.bib', 'ref3.bib'),
         output_file='combined_ref.bib',
         duplicates_log_file='duplicates_log.txt',
         error_log_file='error_log.txt'):
    """Merge ``bib_files`` into ``output_file`` and print a summary.

    The defaults reproduce what the script does when run without changes, so
    running the file directly behaves as before; other paths can be supplied
    when the function is called from other code.

    Args:
        bib_files: Paths of the ``.bib`` files to merge.
        output_file: Path of the merged ``.bib`` file to write.
        duplicates_log_file: Path of the log listing titles that occur under
            several labels (for manual merging).
        error_log_file: Path of the log listing labels that are used for
            several titles (for manual checking).
    """
    bib_files = list(bib_files)

    # Combine entries from all files
    combined_entries, duplicates, errors, all_entries = combine_bib_files(bib_files)

    # Write the combined entries and both review logs
    write_combined_bib_file(combined_entries, output_file)
    log_duplicates(duplicates, duplicates_log_file)
    log_errors(errors, error_log_file)

    # Print summary information
    print(f"Combined bib file created: {output_file} with {len(combined_entries)} entries")
    print(f"Duplicates log created: {duplicates_log_file} with {len(duplicates)} entries")
    print(f"Error log created: {error_log_file} with {len(errors)} entries")
    for file in bib_files:
        # Each file is read again here only to report its own entry count.
        entries = read_bib_file(file)
        print(f"{file}: {len(entries)} entries processed")
    print(f"Total unique entries combined: {len(combined_entries)}")
    print(f"Total entries processed: {len(all_entries)}")


if __name__ == "__main__":
    main()