安装bibtexparser
pip install bibtexparser
代码
import bibtexparser
from difflib import SequenceMatcher
def parse_bib_file(filename):
    """Read a BibTeX file and return its parsed entries.

    Args:
        filename: Path of the ``.bib`` file; it is opened as UTF-8 text.

    Returns:
        The ``entries`` attribute of the database object produced by
        ``bibtexparser.load``.
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        return bibtexparser.load(handle).entries
def find_duplicates(entries):
    """Detect repeated citation IDs and near-identical titles.

    An entry whose ``'ID'`` was already seen is reported on stdout and left
    out of the title comparison. Each remaining title is then compared with
    the titles of the other kept entries through ``title_in_values``.

    Args:
        entries: Iterable of dicts. Every dict must have an ``'ID'`` key; the
            ``'title'`` key is optional, and entries without it are skipped
            during the title comparison instead of raising ``KeyError``.

    Returns:
        A tuple ``(duplicates, NO_ID_Duplicate)``. ``duplicates`` is the
        merged mapping built from the ``title_in_values`` results, and
        ``NO_ID_Duplicate`` is ``True`` when no ID occurred more than once.
    """
    duplicates = {}
    seen_ids = set()
    titles_by_id = {}
    NO_ID_Duplicate = True
    for entry in entries:
        entry_id = entry['ID']
        title = entry.get('title')  # None when the entry has no title field
        if entry_id in seen_ids:
            print('发现重复: ', f"{(entry_id, title)}")  # repeated ID
            NO_ID_Duplicate = False
            continue
        seen_ids.add(entry_id)
        if title is not None:
            titles_by_id[entry_id] = title
    for title in titles_by_id.values():
        # Hand over a fresh list on every call, because the helper may
        # remove items from the list it receives.
        similar = title_in_values(title, list(titles_by_id.values()))
        duplicates.update(similar)  # similar titles
    return duplicates, NO_ID_Duplicate
def title_in_values(title: str, values: list, threshold: float = 0.9) -> dict:
    """Return the strings in *values* that closely resemble *title*.

    One occurrence of *title* is left out of the comparison, so that a title
    taken from *values* is not matched against itself. The caller's list is
    not modified, and a *title* that does not occur in *values* is accepted.

    Args:
        title: The string every candidate is compared with.
        values: Candidate strings.
        threshold: A candidate is kept only when its
            ``SequenceMatcher.ratio()`` against *title* is strictly greater
            than this value.

    Returns:
        A dict mapping each kept candidate to ``[title, ratio]``, ordered by
        descending ratio.
    """
    candidates = list(values)  # copy, so the argument is left untouched
    if title in candidates:
        candidates.remove(title)

    # SequenceMatcher caches its analysis of the second sequence, so the
    # constant title is set once as ``b`` and only ``a`` changes per candidate.
    matcher = SequenceMatcher(None, b=title)
    matches = {}
    for candidate in candidates:
        matcher.set_seq1(candidate)
        ratio = matcher.ratio()
        if ratio > threshold:
            matches[candidate] = [title, ratio]

    ranked = sorted(matches.items(), key=lambda item: item[1][1], reverse=True)
    return dict(ranked)
def main():
    """Check one hard-coded .bib file for duplicates and print the outcome.

    Prints a single "nothing found" message when ``find_duplicates`` returns
    an empty mapping and reports no repeated ID; otherwise prints one line
    for each key of the returned mapping.
    """
    bib_path = './file/MAGNet.bib'  # path of your .bib file
    title_matches, ids_are_unique = find_duplicates(parse_bib_file(bib_path))
    if ids_are_unique and not title_matches:
        print('未发现重复!')
        return
    for suspect_title in title_matches:
        print('发现疑似重复:', suspect_title)
# Run the duplicate check only when this file is executed as a script.
if "__main__" == __name__:
    main()
效果

