import os
import fitz # PyMuPDF
def find_paper(directory, target_title):
    """Return the name of the first PDF whose first page mentions the title.

    Every file in ``directory`` with a ``.pdf`` extension (any letter case)
    is opened with PyMuPDF, the text of its first page is extracted, and a
    case-insensitive substring search for ``target_title`` is performed.
    Progress and results are reported with ``print``.

    Args:
        directory: Path of the directory that holds the PDF files.
        target_title: Title (or any text fragment) to look for.

    Returns:
        The file name (not the full path) of the first matching PDF, or
        ``None`` when no PDF matches.

    Raises:
        OSError: If ``directory`` cannot be listed (propagated from
            ``os.listdir``).
    """
    # Lower-case the search term once instead of once per file.
    needle = target_title.lower()

    # os.listdir returns entries in arbitrary order; sorting makes the scan
    # order, and therefore which match is returned first, deterministic.
    for filename in sorted(os.listdir(directory)):
        # Compare the extension case-insensitively so "12.PDF" is not skipped.
        if not filename.lower().endswith('.pdf'):
            continue

        file_path = os.path.join(directory, filename)
        print(f"Reading file: {filename}")  # progress/debug output
        try:
            # The context manager closes the document even when reading the
            # page fails, so no file handle is leaked while scanning.
            with fitz.open(file_path) as doc:
                text = doc.load_page(0).get_text()
        except Exception as e:
            # Best-effort scan: a corrupt, encrypted or empty PDF must not
            # abort the search, so report it and move on to the next file.
            print(f"Could not read file {filename}: {e}")
            continue

        if text and needle in text.lower():
            print(f"Found '{target_title}' in file: {filename}")
            return filename

    print(f"'{target_title}' not found in any PDF file.")
    return None
# Replace with the path of the directory that stores your PDF files.
directory = 'path_to_your_pdf_files'
# Replace with the title of the paper you are looking for.
target_title = 'ABC_Net'

# Run the search, then report whether a matching file name came back.
found_file = find_paper(directory, target_title)
print(f"Paper found: {found_file}" if found_file else "Paper not found.")
# Find the desired paper among a large collection of numerically named PDF papers.
# First published on 2024-07-01 10:14:29.