使用 Python 自动合并目录下的 PDF 文件,文件按照序号排序进行合并
代码运行
pip install PyPDF2 tqdm
python pdf.py --dir_path /path/to/dir
参数说明
- dir_path: 需要合并的pdf文件目录
- file_name: 合并后的 PDF 文件名称,默认为 merged.pdf
运行代码后,会在 dir_path 目录下生成合并后的 PDF 文件
'''
使用示例:
pip install PyPDF2 tqdm
python pdf.py --dir_path /path/to/dir
'''
import argparse
import re
from pathlib import Path

from PyPDF2 import PdfMerger
from tqdm import tqdm
def _natural_sort_key(path: Path) -> list:
    """Return a sort key that compares digit runs in the file name as integers.

    Plain string sorting places ``10.pdf`` before ``2.pdf``. ``re.split`` with
    a capturing group always alternates text / digits / text / ..., so the
    odd-indexed chunks are the digit runs and can safely be converted to int
    without ever comparing an int against a str at the same position.
    """
    chunks = re.split(r"(\d+)", path.name)
    return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]


def merge_pdfs_in_dir(dir_path: Path, file_name: str) -> None:
    """Merge the ``*.pdf`` files found directly in *dir_path* into one PDF.

    Input files are ordered by file name using a natural sort, so numeric
    prefixes/suffixes are ordered by value (``2.pdf`` before ``10.pdf``).
    Zero-byte files are skipped with a message. The result is written to
    ``dir_path / file_name``; an existing file at that location is overwritten
    and is never used as an input, so re-running the script does not merge the
    previous output into itself.

    Args:
        dir_path: Directory containing the PDF files to merge.
        file_name: Name of the merged output file, relative to *dir_path*.

    Raises:
        FileNotFoundError: If *dir_path* does not exist.
        NotADirectoryError: If *dir_path* exists but is not a directory.
    """
    dir_path = Path(dir_path)
    # Fail before any work is done, with the same exception types the
    # filesystem would eventually raise when writing the output.
    if not dir_path.exists():
        raise FileNotFoundError(f"Directory does not exist: {dir_path.as_posix()}")
    if not dir_path.is_dir():
        raise NotADirectoryError(f"Not a directory: {dir_path.as_posix()}")

    merged_file = dir_path / file_name
    merged_resolved = merged_file.resolve()

    # glob() already yields paths that include dir_path, so they are used
    # as-is; joining them onto dir_path again breaks relative directories.
    pdf_files = sorted(
        (
            candidate
            for candidate in dir_path.glob("*.pdf")
            # The merger keeps its inputs open until close(); writing the
            # output over one of them would truncate a file still being read.
            if candidate.is_file() and candidate.resolve() != merged_resolved
        ),
        key=_natural_sort_key,
    )
    print("PDF files:", [pdf.as_posix() for pdf in pdf_files])

    merger = PdfMerger()
    try:
        for pdf_path in tqdm(pdf_files):
            if pdf_path.stat().st_size:
                merger.append(pdf_path.as_posix())
            else:
                print(f"Skipping empty file: {pdf_path.as_posix()}")
        # write() opens the target for writing itself, so no touch() is needed.
        merger.write(merged_file.as_posix())
    finally:
        # Release the input file handles even when append/write fails.
        merger.close()
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and merge the directory.
    parser = argparse.ArgumentParser()
    # --dir_path is mandatory; without required=True a missing value would
    # reach Path(None) and fail with an unhelpful TypeError.
    parser.add_argument(
        "--dir_path",
        type=str,
        required=True,
        help="Directory path containing PDF files",
    )
    parser.add_argument(
        "--file_name",
        type=str,
        default="merged.pdf",
        help="Output file name",
    )
    args = parser.parse_args()
    merge_pdfs_in_dir(Path(args.dir_path), args.file_name)