使用 Python 为 PDF 生成书签的代码,基于 PyPDF4 库。
txt_path:目录文本文件的路径。
pdf_path:PDF 文件的路径,请换成自己的地址。
txt 文件的格式示例:
第 1 章-LAMP 网站构建-1
1.1-Web 概述-1
1.1.1-Web 应用的优势-2
1.1.2-Web 2.0-时代的互联网-3
每行的格式为:章节-标题-目录中的页码
import PyPDF4
# Folder that holds both input files -- replace it with your own location.
_pdf_dir = '/Users/lcd/Desktop/lcd/python/pdfbookmark/pdf'
# Table-of-contents text file, one "level-title-page" entry per line.
txt_path = _pdf_dir + '/test.txt'
# PDF file that the bookmarks are generated for.
pdf_path = _pdf_dir + '/php2.pdf'
# Page number at which the PDF's main content starts.
ini_page = 26
# Table of contents: copy the outline from the PDF's own contents pages into
# the txt file and adjust it to one "level-title-page" entry per line.
def get_mu_lu(page, path=None):
    """Return the bookmark text for the TOC entry printed on ``page``.

    The TOC file holds one entry per line in the form ``level-title-page``,
    for example ``1.1.1-Web 应用的优势-2``.  The first entry whose page field
    equals ``page`` decides the result and the scan stops there.

    Args:
        page: Page number as printed in the table of contents.  It is
            compared as text, so an int and a str both work.
        path: TOC text file to read.  Defaults to the module-level
            ``txt_path``.

    Returns:
        ``level + title`` (joined without a separator) when the first
        matching entry has a dotted level such as ``1.1`` or ``1.1.1``.
        An empty string when no entry matches, or when the first matching
        entry has an undotted level (for example a chapter heading).
    """
    if path is None:
        path = txt_path
    wanted = str(page)
    # utf-8-sig reads plain UTF-8 and also drops a leading byte-order mark,
    # so the non-ASCII titles decode the same way on every platform.
    with open(path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            # Split on the first and the last hyphen only, so a hyphen that
            # belongs to the title (e.g. "Web 2.0-...") is kept intact.
            level, first_sep, rest = line.strip().partition('-')
            title, last_sep, num = rest.rpartition('-')
            if not first_sep or not last_sep:
                # Blank line or fewer than three fields: nothing to match.
                continue
            if num.strip() != wanted:
                continue
            # NOTE(review): an undotted level deliberately yields no bookmark
            # and still ends the search, as in the original logic.
            if '.' in level:
                return level + title
            return ''
    return ''
def read_pdf():
    """Copy ``pdf_path`` into a new PDF and add bookmarks from the TOC file.

    Every page of the source PDF is appended to a writer.  For each
    zero-based page index greater than ``ini_page``, ``get_mu_lu`` is asked
    for the entry of page ``index - ini_page``; a non-empty result is added
    as a bookmark pointing at that page and printed.  The result is written
    to ``example_with_bookmarks.pdf`` in the current working directory.
    """
    # Context managers close both files even if reading or writing raises.
    # The source stays open until write() has finished, matching the original
    # close order (the copied pages may still be read from the source stream).
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF4.PdfFileReader(pdf_file)
        pdf_writer = PyPDF4.PdfFileWriter()
        for page_num in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page_num))
            if page_num > ini_page:
                # Convert the PDF page index to the page number used in the TOC.
                mulu = get_mu_lu(page_num - ini_page)
                if mulu:
                    pdf_writer.addBookmark(mulu, page_num)
                    print(mulu)
        with open('example_with_bookmarks.pdf', 'wb') as output_file:
            pdf_writer.write(output_file)
# Runs the conversion whenever this file is executed or imported.
read_pdf()
希望可以帮到您