Markdown 导出的 PDF 书签没有标题序号

# 博客地址:qq_38896687-CSDN博客 

#__author__ = '沅宸'

#__date__ = '2024/01/01'

# Import the two PyPDF2 classes used below: PdfReader and PdfWriter

from PyPDF2 import PdfReader, PdfWriter

def get_pdf_Bookmark(filename):
    """Collect the outline (bookmark) entries of a PDF as a flat list.

    Args:
        filename: Either a path string, which is opened with ``PdfReader``,
            or an already constructed reader-like object exposing ``pages``
            and ``outline``.

    Returns:
        A list of ``(level, title, page_index)`` tuples in outline order.
        ``level`` is the nesting depth (0 for top-level entries) and
        ``page_index`` is the zero-based position of the target page in
        ``pages``.

    Outline entries whose destination cannot be matched to a page of the
    document (no page reference, or a reference to an unknown object id)
    are skipped with a warning instead of aborting the whole extraction.
    """
    import warnings

    pdf_reader = PdfReader(filename) if isinstance(filename, str) else filename

    # Map the object id of every page to its zero-based index so that an
    # outline destination can be translated into a page position.
    idnum2pagenum = {
        page.indirect_ref.idnum: index
        for index, page in enumerate(pdf_reader.pages)
    }

    # Flat result: one (level, title, page_index) tuple per outline entry.
    bookmark = []

    def get_pdf_Bookmark_inter(outlines, tab=0):
        for outline in outlines:
            if isinstance(outline, list):
                # A nested list is walked one level deeper.
                get_pdf_Bookmark_inter(outline, tab + 1)
                continue

            # The destination may carry no usable page reference; look the
            # id up defensively rather than assuming it is always present.
            page_ref = getattr(outline, "page", None)
            page_index = idnum2pagenum.get(getattr(page_ref, "idnum", None))
            if page_index is None:
                warnings.warn(
                    "skipping outline entry {!r}: its destination does not "
                    "point to a page of this document".format(
                        outline.get('/Title')
                    )
                )
                continue

            bookmark.append((tab, outline['/Title'], page_index))

    get_pdf_Bookmark_inter(pdf_reader.outline)
    return bookmark

def pdf_write_bookmark(bookmark, pdf_file, compress=True):
    """Rewrite ``pdf_file`` in place with the given outline entries.

    Args:
        bookmark: Sequence of ``(level, title, page_index)`` tuples, where
            ``level`` is the nesting depth (0 for top level) and
            ``page_index`` is the zero-based target page.
        pdf_file: Path of the PDF to read; the result is written back to
            the same path.
        compress: When true, ``compress_content_streams()`` is called on
            every page before it is added to the writer.

    Entries whose page index lies beyond the last page are skipped. An
    empty ``bookmark`` is accepted; the file is then rewritten without any
    outline entries being added.
    """
    pdf_reader = PdfReader(pdf_file)
    num_pages = len(pdf_reader.pages)

    pdf_writer = PdfWriter()
    for page in pdf_reader.pages:
        if compress:
            page.compress_content_streams()
        pdf_writer.add_page(page)

    # last_cache[level] remembers the outline item most recently added at
    # that level, so a deeper entry can be attached to it as its parent.
    # ``default=-1`` keeps an empty bookmark list from raising ValueError.
    deepest_level = max((tab for tab, _, _ in bookmark), default=-1)
    last_cache = [None] * (deepest_level + 1)

    for tab, title, pagenum in bookmark:
        if pagenum >= num_pages:
            # The target page does not exist in this document.
            continue
        parent = last_cache[tab - 1] if tab > 0 else None
        indirect_id = pdf_writer.add_outline_item(title, pagenum, parent=parent)
        last_cache[tab] = indirect_id

    # Ask viewers to show the outline panel when the document is opened.
    pdf_writer.page_mode = "/UseOutlines"

    with open(pdf_file, "wb") as out:
        pdf_writer.write(out)

    print("已成功将书签写入到", pdf_file)

def add_title_numbers(bookmark):
    """Prefix outline titles with hierarchical numbers such as ``2.1.3``.

    Args:
        bookmark: Iterable of ``(level, title, page_index)`` tuples in
            outline order, ``level`` being 0 for top-level entries.

    Returns:
        A new list of ``(level, title, page_index)`` tuples. A title that
        already starts with a digit is kept unchanged (its position is
        still counted); every other title gets the dotted number of its
        position followed by a space.

    The per-level counters grow on demand, so any nesting depth is
    supported, and an empty title is numbered like any other title.
    """
    counters = []   # counters[level] = running number at that level
    last_tab = 0
    numbered = []

    for tab, title, pagenum in bookmark:
        if tab >= len(counters):
            counters.extend([0] * (tab + 1 - len(counters)))

        if tab > last_tab:
            # Going deeper starts a fresh sub-sequence at this level.
            counters[tab] = 1
        else:
            counters[tab] += 1

        # title[:1] instead of title[0] so an empty title cannot raise.
        if title[:1].isdigit():
            new_title = title
        else:
            new_title = ".".join(map(str, counters[:tab + 1])) + " " + title

        numbered.append((tab, new_title, pagenum))
        last_tab = tab

    return numbered


if __name__ == '__main__':
    import sys

    # The PDF path may be given as the first command-line argument; the
    # original hard-coded path remains the default.
    file = sys.argv[1] if len(sys.argv) > 1 else r"G:\文档\typora笔记\软硬件产品技术.pdf"

    bookmark = get_pdf_Bookmark(file)
    new_bookmark = add_title_numbers(bookmark)
    pdf_write_bookmark(new_bookmark, file)

  • 22
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值