# Python 按页码范围拆分 Word 文档并生成 PDF
# 先获取文档的总页数,再抽取所需页码范围内的内容并合并到一个新的 Word 文档中,最后将该文档另存为 PDF。
import os

from win32com.client import Dispatch, DispatchEx
def copy_doc(doc, pdf_path, start_page=1, end_page=0):
    """Export a page range of a Word document to a PDF file.

    Drives Microsoft Word through COM: the source document is opened, the
    content of every page in ``start_page..end_page`` is copied through the
    clipboard into a new blank document, and that document is saved as
    ``<source name><start_page>-<end_page>.pdf`` inside ``pdf_path``.

    Args:
        doc: Path of the source Word document.
        pdf_path: Directory in which the PDF is written.
        start_page: First page to export (1-based, inclusive).
        end_page: Last page to export (inclusive). ``0`` means "through the
            last page of the document".

    Returns:
        The absolute path of the PDF that was written.

    Raises:
        ValueError: If the requested page range does not fit the document.
    """
    wd_format_pdf = 17  # WdSaveFormat.wdFormatPDF
    wd_do_not_save_changes = 0  # WdSaveOptions.wdDoNotSaveChanges

    word = Dispatch("Word.Application")
    source = None
    target = None
    try:
        word.Visible = 0
        word.DisplayAlerts = 0

        # Word resolves relative paths against its own working directory,
        # not Python's, so always hand it an absolute path.
        source = word.Documents.Open(os.path.abspath(doc))
        pages = source.ActiveWindow.Panes(1).Pages.Count

        # Validate before creating anything so a bad range leaves no debris.
        if end_page == 0:
            end_page = pages
        if start_page < 1 or end_page > pages or start_page > end_page:
            # Message reads "please check the page numbers".
            raise ValueError("请检查页码")

        # The scratch document lives only in memory; it is never saved as
        # .docx, so nothing has to be deleted afterwards.
        target = word.Documents.Add()
        for page_no in range(start_page, end_page + 1):
            rectangles = source.ActiveWindow.Panes(1).Pages(page_no).Rectangles
            # COM collections are 1-based.
            for idx in range(1, rectangles.Count + 1):
                rectangles.Item(idx).Range.Copy()
                # Paste just before the final paragraph mark, i.e. append.
                tail = target.Content.End - 1
                target.Range(tail, tail).Paste()

        # basename/splitext cope with either path separator and with file
        # names that contain additional dots.
        stem = os.path.splitext(os.path.basename(doc))[0]
        out_path = os.path.abspath(
            os.path.join(pdf_path, f"{stem}{start_page}-{end_page}.pdf")
        )
        target.SaveAs(out_path, wd_format_pdf)
        return out_path
    finally:
        # Always release the documents and the Word process, even when the
        # export failed half-way through.
        try:
            for opened in (target, source):
                if opened is not None:
                    opened.Close(wd_do_not_save_changes)
        finally:
            word.Quit()