# 运行前需要先安装 python-docx 和 PyPDF2:
#   pip install python-docx
#   pip install PyPDF2
# 注意:不要执行 pip install docx(应安装 python-docx),否则导入时会报错 ImportError: No module named 'exceptions'
import PyPDF2
from docx import Document
# Code points outside the XML 1.0 "Char" production: C0 controls other than
# tab/LF/CR, UTF-16 surrogates, and the non-characters U+FFFE/U+FFFF.
# Mapping them to None makes str.translate() delete them.
_XML_ILLEGAL_CHARS = dict.fromkeys(
    [c for c in range(0x20) if c not in (0x09, 0x0A, 0x0D)]
    + list(range(0xD800, 0xE000))
    + [0xFFFE, 0xFFFF]
)


def convert_pdf_to_doc(pdf_path, doc_path):
    """Extract the text of every page of a PDF and save it as a .docx file.

    The text of each page is added to the document as one paragraph, in page
    order. Only text is transferred; nothing else from the PDF is read.

    Args:
        pdf_path: Path of the PDF file to read.
        doc_path: Path of the .docx file to write.

    Progress is reported on stdout: the total page count first, then the
    0-based index of each page after it has been processed.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        doc = Document()
        nums = len(pdf_reader.pages)
        print("total page:", nums)
        for page_num, page in enumerate(pdf_reader.pages):
            # Treat a missing/empty extraction result as an empty page.
            text = page.extract_text() or ""
            # PDF text frequently carries stray control characters; they are
            # not legal in the XML that backs a .docx, so drop them rather
            # than let one bad character abort the whole conversion.
            doc.add_paragraph(text.translate(_XML_ILLEGAL_CHARS))
            print("current page:", page_num)
    # The PDF is fully read at this point, so the input file can be closed
    # before the output document is written.
    doc.save(doc_path)
# Example usage: read the input PDF 'in.pdf' and write the output
# Word document 'out.docx'.
pdf_path, doc_path = 'in.pdf', 'out.docx'
convert_pdf_to_doc(pdf_path=pdf_path, doc_path=doc_path)