python pdf 转 图片、pdf 转 word、 word 转 pdf
前言
pdf word 互转
一、环境
-
python 3.8.5
-
PyMuPDF 1.19.6
-
pdf2docx 0.5.3
-
docx2pdf 0.1.8
二、api文档
- PyMuPDF (pdf转图片) https://pymupdf.readthedocs.io/en/latest/index.html
- pdf2docx (pdf转word) https://dothinking.github.io/pdf2docx/installation.html
- docx2pdf (word 转 pdf) https://pypi.org/project/docx2pdf/
三、使用
1.pdf转图片
pip install pymupdf
import fitz
#打开pdf
pdf = fitz.open(<pdf_path_dir>)
#遍历pdf
for page in pdf.pages(0, pdf.page_count, 1):
#创建像素图
pix = page.get_pixmap(matrix=fitz.Matrix(a, b))
#保存
pix.save(f"{out}page-%i.png" % page.number)
2.pdf转word
pip install pdf2docx
from pdf2docx import Converter
cv = Converter(<pdf_path_dir>)
cv.convert(out) # all pages by default
cv.close()
3.word转pdf
pip install docx2pdf
from docx2pdf import convert
convert(<doc_path_dir>, <pdf_path_dir>)
四、完整代码
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time : 2022/6/16 9:56
# @Author : mtl
# @Desc : ***
# @File : test.py
# @Software: PyCharm
from pathlib import Path
def pdf_to_img(file: str, out: str = "./", a: int = 2, b: int = 2) -> None:
    """
    Render every page of a PDF to a PNG image.

    Each page is written to ``{out}page-{n}.png`` where ``n`` is the
    zero-based page number.

    :param file: path of the PDF file to render
    :param out: prefix prepended verbatim to each output file name; to write
        into a directory it must end with a path separator (default ``"./"``)
    :param a: horizontal zoom factor; larger values give a higher resolution
    :param b: vertical zoom factor; larger values give a higher resolution
    :return: None
    :raises AssertionError: if ``file`` is not an existing ``.pdf`` file
    """
    source = Path(file)
    # Raised explicitly (instead of via ``assert``) so the check survives
    # ``python -O`` while keeping the exception type callers already see.
    # The suffix comparison is case-insensitive so "X.PDF" is accepted too.
    if not (source.is_file() and source.suffix.lower() == ".pdf"):
        raise AssertionError("不是pdf!")

    # Imported lazily so the module can be loaded without PyMuPDF installed.
    import fitz

    document = fitz.open(str(source))
    try:
        zoom = fitz.Matrix(a, b)  # loop-invariant scaling matrix
        for page in document:
            pix = page.get_pixmap(matrix=zoom)
            # A single f-string: the old f-string + "%" mix broke whenever
            # ``out`` itself contained a "%" character.
            pix.save(f"{out}page-{page.number}.png")
    finally:
        # Always release the file handle, even if rendering fails.
        document.close()
def pdf_to_word(file: str, out: str = "out.docx") -> None:
    """
    Convert all pages of a PDF file into a Word (.docx) document.

    :param file: path of the PDF file to convert
    :param out: path of the .docx file to write (default ``"out.docx"``)
    :return: None
    :raises AssertionError: if ``file`` is not an existing ``.pdf`` file
    """
    source = Path(file)
    # Raised explicitly (instead of via ``assert``) so the check survives
    # ``python -O`` while keeping the exception type callers already see.
    # The suffix comparison is case-insensitive so "X.PDF" is accepted too.
    if not (source.is_file() and source.suffix.lower() == ".pdf"):
        raise AssertionError("不是pdf!")

    # Imported lazily so the module can be loaded without pdf2docx installed.
    from pdf2docx import Converter

    cv = Converter(str(source))
    try:
        cv.convert(out)  # all pages by default
    finally:
        # Close the converter even when the conversion fails.
        cv.close()
def word_to_pdf(file: str, out: str = "out.pdf") -> None:
    """
    Convert a Word document to PDF using docx2pdf.

    :param file: path of the Word file (``.doc`` or ``.docx``) to convert
    :param out: path of the PDF file to write (default ``"out.pdf"``)
    :return: None
    :raises AssertionError: if ``file`` is not an existing Word file
    """
    source = Path(file)
    # ".doc" replaces the former ".dox" typo; matching is case-insensitive.
    # NOTE(review): docx2pdf itself may only accept ".docx" input -- confirm
    # before relying on ".doc" files being converted.
    # Raised explicitly (instead of via ``assert``) so the check survives
    # ``python -O`` while keeping the exception type callers already see.
    if not (source.is_file() and source.suffix.lower() in (".doc", ".docx")):
        raise AssertionError("不是word!")

    # Imported lazily so the module can be loaded without docx2pdf installed.
    from docx2pdf import convert

    convert(source, out)
if __name__ == "__main__":
    # Demo run: render the sample PDF to PNG pages, convert it to out.docx,
    # then convert that generated Word file back to PDF.
    name = r"C:\Users\e9\Documents\1_环境信息管控平台项目可研---.pdf"
    for convert_pdf in (pdf_to_img, pdf_to_word):
        convert_pdf(name)
    word_to_pdf("out.docx")