PdfFileWriter().write() UnicodeEncodeError: 'latin-1' codec can't encode characters in position 8-9

这个小 Python 脚本是用来将 PDF 转化成图片的。

看样子有两种方法:一是 Wand,二是 pdf2image。
我用的是 pdf2image,因为这些 PDF 大小不定,少的几十页,多的可能有几千页。
pdf2image 的用法大致有两种,第一种是:

from pdf2image import convert_from_path

# Render a.pdf at 500 DPI with 4 worker threads; the pages are written as
# JPEG files into the "output" folder, using "ok" as the output file name.
# NOTE(review): the "output" folder presumably has to exist before the call
# -- confirm against the pdf2image documentation.
convert_from_path(
    'a.pdf',
    dpi=500,
    output_folder="output",
    fmt="JPEG",
    output_file="ok",
    thread_count=4,
)

这种转换是直接写入到磁盘上的,因此不会占用太多内存。

另一种写法是:
from pdf2image import convert_from_path

# Render all pages into memory first, then save them one by one.
pages = convert_from_path('pdf_file', 500)
for index, page in enumerate(pages):
    # Give every page its own file name; saving each page to the same
    # 'out.jpg' would leave only the last page on disk.
    page.save('out_' + str(index) + '.jpg', 'JPEG')
    
但这种写法会占用大量内存:一是 convert_from_path 的默认格式是 ppm,二是不指定输出目录时,所有页面的图片都会保存在内存中。

很显然第一种适合我,但是第一种运行竟然什么都没有输出,也没有报错,也没有结果,what the fc**
在这里插入图片描述

有个小伙伴是这么写的:

from PyPDF2 import PdfFileReader, PdfFileWriter
from pdf2image import convert_from_path, convert_from_bytes
import os
import tempfile
import cv2
import re
def split_pdf(infile, out_path, temp):
    """Split a PDF into single-page PDFs, one page at a time.

    This is a generator.  For every page of ``infile`` it overwrites the
    file at ``temp`` with a one-page PDF containing just that page and then
    yields the zero-based page index, so the caller can process ``temp``
    before the next page replaces it.

    :param infile: path of the PDF file to split
    :param out_path: output directory; it is created if it does not exist
        (this function itself only writes to ``temp``)
    :param temp: path of the single-page PDF that is rewritten for each page
    :return: generator yielding the zero-based index of the page currently
        stored in ``temp``
    """
    # Kept for backward compatibility: callers may still read the
    # module-level page counter that this function updates.
    global number

    os.makedirs(out_path, exist_ok=True)
    # Make sure the temporary file can actually be created.
    temp_dir = os.path.dirname(temp)
    if temp_dir:
        os.makedirs(temp_dir, exist_ok=True)

    # Use a separate name for the handle so the ``infile`` path is not shadowed.
    with open(infile, 'rb') as pdf_stream:
        reader = PdfFileReader(pdf_stream)

        # Pages are indexed from 0: the first page is 0, the second is 1, ...
        for i in range(reader.getNumPages()):
            number = i
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(i))

            with open(temp, 'wb') as outfile:
                writer.write(outfile)
            yield i

# Leading run of digits / lowercase ASCII letters of a file name.  Note that
# '|' inside a character class is a literal '|', not an alternation; the
# pattern is kept unchanged so existing output names stay the same.
_NAME_PREFIX_RE = re.compile("[0-9|a-z]*")


def _output_prefix(pdf_path):
    """Return the file-name prefix used for the images of ``pdf_path``."""
    base_name = os.path.basename(pdf_path)
    prefix = _NAME_PREFIX_RE.match(base_name).group(0)
    if prefix:
        return prefix
    # The name does not start with a digit or lowercase letter.  Fall back to
    # the full stem; an empty prefix would make every such PDF write
    # "_0.png", "_1.png", ... and silently overwrite each other.
    return os.path.splitext(base_name)[0]


def turn_picture(in_File, out_Path, dpi=10):
    """Convert every page of a PDF into a PNG image.

    The PDF is split page by page with ``split_pdf``; each single-page PDF is
    rendered with ``convert_from_path`` and saved in ``out_Path`` as
    ``<prefix>_<page index>.png``, where the page index starts at 0.

    :param in_File: path of the PDF file to convert
    :param out_Path: directory that receives the PNG files
    :param dpi: rendering resolution passed to ``convert_from_path``
    :return: None
    """
    prefix = _output_prefix(in_File)

    # Keep the intermediate single-page PDF in a throw-away directory instead
    # of a hard-coded, machine-specific path.
    with tempfile.TemporaryDirectory() as work_dir:
        temp = os.path.join(work_dir, 'temp.pdf')
        for i in split_pdf(in_File, out_Path, temp):
            images = convert_from_path(temp, dpi=dpi)
            for index, img in enumerate(images):
                # Derive the number from the yielded page index rather than
                # from a global that another function mutates.
                page_number = i + index
                img.save(os.path.join(out_Path, prefix + "_" + str(page_number) + ".png"))
                print('已经转化' + str(page_number + 1))

if __name__ == '__main__':
    pdf_dir = '/home/zh/local/flowChart/pdfs/'
    out_Path = '/home/zh/local/flowChart/pic/'  # folder that receives the generated images

    for entry in os.listdir(pdf_dir):
        in_File = os.path.join(pdf_dir, entry)
        # Skip sub-directories and files that are not PDFs; passing them to
        # the converter would abort the whole batch.
        if not (os.path.isfile(in_File) and entry.lower().endswith('.pdf')):
            continue
        print(in_File)
        turn_picture(in_File, out_Path)

竟然能运行……但是报错:

/home/zh/local/flowChart/pdfs/2015年全国大学生数学建模竞赛A题优秀论文太阳影子定位模型教程.pdf
Traceback (most recent call last):
  File "/home/zh/local/flowChart/code/PdfToPic.py", line 130, in <module>
    turn_picture(in_File, out_Path)
  File "/home/zh/local/flowChart/code/PdfToPic.py", line 113, in turn_picture
    for i in split_pdf(in_File, out_Path, temp):
  File "/home/zh/local/flowChart/code/PdfToPic.py", line 101, in split_pdf
    writer.write(outfile)
  File "/home/zh/anaconda3/lib/python3.7/site-packages/PyPDF2/pdf.py", line 501, in write
    obj.writeToStream(stream, key)
  File "/home/zh/anaconda3/lib/python3.7/site-packages/PyPDF2/generic.py", line 549, in writeToStream
    value.writeToStream(stream, encryption_key)
  File "/home/zh/anaconda3/lib/python3.7/site-packages/PyPDF2/generic.py", line 472, in writeToStream
    stream.write(b_(self))
  File "/home/zh/anaconda3/lib/python3.7/site-packages/PyPDF2/utils.py", line 238, in b_
    r = s.encode('latin-1')
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 8-9: ordinal not in range(256)

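参考 http://www.aiuxian.com/article/p-1985272.html 把 PyPDF2 的源码改了(出错位置见上面的 traceback:utils.py 中 b_ 函数的 s.encode('latin-1');常见做法是在 latin-1 编码失败时改用 utf-8 编码),
然后竟然行了。。。。。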
在这里插入图片描述

评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值