1. 源代码如下:
# -*- coding:utf-8 -*-
# 添加水印
from PyPDF2 import PdfFileReader, PdfFileWriter
from copy import copy
# Directory holding the PDF to be watermarked and the generated output file.
pdf_dir = r"C:\cdb\pdfconver"

# Load the watermark template; its first page is used as the watermark.
sy = PdfFileReader(r"C:\cdb\intron_prd\ig.plm\ig\templates\watermark.pdf")
mark_page = sy.getPage(0)  # page that holds the watermark

# Open the document that should receive the watermark.
# The backslash before "ACL" is escaped explicitly: "\A" is not a recognised
# escape sequence, and recent Python 3 releases warn about such literals.
file_reader = PdfFileReader(pdf_dir + u"\\ACL替代物料通知书20230612-6.pdf")
file_writer = PdfFileWriter()

for page in range(file_reader.getNumPages()):
    # Fetch each page that needs the watermark.
    source_page = file_reader.getPage(page)
    # Merge into a copy rather than into mark_page itself.
    new_page = copy(mark_page)
    # new_page (watermark) ends up underneath, source_page (original) on top.
    new_page.mergePage(source_page)
    file_writer.addPage(new_page)

# Write the watermarked document next to the source file.
with open(pdf_dir + u"\\ACL替代物料通知书20230612-6_有水印.pdf", 'wb') as out:
    file_writer.write(out)
2. 报错提示
Connected to pydev debugger (build 193.7288.30)
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm 2019.3.5\plugins\python\helpers\pydev\pydevd.py", line 1434, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:/cdb/intron_prd/ig.plm/ig/documents/ig_pdf_watermark.py", line 16, in <module>
new_page.mergePage(source_page) # new_page(水印)在下面,source_page原文在上面
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\pdf.py", line 2239, in mergePage
self._mergePage(page2)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\pdf.py", line 2260, in _mergePage
new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\pdf.py", line 2177, in _mergeResources
newRes[newname] = page2Res[key]
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 517, in __getitem__
return dict.__getitem__(self, key).getObject()
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 178, in getObject
return self.pdf.getObject(self).getObject()
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\pdf.py", line 1611, in getObject
retval = readObject(self.stream, self)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 66, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 580, in readFromStream
value = readObject(stream, pdf)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 60, in readObject
return NameObject.readFromStream(stream, pdf)
File "C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py", line 493, in readFromStream
raise utils.PdfReadError("Illegal character in Name Object")
PyPDF2.utils.PdfReadError: Illegal character in Name Object
Process finished with exit code 1
3. 处理方式
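报错原因:待处理的 PDF 文件中,部分 Name 对象包含无法按 UTF-8 解码的字节(例如以 GBK 编码的中文名称),而 PyPDF2 在严格模式(strict)下遇到这种情况会直接抛出 PdfReadError("Illegal character in Name Object")。(如果不想修改库文件,也可以尝试用 strict=False 创建 PdfFileReader,此时该处只会发出警告而不会抛出异常。)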
- 分析上面的报错信息,可以看出错误来源于 PyPDF2 安装目录下的 generic.py(本例中为 C:\cdb\CONTACT Elements Server 15.4.42\lib\site-packages\PyPDF2\generic.py)的 readFromStream 方法,异常在第493行抛出。对应的代码段从第484行开始(具体行号可能因 PyPDF2 版本不同而略有差异),原始内容为:
-
try: return NameObject(name.decode('utf-8')) except (UnicodeEncodeError, UnicodeDecodeError) as e: # Name objects should represent irregular characters # with a '#' followed by the symbol's hex number if not pdf.strict: warnings.warn("Illegal character in Name Object", utils.PdfReadWarning) return NameObject(name) else: raise utils.PdfReadError("Illegal character in Name Object")
- 需要将上述原始内容,修改为如下内容:
-
try: return NameObject(name.decode('utf-8')) except (UnicodeEncodeError, UnicodeDecodeError) as e: # Name objects should represent irregular characters # with a '#' followed by the symbol's hex number try: return NameObject(name.decode('gbk')) except (UnicodeEncodeError, UnicodeDecodeError) as e: if not pdf.strict: warnings.warn("Illegal character in Name Object", utils.PdfReadWarning) return NameObject(name) else: raise utils.PdfReadError("Illegal character in Name Object")
- 接着,修改utils.py文件中的第238行。utils.py文件中的第238行原始内容如下所示:
r = s.encode('latin-1') if len(s) < 2: bc[s] = r return r
- 需要将上述原始内容,修改为如下内容:
try: r = s.encode('latin-1') except Exception as e: r = s.encode('utf-8') if len(s) < 2: bc[s] = r return r
然后重启 Python 进程(或使用 PyPDF2 的服务),让修改后的库文件重新加载,就好啦!哈哈哈哈