下面介绍通过现成软件把文本文件(.txt)转换为 PDF 的方法:
使用 LibreOffice 自带的 soffice 命令行转换功能来实现该需求。
环境安装:
sudo apt install libreoffice-writer
如果运行时出现未安装 Java 的警告(warning),请执行下面的命令:
sudo apt install default-jre
sudo apt install libreoffice-java-common
转换命令:
soffice --convert-to pdf 输入文件.txt --outdir 输出目录 # 会在输出目录下生成同名的 .pdf 文件
soffice --convert-to pdf 输入文件.txt # 会在当前目录下生成同名的 .pdf 文件
python实现方法:
之前曾使用 Python 库来实现此需求,但过程比较繁琐、效果也不理想(需要手动调整每一页的大小和坐标),所以改为通过命令行调用现成的软件进行转换,效果可以满足需要。下面的 Python 代码仅供参考。
Python 库的安装:
pip install pdfminer PyPDF2 reportlab -i https://pypi.tuna.tsinghua.edu.cn/simple
注意:下面的示例代码还导入了 fitz 模块(由 PyMuPDF 提供),如未安装,需要另外执行 pip install PyMuPDF,否则 import fitz 会报错。
pdf转换代码:
# -*- coding: UTF-8 -*-
import sys
import string
import fitz
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.colors import HexColor
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
def _wrap_line(line, font_name, font_size, max_width):
    """Split one logical line into pieces that each fit within max_width.

    Breaks are made per character (suitable for CJK text, which has no
    spaces to break on). An empty line yields a single empty piece so that
    blank lines in the input are preserved.
    """
    if not line:
        return ['']
    pieces = []
    start = 0
    run_width = 0.0
    for idx, ch in enumerate(line):
        ch_width = pdfmetrics.stringWidth(ch, font_name, font_size)
        # Always keep at least one character per piece, even if it alone
        # is wider than the available width, so the loop makes progress.
        if idx > start and run_width + ch_width > max_width:
            pieces.append(line[start:idx])
            start = idx
            run_width = 0.0
        run_width += ch_width
    pieces.append(line[start:])
    return pieces


def create_pdf(input_text, output_file):
    """Render plain text into a letter-sized PDF file.

    The text is split on newlines; each line is wrapped to the printable
    width and a new page is started whenever the bottom margin is reached.
    (The previous version put everything into one text object on a single
    page, so long lines and long documents ran off the page.)

    Args:
        input_text: Text to render; '\n' separates lines.
        output_file: Path (or file-like object) passed to reportlab's Canvas.

    Note:
        Registers the TrueType font 'msyh' from 'msyh.ttf'; the font file
        must be locatable by reportlab (presumably the working directory or
        its font search path -- verify for your environment).
    """
    font_name = 'msyh'
    font_size = 12
    margin = 50

    pdfmetrics.registerFont(TTFont(font_name, 'msyh.ttf'))  # register the font
    c = canvas.Canvas(output_file, pagesize=letter)
    width, height = letter
    max_width = width - 2 * margin

    def start_text_object():
        # Each page needs its own text object; font and colour are set on
        # the text object itself so they apply on every page.
        text_obj = c.beginText(margin, height - margin)
        text_obj.setFont(font_name, font_size)
        text_obj.setFillColor(HexColor(0x333333))
        return text_obj

    text = start_text_object()
    for line in input_text.split('\n'):
        for piece in _wrap_line(line, font_name, font_size, max_width):
            if text.getY() < margin:
                # Bottom margin reached: flush this page and continue on a new one.
                c.drawText(text)
                c.showPage()
                text = start_text_object()
            text.textLine(piece)
    c.drawText(text)
    c.save()
if __name__ == "__main__":
    # Demo run: build a long sample string and render it to output.pdf.
    repeat_count = 1000
    text = "这是一段测试文本。" * repeat_count
    create_pdf(input_text=text, output_file="output.pdf")
PDF增加文字水印代码:
# -*- coding: UTF-8 -*-
import sys
import string
import fitz
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.colors import HexColor
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
def add_watermark(input_file, output_file, watermark):
    """Stamp a text watermark onto every page of a PDF.

    A one-page, letter-sized overlay containing three copies of the
    watermark text is rendered in memory and merged onto each page of
    input_file; the result is written to output_file.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path of the PDF to write.
        watermark: Text to draw as the watermark.

    Note:
        The font 'msyh' must already be registered with reportlab by the
        caller before this function is invoked.
    """
    from io import BytesIO

    from PyPDF2 import PdfReader, PdfWriter
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    # Build the overlay page in memory.
    packet = BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    width, height = letter
    can.rotate(15)  # tilt the coordinate system so the text is drawn at an angle
    can.setFont('msyh', 30)
    can.setFillColor(HexColor(0xFF00FF))
    anchor_points = (
        (width / 2, height / 2),
        (width / 3, height / 4),
        (width / 3, (height / 4) * 3),
    )
    for x, y in anchor_points:
        can.drawString(x, y, watermark)
    can.save()
    packet.seek(0)

    # Look the overlay page up once; it is the same for every page. A separate
    # name is used so the `watermark` parameter is not overwritten.
    overlay_page = PdfReader(packet).pages[0]

    output = PdfWriter()
    # Keep the input open until the output is written (pages may be read
    # lazily), then close it instead of leaking the file handle.
    with open(input_file, "rb") as source_stream:
        for page in PdfReader(source_stream).pages:
            page.merge_page(overlay_page)
            output.add_page(page)
        with open(output_file, "wb") as output_stream:
            output.write(output_stream)
if __name__ == "__main__":
    # The font must be registered first: add_watermark draws with 'msyh'.
    msyh_font = TTFont('msyh', 'msyh.ttf')
    pdfmetrics.registerFont(msyh_font)
    add_watermark("input.pdf", "input_watermark.pdf", "版权所有")
PDF增加图像水印代码:
# -*- coding: UTF-8 -*-
import sys
import fitz
import string
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.colors import HexColor
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
def add_watermark(input_file, output_file, watermark):
    """Stamp an image watermark onto every page of a PDF.

    A one-page, letter-sized overlay with the image stretched over the
    whole page at 50% fill alpha is rendered in memory and merged onto each
    page of input_file; the result is written to output_file.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path of the PDF to write.
        watermark: Image (e.g. a file path) passed to reportlab's drawImage.
    """
    from io import BytesIO

    from PyPDF2 import PdfReader, PdfWriter
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    # Build the overlay page in memory.
    packet = BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    width, height = letter
    # Kept from the original; presumably has no visible effect on a raster
    # image -- TODO confirm and remove if so.
    can.setFillColor(HexColor(0xFF00FF))
    can.setFillAlpha(0.5)
    can.drawImage(watermark, 0, 0, width, height)
    can.save()
    packet.seek(0)

    # Look the overlay page up once; it is the same for every page. A separate
    # name is used so the `watermark` parameter is not overwritten.
    overlay_page = PdfReader(packet).pages[0]

    output = PdfWriter()
    # Keep the input open until the output is written (pages may be read
    # lazily), then close it instead of leaking the file handle.
    with open(input_file, "rb") as source_stream:
        for page in PdfReader(source_stream).pages:
            page.merge_page(overlay_page)
            output.add_page(page)
        with open(output_file, "wb") as output_stream:
            output.write(output_stream)
if __name__ == "__main__":
    # Font registration is kept exactly as in the original script.
    msyh_font = TTFont('msyh', 'msyh.ttf')
    pdfmetrics.registerFont(msyh_font)
    # pdfwatermark.png is a user-supplied background image.
    add_watermark("input.pdf", "input_watermark.pdf", "pdfwatermark.png")