用 Python 生成 PDF,记录一下。
import pdfkit
import json
import requests
import time
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
from reportlab.lib.units import cm
from reportlab.pdfgen import canvas
# Pass the absolute path of the wkhtmltopdf.exe program into the pdfkit configuration object.
# NOTE(review): this is a raw string, so every doubled backslash stays as two
# backslash characters in the path — confirm that this is intended.
path_wkthmltopdf = r'D:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_wkthmltopdf)
# Options handed to pdfkit.from_string() in start().
pdfkit_options = {'encoding': 'UTF-8'}
# Path of the PDF written by start() and read back by the __main__ block.
filename="daytest.pdf"
# HTML accumulated across start() calls; this is what gets rendered to the PDF.
str_content=""
# Most recent matching post text / analysis text; both are overwritten by start().
contents=""
description=""
def start(offset_dynamic_id):
    """Fetch one page of the space-history feed, collect matching posts and re-render the PDF.

    The page starting at ``offset_dynamic_id`` is requested and its cards are
    walked from last to first. For every card whose JSON carries an ``item``:

    * ``timestamp`` / ``upload_time`` is formatted as ``YYYY-MM-DD`` (local time);
    * a ``content`` containing "每日一题" replaces the global ``contents``;
    * a ``description`` containing "解析" replaces the global ``description``;
    * every entry of ``pictures`` is downloaded through ``savepic``;
    * date + ``contents`` + ``description`` is appended to the global
      ``str_content`` unless that text equals the entry appended just before
      it during this call.

    After the loop the function sleeps two seconds and renders the accumulated
    ``str_content`` to ``filename`` with pdfkit.

    NOTE(review): the source this block was recovered from had lost all
    indentation. The nesting used here (all per-item checks as siblings, the
    separator printed once per card, sleep and PDF rendering once per call)
    is a reconstruction — confirm against the author's intent.

    NOTE(review): the duplicate check only remembers entries appended within
    the current call, while ``contents`` / ``description`` persist across
    calls, so the first card of a page can repeat the previous page's text.
    The post text is also inserted into the HTML without escaping.

    Args:
        offset_dynamic_id: Dynamic id to page from; converted with ``str()``
            and placed in the request's query string.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an HTTP error status.
    """
    global str_content, contents, description

    url = (
        "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history?visitor_uid=283857977&host_uid=192450084&offset_dynamic_id="
        + str(offset_dynamic_id)
        + "&need_top=1&platform=web"
    )
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    response.encoding = "utf8"
    payload = json.loads(response.text)
    # Tolerate a response that carries no "data" or no "cards".
    cards = (payload.get('data') or {}).get('cards') or []

    last_appended = ""
    # Fallback label so a card without any time field cannot raise NameError.
    date_text = "unknown-date"
    for card in reversed(cards):
        parsed = json.loads(card['card'])
        if 'item' in parsed:
            item = parsed['item']

            # Both keys are checked in this order; a later hit overrides an earlier one.
            for time_key in ('timestamp', 'upload_time'):
                if time_key in item:
                    date_text = time.strftime("%Y-%m-%d", time.localtime(item[time_key]))
                    print(date_text)

            if 'content' in item and "每日一题" in str(item['content']):
                contents = item['content'].replace("\n", "<br>")
                print(contents)

            if 'description' in item and "解析" in str(item['description']):
                description = item['description'].replace("\n", "<br>")
                print(description)

            if 'pictures' in item:
                pictures = item['pictures'] or []
                if pictures:
                    print(pictures[0]['img_src'])
                for index, picture in enumerate(pictures):
                    pic_url = picture['img_src']
                    print(pic_url)
                    savepic(date_text, pic_url, index)

            entry = str(contents) + '<br>' + str(description) + '<br>'
            if entry != last_appended:
                str_content = str(str_content) + '<br>' + '<br>' + str(date_text) + '<br>' + str(entry)
                last_appended = entry
        print('--------------------')

    # Pause between page requests.
    time.sleep(2)
    pdfkit.from_string(str_content, filename, configuration=config, options=pdfkit_options)
def savepic(filename, imgurl, picname):
    """Download one image to ``./jx/<filename>/<picname>.jpg``.

    Args:
        filename: Name of the sub-folder below ``./jx/``; converted with ``str()``.
        imgurl: URL the image bytes are fetched from.
        picname: Base name of the image file; ``.jpg`` is appended.

    Raises:
        requests.RequestException: If the download times out or the server
            answers with an HTTP error status.
    """
    folder = "./jx/" + str(filename)
    if os.path.exists(folder):
        print('exists file!!!')
    # exist_ok closes the gap between the existence check and the creation.
    os.makedirs(folder, exist_ok=True)

    response = requests.get(imgurl, timeout=30)
    # Fail loudly instead of storing an HTTP error page under a .jpg name.
    response.raise_for_status()
    with open(folder + "/" + str(picname) + '.jpg', "wb") as f:
        f.write(response.content)
def create_watermark(content):
    """Write a one-page watermark PDF named "mark.pdf" and return that name.

    ``content`` is drawn once in Helvetica at size 80 on a 21cm x 29.7cm page,
    after the origin has been moved and the coordinate system rotated by 30
    degrees.
    """
    output_name = "mark.pdf"
    page_size = (21 * cm, 29.7 * cm)
    sheet = canvas.Canvas(output_name, pagesize=page_size)

    # Move the origin; the coordinate system starts at the bottom-left (0, 0).
    sheet.translate(5 * cm, 10 * cm)
    sheet.setFont("Helvetica", 80)

    # Stroke and fill colours set before the rotation.
    sheet.setStrokeColorRGB(0, 1, 0)
    sheet.setFillColorRGB(0, 1, 0)

    # Rotating turns the whole coordinate system, so the text below is drawn
    # along the rotated axes.
    sheet.rotate(30)

    # Fill colour in effect when the text is drawn: black with alpha 0.1.
    sheet.setFillColorRGB(0, 0, 0, 0.1)
    sheet.drawString(3 * cm, 0 * cm, content)

    # Issued after the text has already been drawn.
    sheet.setFillAlpha(0.6)

    # Finish the page and write the file.
    sheet.save()
    return output_name
def add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out):
    """Merge the first page of a watermark PDF onto every page of a PDF.

    Args:
        pdf_file_in: Path of the PDF to be watermarked.
        pdf_file_mark: Path of the watermark PDF; only its first page is used.
        pdf_file_out: Path the watermarked PDF is written to.
    """
    pdf_output = PdfFileWriter()
    # Both inputs stay open until the output is written, and every handle is
    # closed afterwards even if merging fails.
    with open(pdf_file_in, 'rb') as input_stream, \
            open(pdf_file_mark, 'rb') as mark_stream:
        pdf_input = PdfFileReader(input_stream, strict=False)
        watermark_page = PdfFileReader(mark_stream, strict=False).getPage(0)

        # Stamp each page in turn.
        for page_index in range(pdf_input.getNumPages()):
            page = pdf_input.getPage(page_index)
            page.mergePage(watermark_page)
            page.compressContentStreams()  # compress the merged content
            pdf_output.addPage(page)

        with open(pdf_file_out, 'wb') as output_stream:
            pdf_output.write(output_stream)
if __name__ == '__main__':
    # Paging offsets handed to start() one after another, in this order.
    offset_dynamic_id = [
        '596643666922248396',
        '597941412765972945',
        '599613099937925403',
        '601272168487401677',
        '602762930168798777',
        '603950931004835873',
        '605354350221887065',
        '607034021734160739',
        '610773503734780835',
        '612563418471648514',
        '613902958643821419',
        '615852422824317201',
        '616876308669489985',
        '618509839342691948',
        '620733258013494055',
        '623340530794036514',
        '625219514568319250',
        '627047177770485970',
        '628773067432125784',
        '630017972369358854',
        '632081467918254103',
        '634110127014674434',
        '635942952759197713',
        '636984997739233283',
        '638180106740695049',
        '640274045016735745',
        '642094170555547656',
        '643378455991287863',
        '645447419045085191',
        '646665497531645972',
        '648165540655792160',
        '650078922962108425',
        '652285565245325316',
        '0',
    ]
    for dynamic_id in offset_dynamic_id:
        start(dynamic_id)

    # Stamp the generated PDF with a text watermark and store the result
    # next to it.
    pdf_file_in = filename
    pdf_file_mark = create_watermark('marks')
    pdf_file_out = filename + '_watermark.pdf'
    add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out)
效果
唯一不理想的是,pdfkit 不会自动把图片插入到生成的 PDF 中!