import base64
from lxml import etree
import pdfkit
import requests
from scrapy.http import HtmlResponse
def save_pdf(title, description, content, file_path):
    """Render an HTML fragment to a PDF file with its remote images inlined.

    The fragment is wrapped in a minimal page (UTF-8 ``<meta>``, an ``<h1>``
    title and a description paragraph). Every ``<img>`` whose ``src`` is an
    absolute http(s) URL is downloaded and rewritten to a base64 ``data:``
    URI, and the resulting page is handed to ``pdfkit.from_string``.

    Args:
        title: Text or markup placed inside the ``<h1>`` element (inserted
            as-is, not escaped).
        description: Text or markup placed inside the description paragraph
            (inserted as-is, not escaped).
        content: HTML fragment for the page body. ``None`` or a blank string
            is treated as empty content.
        file_path: Destination path passed to ``pdfkit.from_string``.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` when an
            image download fails at the transport level (e.g. timeout).
    """
    # Local import so the block stays self-contained.
    from html import escape

    template = """
<head>
<meta charset="UTF-8">
</head>
<h1>{title}</h1>
<div style="font-size:18px;">
<p>{description}</p>
</div>
<div>{content}</div>
"""
    body_html = ''
    # Guard before parsing: None or blank content gives the parser nothing
    # to build a tree from.
    dom = etree.HTML(content) if content and content.strip() else None
    if dom is not None:
        for img_node in dom.xpath('//img'):
            src = (img_node.get('src') or '').strip()
            # Only absolute http(s) URLs can be fetched here; a missing src,
            # a relative path or an already-inlined data: URI is left as-is.
            if not src.lower().startswith(('http://', 'https://')):
                continue
            img_resp = requests.get(src, timeout=60)
            if not img_resp.ok:
                # An error page is not image data; keep the original src.
                continue
            # Use the server-reported image type; fall back to PNG when the
            # header is absent or does not describe an image.
            mime = img_resp.headers.get('Content-Type', '')
            mime = mime.split(';')[0].strip().lower()
            if not mime.startswith('image/'):
                mime = 'image/png'
            encoded = base64.b64encode(img_resp.content).decode('ascii')
            img_node.set('src', 'data:%s;base64,%s' % (mime, encoded))

        body_nodes = dom.xpath('//body')
        if body_nodes:
            body = body_nodes[0]
            # Serialize every child of <body>, not just the first one, so
            # fragments with several top-level elements are kept whole.
            # tostring() includes each child's tail text by default.
            parts = [escape(body.text or '', quote=False)]
            parts.extend(
                etree.tostring(child, method='html', encoding='unicode')
                for child in body
            )
            body_html = ''.join(parts)

    page = template.format(
        title=title, description=description, content=body_html
    )
    options = {'enable-local-file-access': None}
    pdfkit.from_string(page, file_path, options=options)
def save_url(req_url, x_path, file_path):
    """Download a web page and save the node selected by ``x_path`` as a PDF.

    Args:
        req_url: URL of the page to fetch.
        x_path: XPath expression evaluated against the page; its first match
            becomes the PDF content.
        file_path: Destination path forwarded to ``save_pdf``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If ``x_path`` matches nothing in the page.
    """
    res = requests.get(
        req_url,
        headers={
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
        },
        # Same limit save_pdf uses for images; without it a stalled server
        # would block forever.
        timeout=60,
    )
    res.raise_for_status()

    try:
        page_text = res.content.decode()
    except UnicodeDecodeError:
        # Not UTF-8 (e.g. a GBK page): fall back to the encoding detected
        # from the body instead of aborting.
        fallback = res.apparent_encoding or 'utf-8'
        page_text = res.content.decode(fallback, errors='replace')

    page = HtmlResponse(req_url, body=page_text, encoding="utf-8")
    fragment = page.xpath(x_path).get()
    if fragment is None:
        raise ValueError(
            'XPath %r matched nothing in %s' % (x_path, req_url)
        )
    save_pdf('', '', fragment, file_path)
if __name__ == '__main__':
    # Manual smoke run: write a sample page with an empty body to test4.pdf.
    save_pdf(
        title='计算ASK接收机的灵敏度',
        description='RFIC幅移键控(ASK)或者叫做开关键控(OOK)接收机的灵敏度对于远程无线开门系统(RKE)、轮胎压力监视系统(TPM)、家庭自动化系统以及其它应用系统的设计者来说是一项重要的规范。这类接收机一般工作在315MHz或433MHz的频段上,但是其电路对其它载波频率也是适用的。',
        content='<div></div>',
        file_path='test4.pdf',
    )
# 保存 HTML 页面,生成 PDF 文件
# 最新推荐文章于 2024-08-06 17:20:59 发布