续上一篇html转pdf问题(记一次将使用python将html转pdf的辛酸史_contentdocument.documentelement.outerhtml_一心萝卜的博客-CSDN博客)
由于页面内容过长,而 selenium 驱动的 Chrome 无头浏览器对窗口高度有上限(具体数值不太清楚),上一篇"先截整页大图、再按坐标裁剪出图表"的方法在遇到超大页面时肯定凉凉。网上有大佬说可以开启 GPU 加速,但前提是硬件支持,硬件不支持就是白扯。如果只是想截全图,上一篇的方法确实不错;但用于 html 转 pdf 就不太友好,所以这次对方法进行改进:
其他策略不变,只将截取图表的方法换成元素截图(代码中被注释掉的部分为原先的实现)。
def generate_pdf(report_id,
                 wkhtmltopdf_path=r'D:\python\wkhtmltox\wkhtmltopdf\bin\wkhtmltopdf.exe'):
    """Render the admin preview page of a report to a PDF file.

    The preview page is opened in headless Chrome. Every
    ``<div class="my-echarts">`` element is captured with an element-level
    screenshot and replaced in the DOM by an ``<img>`` pointing at that
    screenshot. The resulting static HTML is written to
    ``<MEDIA_ROOT>/temporary/my.html`` and converted with wkhtmltopdf into
    ``<MEDIA_ROOT>/reports/<report_id>/report.pdf``.

    Element screenshots are used instead of one full-page screenshot so the
    page height never has to fit into a single browser window.

    Args:
        report_id: Identifier placed in the preview URL query string and
            used as the name of the output directory.
        wkhtmltopdf_path: Absolute path of the wkhtmltopdf executable.

    Returns:
        None. A failure of the PDF conversion step is printed, not raised.
    """
    temp_dir = '{0}/temporary'.format(settings.MEDIA_ROOT)
    cut_dir = '{0}/cut'.format(temp_dir)
    html_path = '{0}/my.html'.format(temp_dir)
    # WebElement.screenshot() returns False instead of raising when the
    # target directory is missing, so make sure it exists up front.
    os.makedirs(cut_dir, exist_ok=True)

    # --- Step 1: make a static local copy of the page ---------------------
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    browser = webdriver.Chrome(chrome_options=chrome_options)
    try:
        browser.maximize_window()
        browser.get('http://127.0.0.1:8000/admin/admin-preview-report/?report_id={}'.format(report_id))
        # Give the charts time to finish rendering before capturing them.
        time.sleep(random.random() + 3)

        charts = browser.find_elements_by_xpath("//div[@class='my-echarts']")
        for chart in charts:
            # uuid-based name so screenshots never collide.
            img_name = uuid.uuid4().hex
            saved = chart.screenshot('{0}/{1}.png'.format(cut_dir, img_name))
            if not saved:
                # Keep the live chart rather than swapping in a broken image.
                print('failed to save screenshot {}.png'.format(img_name))
                continue
            # Pass the element itself to the script instead of looking it up
            # by id: this also works for charts without an id and avoids
            # building JavaScript source by string formatting.
            browser.execute_script(
                "arguments[0].innerHTML = '';"
                "var img = document.createElement('img');"
                "img.setAttribute('src', arguments[1]);"
                "arguments[0].appendChild(img);",
                chart,
                '../temporary/cut/{}.png'.format(img_name),
            )

        # Full page markup after the charts were replaced by images.
        html = browser.execute_script("return document.documentElement.outerHTML")
    finally:
        # quit() also ends the driver process; runs even when a step fails.
        browser.quit()

    with open(html_path, 'w', encoding='utf8') as wf:
        wf.write(html)

    # --- Step 2: convert the static HTML to PDF ---------------------------
    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
    # One output directory per report.
    file_path = '{0}/reports/{1}'.format(settings.MEDIA_ROOT, report_id)
    os.makedirs(file_path, exist_ok=True)
    file_path_all = '{}/report.pdf'.format(file_path)
    options = {
        # Flag without a value: pdfkit expects '' (or None/False) here.
        # Needed so wkhtmltopdf may load the local screenshot files.
        '--enable-local-file-access': '',
    }
    try:
        pdfkit.from_url(html_path, file_path_all, configuration=config, options=options)
    except Exception as e:
        # Best effort, as in the original: report the conversion error
        # without propagating it to the caller.
        print(e)
欢迎关注wx公众号:python web小栈,共同探讨学习