方式一:pyppeteer
pip install pyppeteer
Generate PDF from a website URL
import asyncio
from pyppeteer import launch
async def generate_pdf(url, pdf_path):
    """Load ``url`` in a pyppeteer-launched browser and save the page as PDF.

    Args:
        url: Address of the page to open.
        pdf_path: Path of the PDF file to write (A4 page format).
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto(url)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Close the browser even when navigation or rendering fails, so no
        # browser instance is left open after an error.
        await browser.close()
# Run the function. asyncio.run() creates and closes the event loop itself;
# asyncio.get_event_loop() is deprecated when no loop is running.
asyncio.run(generate_pdf('https://example.com', 'example.pdf'))
Generate PDF from Custom HTML content
import asyncio
from pyppeteer import launch
async def generate_pdf_from_html(html_content, pdf_path):
    """Render an HTML string in a pyppeteer-launched browser and save it as PDF.

    Args:
        html_content: HTML markup to load into the page.
        pdf_path: Path of the PDF file to write (A4 page format).
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setContent(html_content)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Close the browser even when rendering fails, so no browser
        # instance is left open after an error.
        await browser.close()
# HTML content
html_content = '''
<!DOCTYPE html>
<html>
<head>
<title>PDF Example</title>
</head>
<body>
<h1>Hello, world!</h1>
</body>
</html>
'''
# Run the function. asyncio.run() creates and closes the event loop itself;
# asyncio.get_event_loop() is deprecated when no loop is running.
asyncio.run(generate_pdf_from_html(html_content, 'from_html.pdf'))
方式二:xhtml2pdf
pip install xhtml2pdf requests
Generate PDF from a website URL
from xhtml2pdf import pisa
import requests
def convert_url_to_pdf(url, pdf_path, timeout=10):
    """Download the HTML at ``url`` and convert it to a PDF with xhtml2pdf.

    Args:
        url: Address of the page to fetch.
        pdf_path: Path of the PDF file to write.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the page was fetched and converted without errors,
        False otherwise.
    """
    # Fetch the HTML content from the URL. Without a timeout, requests can
    # wait indefinitely on an unresponsive server.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status code.
        print(f"Failed to fetch URL: {url} ({exc})")
        return False
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}")
        return False
    html_content = response.text

    # Generate PDF
    with open(pdf_path, "wb") as pdf_file:
        pisa_status = pisa.CreatePDF(html_content, dest=pdf_file)
    return not pisa_status.err
# URL to fetch
url_to_fetch = "https://google.com"
# PDF path to save
pdf_path = "google.pdf"
# Generate PDF and report the outcome
if convert_url_to_pdf(url_to_fetch, pdf_path):
    print(f"PDF generated and saved at {pdf_path}")
else:
    print("PDF generation failed")
Generating PDF from custom HTML content
from xhtml2pdf import pisa
def convert_html_to_pdf(html_string, pdf_path):
    """Convert an HTML string to a PDF file with xhtml2pdf.

    Args:
        html_string: HTML markup to convert.
        pdf_path: Path of the PDF file to write.

    Returns:
        True if xhtml2pdf reported no errors, False otherwise.
    """
    with open(pdf_path, "wb") as pdf_file:
        pisa_status = pisa.CreatePDF(html_string, dest=pdf_file)
    return not pisa_status.err
# HTML content
html_content = '''
<!DOCTYPE html>
<html>
<head>
<title>PDF Example</title>
</head>
<body>
<h1>Hello, world!</h1>
</body>
</html>
'''
# Generate PDF and report the outcome
pdf_path = "example.pdf"
if convert_html_to_pdf(html_content, pdf_path):
    print(f"PDF generated and saved at {pdf_path}")
else:
    print("PDF generation failed")
中文问题(xhtml2pdf 渲染中文)
# pip install xhtml2pdf requests jinja2 -i https://pypi.tuna.tsinghua.edu.cn/simple/
from xhtml2pdf import pisa
from jinja2 import Template
# HTML template containing Chinese text and Jinja2 ``{{ ... }}`` placeholders
# (admin_reminder_title, volume, total_volume, alert_year, alert_month,
# alert_day, department_name, alert_name, admin_reminder_content,
# admin_reminder_suggestion, admin_reminder_instruction,
# admin_reminder_attachment).
# The <style> rules set font-family STSong-Light on <p> and <div> elements --
# presumably so that xhtml2pdf can render the Chinese glyphs; confirm.
html_content = '''
<!DOCTYPE html>
<html>
<head>
<title>PDF Example</title>
</head>
<style>
p { font-family: STSong-Light }
div { font-family: STSong-Light }
</style>
<body >
<div class="container" style="width: 450px;">
<div class="notice" style="font-size:0.8rem">
<span>内部参考</span><br />
<span>请勿外传</span>
</div>
<h2 class="title" style="color: red; text-align: center; padding: 1rem;">{{admin_reminder_title}}</h2>
<p class="subtitle" style="text-align: center;">第 {{volume}} 期 (总第 {{total_volume}} 期)</p>
<p class="header" style=" padding: 0.25rem; text-align: center; border-bottom: 0.2rem solid red;">无 锡 市 城 市 运 行
管 理 中 心 {{alert_year}} 年 {{alert_month}} 月 {{alert_day}} 日</p>
<p>{{department_name}}:</p>
<p><span style="padding-left: 2rem"></span>
近期,市城运中心经研判分析发现反映涉及你单位相对集中的诉求情况如下:</p>
<p>预警名称:{{alert_name}}
</p>
<p>
预警内容:{{admin_reminder_content}}
</p>
<p>
预警建议:{{admin_reminder_suggestion}}
</p>
<p>领导批示:{{admin_reminder_instruction}}</p>
<p>附件上传:{{admin_reminder_attachment}}
</p>
</div>
</body>
</html>
'''
def render_yaml(temp_str, **kwargs):
    """Render a Jinja2 template string with the given variables.

    Despite the name, nothing here is YAML-specific: the source is handed
    straight to ``jinja2.Template``.

    Args:
        temp_str: Template source, as ``str`` or UTF-8 encoded ``bytes``.
        **kwargs: Values for the template's placeholders.

    Returns:
        The rendered text.
    """
    # Accept raw bytes (e.g. a template read in binary mode) as well as str.
    if isinstance(temp_str, bytes):
        temp_str = temp_str.decode("utf-8")
    template = Template(temp_str)
    return template.render(**kwargs)
# Values substituted into the template's placeholders.
# NOTE: the template also contains {{admin_reminder_title}}, for which no
# value is supplied here; with Jinja2's default undefined handling that
# heading renders empty.
kwargs = dict(
    volume="1",
    total_volume=120,
    department_name="公安局",
    alert_year="2024",
    alert_month="6",
    alert_day="12",
    alert_name="社会舆情热点",
    admin_reminder_content="学生v考虑撒娇看FNH阿达数据库连接",
    admin_reminder_instruction="先这样吧",
    admin_reminder_suggestion="KLKSJDJLSKASSDANS11111111",
    admin_reminder_attachment="123333333",
)
# Replace the template text with its rendered form.
html_content = render_yaml(html_content, **kwargs)
# font_path = "./simsun.ttc"
# font_path = r"\\wsl$\Ubuntu-22.04\home\weijie\weijie\code\practice\A_days\simsun.ttc"
def convert_html_to_pdf(html_string, pdf_path):
    """Convert an HTML string to a PDF file with xhtml2pdf.

    Args:
        html_string: HTML markup to convert.
        pdf_path: Path of the PDF file to write.

    Returns:
        True if xhtml2pdf reported no errors, False otherwise.
    """
    with open(pdf_path, "wb") as pdf_file:
        # An earlier variant of this call also passed font_path=font_path;
        # it is left disabled.
        pisa_status = pisa.CreatePDF(html_string, dest=pdf_file)
    return not pisa_status.err
# Generate PDF and report the outcome
pdf_path = "example-test.pdf"
if convert_html_to_pdf(html_content, pdf_path):
    print(f"PDF generated and saved at {pdf_path}")
else:
    print("PDF generation failed")
方式三:python-pdfkit
pip install pdfkit  # also requires the wkhtmltopdf binary to be installed and on PATH
Generate PDF from a website URL
import pdfkit
def convert_url_to_pdf(url, pdf_path):
    """Convert the page at ``url`` to a PDF file with pdfkit.

    Failures are printed instead of raised.

    Args:
        url: Address of the page to convert.
        pdf_path: Path of the PDF file to write.

    Returns:
        True if the PDF was generated, False if pdfkit raised an error.
    """
    try:
        pdfkit.from_url(url, pdf_path)
    except Exception as e:
        # Deliberately broad: this helper reports any failure rather than
        # propagating it.
        print(f"PDF generation failed: {e}")
        return False
    print(f"PDF generated and saved at {pdf_path}")
    return True
# Page to convert and the file the PDF is written to
url_to_fetch = 'https://example.com'
pdf_path = 'example_from_url.pdf'
# Run the conversion
convert_url_to_pdf(url=url_to_fetch, pdf_path=pdf_path)
Generating PDF from custom HTML content
import pdfkit
def convert_html_to_pdf(html_content, pdf_path):
    """Convert an HTML string to a PDF file with pdfkit.

    Failures are printed instead of raised.

    Args:
        html_content: HTML markup to convert.
        pdf_path: Path of the PDF file to write.

    Returns:
        True if the PDF was generated, False if pdfkit raised an error.
    """
    try:
        pdfkit.from_string(html_content, pdf_path)
    except Exception as e:
        # Deliberately broad: this helper reports any failure rather than
        # propagating it.
        print(f"PDF generation failed: {e}")
        return False
    print(f"PDF generated and saved at {pdf_path}")
    return True
# Markup to convert
html_content = '''
<!DOCTYPE html>
<html>
<head>
<title>PDF Example</title>
</head>
<body>
<h1>Hello, world!</h1>
</body>
</html>
'''
# File the PDF is written to
pdf_path = 'example_from_html.pdf'
# Run the conversion
convert_html_to_pdf(html_content=html_content, pdf_path=pdf_path)
方式四:Playwright
pip install playwright
playwright install
Generate PDF from website URL
import asyncio
from playwright.async_api import async_playwright
async def url_to_pdf(url, output_path):
    """Open ``url`` in Playwright's Chromium and save the page as a PDF.

    Args:
        url: Address of the page to open.
        output_path: Path of the PDF file to write.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url)
            await page.pdf(path=output_path)
        finally:
            # Close the browser explicitly even when navigation or
            # rendering fails.
            await browser.close()
# Example: convert a page and write the result to a local file
url = 'https://google.com'
output_path = 'html-to-pdf-output.pdf'
asyncio.run(url_to_pdf(url=url, output_path=output_path))
Generate PDF from custom HTML content
import asyncio
from playwright.async_api import async_playwright
async def html_to_pdf(html_content, output_path):
    """Render an HTML string in Playwright's Chromium and save it as a PDF.

    Args:
        html_content: HTML markup to load into the page.
        output_path: Path of the PDF file to write.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.set_content(html_content)
            await page.pdf(path=output_path)
        finally:
            # Close the browser explicitly even when rendering fails.
            await browser.close()
# Markup to convert
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample HTML</title>
</head>
<body>
<h1>Hello, World!</h1>
<p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''
# File the PDF is written to
output_path = 'custom-html-to-pdf-output.pdf'
asyncio.run(html_to_pdf(html_content=html_content, output_path=output_path))
方式五:WeasyPrint
pip install WeasyPrint
Generate PDF from website URL
from weasyprint import HTML
def url_to_pdf(url, output_path):
    """Convert the document at ``url`` to a PDF file with WeasyPrint.

    Args:
        url: Location of the HTML document, passed to ``HTML`` positionally.
        output_path: Path of the PDF file to write.
    """
    document = HTML(url)
    document.write_pdf(output_path)
# Example: convert a page and write the result to a local file
url = 'https://google.com'
output_path = 'output_url.pdf'
url_to_pdf(url=url, output_path=output_path)
Generate PDF from custom HTML content
from weasyprint import HTML
def html_to_pdf(html_content, output_path):
    """Convert an HTML string to a PDF file with WeasyPrint.

    Args:
        html_content: HTML markup to convert.
        output_path: Path of the PDF file to write.
    """
    document = HTML(string=html_content)
    document.write_pdf(output_path)
# Markup to convert
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample HTML</title>
</head>
<body>
<h1>Hello, World!</h1>
<p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''
# File the PDF is written to
output_path = 'output_html.pdf'
html_to_pdf(html_content=html_content, output_path=output_path)