Python之HTML转PDF

方式一pyppeteer

pip install pyppeteer

Generate PDF from a website URL

import asyncio
from pyppeteer import launch

async def generate_pdf(url, pdf_path):
    """Open ``url`` in a browser started by pyppeteer and save it as an A4 PDF.

    Args:
        url: Address handed to ``page.goto``.
        pdf_path: Destination file path passed to ``page.pdf``.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto(url)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Always shut the browser down, even when navigation or rendering
        # raises, so no browser process is left running.
        await browser.close()

# Run the coroutine to completion. asyncio.run() creates and closes its own
# event loop; asyncio.get_event_loop() is deprecated when no loop is running.
asyncio.run(generate_pdf('https://example.com', 'example.pdf'))

Generate PDF from Custom HTML content

import asyncio
from pyppeteer import launch

async def generate_pdf_from_html(html_content, pdf_path):
    """Load ``html_content`` into a pyppeteer page and save it as an A4 PDF.

    Args:
        html_content: HTML markup handed to ``page.setContent``.
        pdf_path: Destination file path passed to ``page.pdf``.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setContent(html_content)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Always shut the browser down, even when loading or rendering
        # raises, so no browser process is left running.
        await browser.close()

# HTML document to render
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# Run the coroutine to completion. asyncio.run() creates and closes its own
# event loop; asyncio.get_event_loop() is deprecated when no loop is running.
asyncio.run(generate_pdf_from_html(html_content, 'from_html.pdf'))

方式二xhtml2pdf

pip install xhtml2pdf requests

To generate PDF from a website URL

from xhtml2pdf import pisa
import requests

def convert_url_to_pdf(url, pdf_path, timeout=10):
    """Download the HTML at ``url`` and convert it to a PDF with xhtml2pdf.

    Args:
        url: Address fetched with ``requests.get``.
        pdf_path: Path of the PDF file to write (opened in binary mode).
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error; without a timeout the request could block forever.

    Returns:
        True when ``pisa.CreatePDF`` reports no errors; False when the
        response status is not 200 or the conversion reports errors.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}")
        return False

    # The output file is only created once the download has succeeded.
    with open(pdf_path, "wb") as pdf_file:
        pisa_status = pisa.CreatePDF(response.text, dest=pdf_file)

    return not pisa_status.err

# Destination file and source page for the demo run
pdf_path = "google.pdf"
url_to_fetch = "https://google.com"

# Convert the page, then report the outcome
if not convert_url_to_pdf(url_to_fetch, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

Generating PDF from custom HTML content

from xhtml2pdf import pisa

def convert_html_to_pdf(html_string, pdf_path):
    """Write ``html_string`` as a PDF at ``pdf_path`` using xhtml2pdf.

    Returns:
        True when ``pisa.CreatePDF`` reports no errors, otherwise False.
    """
    with open(pdf_path, "wb") as out:
        status = pisa.CreatePDF(html_string, dest=out)

    if status.err:
        return False
    return True

# Output location for the demo run
pdf_path = "example.pdf"

# Document to convert
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# Convert the markup, then report the outcome
if not convert_html_to_pdf(html_content, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

中文问题

解决

# pip install xhtml2pdf requests jinja2 -i https://pypi.tuna.tsinghua.edu.cn/simple/

from xhtml2pdf import pisa
from jinja2 import Template

# Jinja2 template source for the notice document; the {{ ... }} placeholders
# are filled in by render_yaml() further down.
# The <style> rules assign the STSong-Light font to <p> and <div> text.
# NOTE(review): presumably this font assignment is what makes the Chinese
# text render in xhtml2pdf — confirm.
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<style>
p { font-family: STSong-Light }
div { font-family: STSong-Light }
</style>
<body >
    <div class="container" style="width: 450px;">
    <div class="notice" style="font-size:0.8rem">
        <span>内部参考</span><br />
        <span>请勿外传</span>
    </div>
    <h2 class="title" style="color: red; text-align: center; padding: 1rem;">{{admin_reminder_title}}</h2>
    <p class="subtitle" style="text-align: center;">第 {{volume}} 期 (总第 {{total_volume}} 期)</p>
    <p class="header" style=" padding: 0.25rem;  text-align: center; border-bottom: 0.2rem solid red;">无 锡 市 城 市 运 行
        管 理 中 心 {{alert_year}} 年 {{alert_month}} 月 {{alert_day}} 日</p>
    <p>{{department_name}}:</p>
    <p><span style="padding-left: 2rem"></span>
        近期,市城运中心经研判分析发现反映涉及你单位相对集中的诉求情况如下:</p>
    <p>预警名称:{{alert_name}}
    </p>
    <p>
        预警内容:{{admin_reminder_content}}
    </p>
    <p>
        预警建议:{{admin_reminder_suggestion}}
    </p>
    <p>领导批示:{{admin_reminder_instruction}}</p>
    <p>附件上传:{{admin_reminder_attachment}}
    </p>
</div>
</body>
</html>
'''


def render_yaml(temp_str, **kwargs):
    """Render the Jinja2 template source ``temp_str`` with ``kwargs`` as context.

    Despite the name, nothing here is YAML-specific: the string is handed
    to ``jinja2.Template`` unchanged and the rendered text is returned.
    """
    return Template(temp_str).render(**kwargs)


# Values substituted into the template placeholders
kwargs = dict(
    volume="1",
    total_volume=120,
    department_name="公安局",
    alert_year="2024",
    alert_month='6',
    alert_day='12',
    alert_name="社会舆情热点",
    admin_reminder_content="学生v考虑撒娇看FNH阿达数据库连接",
    admin_reminder_instruction="先这样吧",
    admin_reminder_suggestion="KLKSJDJLSKASSDANS11111111",
    admin_reminder_attachment="123333333",
)

# Replace the template source with the rendered HTML
html_content = render_yaml(html_content, **kwargs)

# font_path = "./simsun.ttc"
# font_path = r"\\wsl$\Ubuntu-22.04\home\weijie\weijie\code\practice\A_days\simsun.ttc"


def convert_html_to_pdf(html_string, pdf_path):
    """Write ``html_string`` as a PDF at ``pdf_path`` using xhtml2pdf.

    Returns:
        True when ``pisa.CreatePDF`` reports no errors, otherwise False.
    """
    with open(pdf_path, "wb") as out:
        # A variant that also passed a font_path argument was tried earlier
        # and is left disabled.
        status = pisa.CreatePDF(html_string, dest=out)

    if status.err:
        return False
    return True


# Convert the rendered notice, then report the outcome
pdf_path = "example-test.pdf"
if not convert_html_to_pdf(html_content, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

方式三python-pdfkit

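pip install pdfkit

Note: pdfkit is only a wrapper; the wkhtmltopdf executable must also be installed and available on PATH.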

To generate PDF from a website URL

import pdfkit

def convert_url_to_pdf(url, pdf_path):
    """Convert the page at ``url`` to a PDF file using pdfkit.

    Failures are reported on stdout rather than raised; the boolean result
    additionally lets callers detect them.

    Args:
        url: Address passed to ``pdfkit.from_url``.
        pdf_path: Output path passed to ``pdfkit.from_url``.

    Returns:
        True if ``pdfkit.from_url`` completed without raising, else False.
    """
    try:
        pdfkit.from_url(url, pdf_path)
    except Exception as e:
        # Deliberate best-effort boundary: report any failure instead of
        # crashing the script.
        print(f"PDF generation failed: {e}")
        return False
    print(f"PDF generated and saved at {pdf_path}")
    return True

# Destination file and source page for the demo run
pdf_path = 'example_from_url.pdf'
url_to_fetch = 'https://example.com'

# Convert the page; the function prints the outcome itself
convert_url_to_pdf(url_to_fetch, pdf_path)

Generating PDF from custom HTML content

import pdfkit

def convert_html_to_pdf(html_content, pdf_path):
    """Convert the markup in ``html_content`` to a PDF file using pdfkit.

    Failures are reported on stdout rather than raised; the boolean result
    additionally lets callers detect them.

    Args:
        html_content: HTML string passed to ``pdfkit.from_string``.
        pdf_path: Output path passed to ``pdfkit.from_string``.

    Returns:
        True if ``pdfkit.from_string`` completed without raising, else False.
    """
    try:
        pdfkit.from_string(html_content, pdf_path)
    except Exception as e:
        # Deliberate best-effort boundary: report any failure instead of
        # crashing the script.
        print(f"PDF generation failed: {e}")
        return False
    print(f"PDF generated and saved at {pdf_path}")
    return True

# Output location for the demo run
pdf_path = 'example_from_html.pdf'

# Document to convert
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# Convert the markup; the function prints the outcome itself
convert_html_to_pdf(html_content, pdf_path)

方式四Playwright

pip install playwright
playwright install

Generate PDF from website URL

import asyncio
from playwright.async_api import async_playwright

async def url_to_pdf(url, output_path):
    """Open ``url`` in Chromium via Playwright and save the page as a PDF.

    Args:
        url: Address handed to ``page.goto``.
        output_path: File path passed to ``page.pdf``.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch()
        tab = await chromium.new_page()
        await tab.goto(url)
        await tab.pdf(path=output_path)
        await chromium.close()

# Demo run: destination file first, then the page to capture
output_path = 'html-to-pdf-output.pdf'
url = 'https://google.com'

asyncio.run(url_to_pdf(url, output_path))

Generate PDF from custom HTML content

import asyncio
from playwright.async_api import async_playwright

async def html_to_pdf(html_content, output_path):
    """Load ``html_content`` into a Chromium page via Playwright and save a PDF.

    Args:
        html_content: HTML markup handed to ``page.set_content``.
        output_path: File path passed to ``page.pdf``.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch()
        tab = await chromium.new_page()
        await tab.set_content(html_content)
        await tab.pdf(path=output_path)
        await chromium.close()

# Output location for the demo run
output_path = 'custom-html-to-pdf-output.pdf'

# Document to convert
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''

asyncio.run(html_to_pdf(html_content, output_path))

方式五WeasyPrint

pip install WeasyPrint

Generate PDF from website URL

from weasyprint import HTML

def url_to_pdf(url, output_path):
    """Build a WeasyPrint ``HTML`` document from ``url`` and write it as a PDF.

    Args:
        url: Source handed to ``HTML`` as its first positional argument.
        output_path: Target passed to ``write_pdf``.
    """
    document = HTML(url)
    document.write_pdf(output_path)

# Demo run: destination file first, then the page to capture
output_path = 'output_url.pdf'
url = 'https://google.com'

url_to_pdf(url, output_path)

Generate PDF from custom HTML content

from weasyprint import HTML

def html_to_pdf(html_content, output_path):
    """Build a WeasyPrint ``HTML`` document from a markup string and write a PDF.

    Args:
        html_content: HTML markup passed to ``HTML`` via ``string=``.
        output_path: Target passed to ``write_pdf``.
    """
    document = HTML(string=html_content)
    document.write_pdf(output_path)

# Output location for the demo run
output_path = 'output_html.pdf'

# Document to convert
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''

html_to_pdf(html_content, output_path)

详情

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值