Python之HTML转PDF

最新推荐文章于 2024-07-18 15:53:23 发布

阿汤哥的程序之路

最新推荐文章于 2024-07-18 15:53:23 发布

阅读量64

点赞数

分类专栏： python 文章标签： python html pdf

原文链接：https://apitemplate.io/blog/how-to-convert-html-to-pdf-using-python/#conclusion

版权

python 专栏收录该内容

81 篇文章 3 订阅

订阅专栏

方式一：pyppeteer

pip install pyppeteer

Generate PDF from a website URL

import asyncio
from pyppeteer import launch

async def generate_pdf(url, pdf_path):
    """Open *url* in a pyppeteer-launched browser and save the page as a PDF.

    Args:
        url: Address of the page to load.
        pdf_path: File path the PDF is written to (A4 page format).

    Any exception raised while loading or printing the page propagates to
    the caller; the browser is closed first in every case.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto(url)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Close the browser even when navigation or PDF rendering fails,
        # otherwise the launched browser is never shut down.
        await browser.close()

# Run the function.  asyncio.run() creates a fresh event loop and closes it
# afterwards; calling asyncio.get_event_loop() with no running loop is
# deprecated.
asyncio.run(generate_pdf('https://example.com', 'example.pdf'))

Generate PDF from Custom HTML content

import asyncio
from pyppeteer import launch

async def generate_pdf_from_html(html_content, pdf_path):
    """Load an HTML string into a pyppeteer page and save it as a PDF.

    Args:
        html_content: Complete HTML document text to render.
        pdf_path: File path the PDF is written to (A4 page format).

    Any exception raised while rendering propagates to the caller; the
    browser is closed first in every case.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setContent(html_content)
        await page.pdf({'path': pdf_path, 'format': 'A4'})
    finally:
        # Close the browser even when rendering fails, otherwise the
        # launched browser is never shut down.
        await browser.close()

# HTML content
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# Run the function.  asyncio.run() creates a fresh event loop and closes it
# afterwards; calling asyncio.get_event_loop() with no running loop is
# deprecated.
asyncio.run(generate_pdf_from_html(html_content, 'from_html.pdf'))

方式二：xhtml2pdf

pip install xhtml2pdf requests

To generate PDF from a website URL

from xhtml2pdf import pisa
import requests

def convert_url_to_pdf(url, pdf_path, timeout=30):
    """Download the page at *url* and convert its HTML to a PDF file.

    Args:
        url: Address of the page to fetch.
        pdf_path: Destination path of the generated PDF.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when the page was fetched with status 200 and xhtml2pdf
        reported no error; False otherwise.
    """
    # Without a timeout, requests may wait on an unresponsive server
    # indefinitely.  Network failures are reported like every other failure
    # (return False) instead of escaping as an uncaught exception.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch URL: {url} ({exc})")
        return False

    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}")
        return False

    # Generate PDF
    with open(pdf_path, "wb") as pdf_file:
        pisa_status = pisa.CreatePDF(response.text, dest=pdf_file)

    return not pisa_status.err

# Demo input: the page to download and the file the PDF is written to
url_to_fetch = "https://google.com"
pdf_path = "google.pdf"

# Convert, then report the outcome
if not convert_url_to_pdf(url_to_fetch, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

Generating PDF from custom HTML content

from xhtml2pdf import pisa

def convert_html_to_pdf(html_string, pdf_path):
    """Write *html_string* to *pdf_path* as a PDF using xhtml2pdf.

    Returns:
        True when ``pisa.CreatePDF`` reported no error, False otherwise.
    """
    with open(pdf_path, "wb") as out:
        result = pisa.CreatePDF(html_string, dest=out)

    failed = bool(result.err)
    return not failed

# Minimal document used as conversion input
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# Convert, then report the outcome
pdf_path = "example.pdf"
if not convert_html_to_pdf(html_content, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

中文问题：xhtml2pdf 默认使用的字体不包含中文字形，直接转换时中文无法正常显示。

解决：在 CSS 中为包含中文的元素指定 font-family 为 STSong-Light，示例如下。

# pip install xhtml2pdf requests jinja2 -i https://pypi.tuna.tsinghua.edu.cn/simple/

from xhtml2pdf import pisa
from jinja2 import Template

# Jinja2 template for the notice document: each {{name}} placeholder is
# replaced with the matching keyword value when the template is rendered.
# The <style> rules set font-family STSong-Light on <p> and <div> --
# presumably the fix for the Chinese-rendering problem this section covers.
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<style>
p { font-family: STSong-Light }
div { font-family: STSong-Light }
</style>
<body >
    <div class="container" style="width: 450px;">
    <div class="notice" style="font-size:0.8rem">
        <span>内部参考</span><br />
        <span>请勿外传</span>
    </div>
    <h2 class="title" style="color: red; text-align: center; padding: 1rem;">{{admin_reminder_title}}</h2>
    <p class="subtitle" style="text-align: center;">第 {{volume}} 期 (总第 {{total_volume}} 期)</p>
    <p class="header" style=" padding: 0.25rem;  text-align: center; border-bottom: 0.2rem solid red;">无 锡 市 城 市 运 行
        管 理 中 心 {{alert_year}} 年 {{alert_month}} 月 {{alert_day}} 日</p>
    <p>{{department_name}}：</p>
    <p><span style="padding-left: 2rem"></span>
        近期，市城运中心经研判分析发现反映涉及你单位相对集中的诉求情况如下：</p>
    <p>预警名称：{{alert_name}}
    </p>
    <p>
        预警内容：{{admin_reminder_content}}
    </p>
    <p>
        预警建议：{{admin_reminder_suggestion}}
    </p>
    <p>领导批示：{{admin_reminder_instruction}}</p>
    <p>附件上传：{{admin_reminder_attachment}}
    </p>
</div>
</body>
</html>
'''


def render_yaml(temp_str, **kwargs):
    """Render the Jinja2 template text *temp_str* with the given variables.

    Despite the name, nothing here is YAML-specific: the text is compiled as
    a Jinja2 ``Template`` and rendered with ``kwargs`` as its variables.

    Returns:
        The rendered text.
    """
    return Template(temp_str).render(**kwargs)


# Sample values for the template placeholders.  Every placeholder used by the
# template needs an entry here: Jinja2 renders a missing variable as an empty
# string, which would leave the red <h2> heading blank.
kwargs = {"admin_reminder_title": "预警提醒单",  # sample heading text
          "volume": "1",
          "total_volume": 120,
          "department_name": "公安局",
          "alert_year": "2024",
          "alert_month": '6',
          "alert_day": '12',
          "alert_name": "社会舆情热点",
          "admin_reminder_content": "学生v考虑撒娇看FNH阿达数据库连接",
          "admin_reminder_instruction": "先这样吧",
          "admin_reminder_suggestion": "KLKSJDJLSKASSDANS11111111",
          "admin_reminder_attachment": "123333333"
          }

# Replace the template text with its rendered form.
html_content = render_yaml(html_content, **kwargs)

# font_path = "./simsun.ttc"
# font_path = r"\\wsl$\Ubuntu-22.04\home\weijie\weijie\code\practice\A_days\simsun.ttc"


def convert_html_to_pdf(html_string, pdf_path):
    """Write *html_string* to *pdf_path* as a PDF using xhtml2pdf.

    No font file is passed to ``pisa.CreatePDF``; fonts are chosen by the
    CSS inside the HTML itself.

    Returns:
        True when ``pisa.CreatePDF`` reported no error, False otherwise.
    """
    with open(pdf_path, "wb") as out:
        result = pisa.CreatePDF(html_string, dest=out)

    failed = bool(result.err)
    return not failed


# Convert the rendered template, then report the outcome
pdf_path = "example-test.pdf"
if not convert_html_to_pdf(html_content, pdf_path):
    print("PDF generation failed")
else:
    print(f"PDF generated and saved at {pdf_path}")

方式三：python-pdfkit

pip install pdfkit

To generate PDF from website URL

import pdfkit

def convert_url_to_pdf(url, pdf_path):
    """Render the page at *url* to a PDF file with pdfkit.

    Args:
        url: Address of the page to convert.
        pdf_path: Destination path of the generated PDF.

    Returns:
        True when ``pdfkit.from_url`` finished without raising, False when
        it raised (the error is printed, not propagated).
    """
    try:
        pdfkit.from_url(url, pdf_path)
    except Exception as e:
        # Best-effort helper: report the failure instead of raising, and
        # signal it to the caller through the return value.
        print(f"PDF generation failed: {e}")
        return False

    print(f"PDF generated and saved at {pdf_path}")
    return True

# Demo input: the page to convert and the file the PDF is written to
url_to_fetch = "https://example.com"
pdf_path = "example_from_url.pdf"

# Run the conversion
convert_url_to_pdf(url=url_to_fetch, pdf_path=pdf_path)

Generating PDF from custom HTML content

import pdfkit

def convert_html_to_pdf(html_content, pdf_path):
    """Render an HTML string to a PDF file with pdfkit.

    Args:
        html_content: HTML document text to convert.
        pdf_path: Destination path of the generated PDF.

    Returns:
        True when ``pdfkit.from_string`` finished without raising, False
        when it raised (the error is printed, not propagated).
    """
    try:
        pdfkit.from_string(html_content, pdf_path)
    except Exception as e:
        # Best-effort helper: report the failure instead of raising, and
        # signal it to the caller through the return value.
        print(f"PDF generation failed: {e}")
        return False

    print(f"PDF generated and saved at {pdf_path}")
    return True

# Minimal document used as conversion input
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Example</title>
</head>
<body>
    <h1>Hello, world!</h1>
</body>
</html>
'''

# File the PDF is written to
pdf_path = "example_from_html.pdf"

# Run the conversion
convert_html_to_pdf(html_content=html_content, pdf_path=pdf_path)

方式四：Playwright

pip install playwright
playwright install

Generate PDF from website URL

import asyncio
from playwright.async_api import async_playwright

async def url_to_pdf(url, output_path):
    """Open *url* in Playwright's Chromium and save the page as a PDF.

    Args:
        url: Address of the page to load.
        output_path: File path the PDF is written to.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch()
        tab = await chromium.new_page()

        # Navigate first, then print the loaded page to the target file.
        await tab.goto(url)
        await tab.pdf(path=output_path)

        await chromium.close()

# Demo input: the page to load and the file the PDF is written to
url = "https://google.com"
output_path = "html-to-pdf-output.pdf"

# Drive the coroutine to completion on a new event loop
asyncio.run(url_to_pdf(url=url, output_path=output_path))

Generate PDF from custom HTML content

import asyncio
from playwright.async_api import async_playwright

async def html_to_pdf(html_content, output_path):
    """Load an HTML string into a Playwright Chromium page and save a PDF.

    Args:
        html_content: HTML document text to render.
        output_path: File path the PDF is written to.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch()
        tab = await chromium.new_page()

        # Inject the markup, then print the resulting page to the target file.
        await tab.set_content(html_content)
        await tab.pdf(path=output_path)

        await chromium.close()

# Sample document used as conversion input
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''
output_path = "custom-html-to-pdf-output.pdf"

# Drive the coroutine to completion on a new event loop
asyncio.run(html_to_pdf(html_content=html_content, output_path=output_path))

方式五：WeasyPrint

pip install WeasyPrint

Generate PDF from website URL

from weasyprint import HTML

def url_to_pdf(url, output_path):
    """Load *url* with WeasyPrint and write the result to *output_path*.

    Args:
        url: Source handed to ``HTML`` as its positional argument.
        output_path: File path the PDF is written to.
    """
    document = HTML(url)
    document.write_pdf(output_path)

# Demo input: the page to load and the file the PDF is written to
url = "https://google.com"
output_path = "output_url.pdf"

# Run the conversion
url_to_pdf(url=url, output_path=output_path)

Generate PDF from custom HTML content

from weasyprint import HTML

def html_to_pdf(html_content, output_path):
    """Render an HTML string with WeasyPrint and write it to *output_path*.

    Args:
        html_content: HTML document text, passed to ``HTML`` as ``string``.
        output_path: File path the PDF is written to.
    """
    document = HTML(string=html_content)
    document.write_pdf(output_path)

# Sample document used as conversion input
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML content to be converted to PDF.</p>
</body>
</html>
'''
output_path = "output_html.pdf"

# Run the conversion
html_to_pdf(html_content=html_content, output_path=output_path)

详情