python从TXT创建PDF文件——reportlab

使用reportlab创建PDF文件

电子书一般都是txt格式的,但某些电子阅读器(如DPT-RP1)不能读取txt文档。因此本文使用python实现txt到pdf的转换,并且支持生成目录,目录条目带有可点击的跳转链接(前提是能够在txt文件中识别出每个章节的位置),同时支持中文。

reportlab的使用可以查看reportlab官方文档。txt转pdf详细代码如下:

# coding: utf-8

# Python 2 only: make UTF-8 the interpreter's default codec for implicit
# str <-> unicode conversions.
import sys
# reload() brings back sys.setdefaultencoding, which is removed from the
# module during interpreter start-up.
reload(sys)
sys.setdefaultencoding('utf-8')

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Paragraph
from  reportlab.platypus.tableofcontents import TableOfContents
from  reportlab.platypus import PageBreak
from reportlab.lib.pagesizes import A4

# Register the TrueType fonts that supply the Chinese glyphs. The font files
# are looked up relative to the current working directory.
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF'))
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf'))

styles = getSampleStyleSheet()

# 'STSONG': body-text style, first line of each paragraph indented.
styles.add(ParagraphStyle(
    name='STSONG',
    fontName='STSONG',
    fontSize=12,
    leading=20,
    firstLineIndent=22,
    wordWrap='CJK',
))

# 'simhei': chapter-heading style; MyDocTemplate.afterFlowable keys on this
# style name to create level-1 table-of-contents entries.
styles.add(ParagraphStyle(
    name='simhei',
    fontName='simhei',
    fontSize=14,
    leading=25,
    wordWrap='CJK',
))

class MyDocTemplate(BaseDocTemplate):
    """Document template that reports heading paragraphs to the TOC.

    After each flowable is placed, paragraphs whose style is named
    'Heading1' or 'simhei' are announced with a 'TOCEntry' notification.
    """

    def __init__(self, filename, **kw):
        """Create the template for *filename*.

        All keyword arguments are forwarded unchanged to
        BaseDocTemplate.__init__.
        """
        # NOTE(review): assigned before the base initialiser runs, exactly as
        # in the original. The base class may reset allowSplitting from its
        # own defaults unless it is passed through kw -- confirm.
        self.allowSplitting = 0
        # Plain call with ** unpacking replaces the deprecated apply()
        # builtin, which no longer exists in Python 3.
        BaseDocTemplate.__init__(self, filename, **kw)

    # Entries to the table of contents can be done either manually by
    # calling the addEntry method on the TableOfContents object or automatically
    # by sending a 'TOCEntry' notification in the afterFlowable method of
    # the DocTemplate you are using. The data to be passed to notify is a list
    # of three or four items containing a level number, the entry text, the page
    # number and an optional destination key which the entry should point to.
    # This list will usually be created in a document template's method like
    # afterFlowable(), making notification calls using the notify() method
    # with appropriate data.

    def afterFlowable(self, flowable):
        """Register a TOC entry when *flowable* is a heading paragraph.

        Paragraphs styled 'Heading1' become level 0 entries and paragraphs
        styled 'simhei' become level 1 entries. Any other flowable is
        ignored.
        """
        if flowable.__class__.__name__ != 'Paragraph':
            return

        # Decide the level first so the plain text is only extracted for
        # real headings, not for every body paragraph.
        level = {'Heading1': 0, 'simhei': 1}.get(flowable.style.name)
        if level is None:
            return

        entry = [level, flowable.getPlainText(), self.page]
        # If the flowable carries a bookmark name, append it so the entry
        # can point at that destination.
        bn = getattr(flowable, '_bookmarkName', None)
        if bn is not None:
            entry.append(bn)
        self.notify('TOCEntry', tuple(entry))


# this function makes our headings
def doHeading(data, text, sty):
    """Append a bookmarked heading paragraph for *text* to *data*.

    The paragraph markup gets an ``<a name="..."/>`` anchor named after the
    SHA-1 hex digest of the heading text. The same name is stored on the
    flowable as ``_bookmarkName`` so that afterFlowable can pass it along
    with the TOC entry.

    Args:
        data: story list; the new heading is appended to it in place.
        text: heading text (unicode text, or a byte string under Python 2).
        sty: paragraph style used to render the heading.
    """
    from hashlib import sha1
    # sha1() works on bytes. Encode unicode text explicitly rather than
    # depending on the interpreter-wide default encoding being UTF-8.
    digest_input = text if isinstance(text, bytes) else text.encode('utf-8')
    bn = sha1(digest_input).hexdigest()
    # modify paragraph text to include an anchor point with name bn
    h = Paragraph(text + '<a name="%s"/>' % bn, sty)
    # store the bookmark name on the flowable so afterFlowable can see this
    h._bookmarkName = bn
    data.append(h)

# Page Number
def footer(canvas, doc):
    """Draw the current page number near the bottom of the page.

    Args:
        canvas: canvas of the page being drawn.
        doc: document template; its width and margins position the number.
    """
    number_text = "%d" % canvas.getPageNumber()
    canvas.saveState()
    label = Paragraph(number_text, styles['Normal'])
    # wrap() must run before drawOn(); it reports the size the label takes
    # when laid out inside the bottom margin.
    label_width, label_height = label.wrap(doc.width, doc.bottomMargin)
    x_pos = doc.leftMargin + label_width/2
    label.drawOn(canvas, x_pos, label_height)
    canvas.restoreState()

# load txt file
def loadTxt(txt_path):
    """Read the text file at *txt_path* and return its lines as a list.

    The file is opened in text mode ('r'); each returned line keeps its
    newline terminator.
    """
    with open(txt_path, 'r') as source:
        return list(source)

def _decodeLine(raw_line):
    """Return *raw_line* as unicode text, decoding GB2312/GBK bytes if needed."""
    if not isinstance(raw_line, bytes):
        # Already decoded text; nothing to do.
        return raw_line
    try:
        return raw_line.decode("gb2312")
    except UnicodeDecodeError:
        # Text with traditional characters does not decode as GB2312, so
        # retry with GBK. A failure here propagates to the caller.
        return raw_line.decode("gbk")


def toPDF(txt_datas, pdf_path):
    """Build a PDF with a clickable table of contents from text lines.

    Every non-blank line becomes one paragraph. A line that starts with
    u"第" and also contains u"章" is treated as a chapter heading and gets a
    TOC entry; adjust that test to match the layout of your own text file.

    Args:
        txt_datas: iterable of lines, either GB2312/GBK-encoded byte strings
            or already-decoded unicode text.
        pdf_path: path of the PDF file to write.
    """
    from xml.sax.saxutils import escape

    PDF = MyDocTemplate(pdf_path, pagesize=A4)
    frame = Frame(PDF.leftMargin, PDF.bottomMargin, PDF.width, PDF.height,
                  id='normal')
    template = PageTemplate(frames=frame, onPage=footer)
    PDF.addPageTemplates([template])

    data = []

    # Table of contents, placed on its own leading page(s).
    toc = TableOfContents()
    # Font name and size for each TOC level (a CJK-capable font is needed).
    toc.levelStyles = [
        ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1',
                       leftIndent=20, firstLineIndent=-20, spaceBefore=10,
                       leading=16),
        ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2',
                       leftIndent=40, firstLineIndent=-20, spaceBefore=5,
                       leading=12),
    ]
    data.append(toc)
    data.append(PageBreak())

    line_count = 0
    for raw_line in txt_datas:
        # Decode before stripping so that full-width (ideographic) leading
        # spaces are removed too; otherwise an indented chapter title would
        # not start with u"第" and would be missed.
        text = _decodeLine(raw_line).lstrip()
        if not text:
            continue

        is_chapter = text.startswith(u"第") and u"章" in text
        # Paragraph parses its text as XML-like markup, so literal '<', '>'
        # and '&' from the source file must be escaped first.
        markup = escape(text)
        if is_chapter:
            doHeading(data, markup, styles['simhei'])
        else:
            data.append(Paragraph(markup, styles['STSONG']))
        line_count += 1
        print('{} line'.format(line_count))

    print('Build pdf!')
    # multiBuild makes several passes so the TOC page numbers settle.
    PDF.multiBuild(data)

if __name__ == "__main__":
    # Script entry point: convert the sample book into a PDF next to it.
    # The literals are byte strings in this source encoding, so they are
    # decoded to unicode before being used as file names.
    pdf_path = "财运天降.pdf".decode("utf8")
    txt_path = "财运天降.txt".decode("utf8")
    toPDF(loadTxt(txt_path), pdf_path)

本代码在windows和python2下进行了测试,主要注意事项有:

  • 系统默认编码设置:
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
  • 中文字体支持:
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
  • 中文目录字体:
    toc.levelStyles = [
        ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
           leading=16),
        ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
    ]
  • 目录定位,这个需要根据你实际的txt文章进行定位修改
        if txt_data[0] == u"第" and (u"章" in txt_data):
  • 中文解码,由于包含繁体中文的文本不能用gb2312解码,因此使用try-except的方式,在gb2312解码失败时改用gbk解码
        try:
            txt_data = txt_data.decode("gb2312")
        except:
            txt_data = txt_data.decode("gbk")

其效果如下:

网上随便找了个txt文章:
result1
生成pdf目录:
result2
生成pdf内容:
result3

  • 2
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值