HTML转换为PDF格式

 

package com.util;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.DocListener;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontProvider;
import com.itextpdf.text.Image;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Section;
import com.itextpdf.text.html.simpleparser.ChainedProperties;
import com.itextpdf.text.html.simpleparser.HTMLWorker;
import com.itextpdf.text.html.simpleparser.ImageProvider;
import com.itextpdf.text.html.simpleparser.StyleSheet;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * PDF格式的文件
 * 读取HTML文件
 * 使用PDF格式显示出来
 * 使用到标签
 *
 *
 * @author 姚腾蛟
 *
 * May 24, 2011
 */
public class HtmlToPDFHandle {
    public static final String        FONT      = "c:/windows/fonts/STFANGSO.TTF"; //设置的文字字体类型,宋体
    protected StyleSheet              styles    = null;
    protected HashMap<String, Object> providers = null;
   
    /**
     * 将html类型的文件,转换为PDF格式
     *
     * @param htmlPath:被读取的HTML文件路径
     * @param listTitles:最外层的标题集合
     * @param pdfPath:最终会生成的PDf格式文件的路径
     * @throws DocumentException
     * @throws IOException
     */
    public void readHtmlToPdf ( List<HtmlToPdfBean> listTitles, String pdfPath ) throws DocumentException, IOException {
        HtmlToPDFHandle readHtml = new HtmlToPDFHandle ();//创建对象
        HashMap<String, Object> map = new HashMap<String, Object> ();//文字与图片的解析开始
        map.put ( HTMLWorker.FONT_PROVIDER, new MyFontFactory () );//解析文字
        map.put ( HTMLWorker.IMG_PROVIDER, new MyImageFactory () );//解析图片
        readHtml.setProviders ( map );//文字与图片的解析结束
        Document document = new Document ();//开始读取文件
        PdfWriter.getInstance ( document, new FileOutputStream ( pdfPath ) );
        document.open ();
        chapterHandle ( listTitles, readHtml, document );//章节的处理
        document.close ();
    }
   
    /**
     * 父章节的处理
     *
     * @param listTitles
     * @param readHtml
     * @param document
     * @throws DocumentException
     * @throws IOException
     */
    private void chapterHandle ( List<HtmlToPdfBean> listTitles, HtmlToPDFHandle readHtml, Document document )
            throws DocumentException, IOException {
        for (int i = 0; i < listTitles.size (); i++) { //循环最外层的章节
            HtmlToPdfBean parent = listTitles.get ( i );
            if (parent.getParentId () == 0) {//判断添加最外层的节点开始
                Paragraph title = titleShow ( parent.getTitleName (), 24 );
                title.setSpacingBefore ( 10 );//设置段落与段落之间的间隔
                title.setSpacingAfter ( 10 );
                Chapter chapter = new Chapter ( title, 1 );
                chapter.setBookmarkTitle ( parent.getTitleName () );
                chapter.setIndentation ( 30 );
                chapter.setBookmarkOpen ( false );
                addChapterElement ( readHtml, parent.getHtmlPath (), chapter );//读取最外层的节点HTML文件
                //子章节的操作
                childSectionHandle ( readHtml, parent, listTitles, chapter );
                document.add ( chapter );//外层章节要到最后才能够添加
            }
        }
    }
   
    /**
     * 子章节的处理
     *
     * @param readHtml
     * @param parent
     * @param listTitles
     * @param chapter
     * @throws FileNotFoundException
     * @throws IOException
     * @throws DocumentException
     */
    private void childSectionHandle ( HtmlToPDFHandle readHtml, HtmlToPdfBean parent, List<HtmlToPdfBean> listTitles,
            Chapter chapter ) throws FileNotFoundException, IOException, DocumentException {
        List<HtmlToPdfBean> children = readHtml.cycleJudgeChildren ( parent, listTitles );
        for (HtmlToPdfBean c : children) {
            Section section = chapter.addSection ( titleShow ( c.getTitleName (), 18 ) );//添加子节点
            sectionInit ( section, c );//为属性赋值
            addSectionElement ( readHtml, c.getHtmlPath (), section );//读取Section类型的html文件
            addChildSection ( readHtml, section, c, listTitles );//读取所属的子节点
        }
    }
   
    /**
     * 为Section类型的对象赋值
     *
     * @param section:Section类型的对象
     * @param c:为HtmlToPdfHelper的对象
     */
    private void sectionInit ( Section section, HtmlToPdfBean c ) {
        section.setBookmarkTitle ( c.getTitleName () );
        section.setIndentation ( 30 );
        section.setBookmarkOpen ( false );
    }
   
    /**
     * 创建Paragraph对象
     *
     * @param title:需要显示出来的信息
     * @param fontSize:字体大小
     * @return
     * @throws DocumentException
     * @throws IOException
     */
    private Paragraph titleShow ( String title, int fontSize ) throws DocumentException, IOException {
        BaseFont bf = BaseFont.createFont ( FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED );
        //设置段落的内容与字体
        Paragraph p = new Paragraph ( title, new Font ( bf, fontSize ) );
        //设置段落与段落之间的间隔
        p.setSpacingAfter ( 10 );
        p.setSpacingBefore ( 10 );
        return p;
    }
   
    /**
     * 读取html文件并转换为pdf格式文件
     * Section类型的节点
     *
     * @param readHtml
     * @param htmlPath
     * @param section
     * @throws FileNotFoundException
     * @throws IOException
     */
    private void addSectionElement ( HtmlToPDFHandle readHtml, String htmlPath, Section section )
            throws FileNotFoundException, IOException {
        List<Element> childrenElements = readHtml.getHtmlElement ( htmlPath );
        if (childrenElements != null) {
            for (Element ce : childrenElements) {
                section.add ( ce );//添加内容
            }
        }
    }
   
    /**
     * 读取html文件并转换为pdf格式文件
     * Chapter类型的节点
     *
     * @param readHtml
     * @param htmlPath
     * @param chapter
     * @throws FileNotFoundException
     * @throws IOException
     */
    private void addChapterElement ( HtmlToPDFHandle readHtml, String htmlPath, Chapter chapter )
            throws FileNotFoundException, IOException {
        List<Element> childrenElements = readHtml.getHtmlElement ( htmlPath );
        if (childrenElements != null) {
            for (Element ce : childrenElements) {
                chapter.add ( ce );//添加内容
            }
        }
    }
   
    /**
     * 循环添加子节点
     *
     * @param sectionParent:类型为Section的父节点
     * @param parent:类型为HtmlToPdfHelper的对象
     * @param listTitles:类型为List<HtmlToPdfHelper>的集合
     * @throws FileNotFoundException
     * @throws IOException
     * @throws DocumentException
     */
    public void addChildSection ( HtmlToPDFHandle readHtml, Section sectionParent, HtmlToPdfBean parent,
            List<HtmlToPdfBean> listTitles ) throws FileNotFoundException, IOException, DocumentException {
        List<HtmlToPdfBean> children = cycleJudgeChildren ( parent, listTitles );//获得对应的子章节集合
        for (HtmlToPdfBean child : children) {//循环集合
            Section sub = sectionParent.addSection ( titleShow ( child.getTitleName (), 18 ) );//添加子章节的点
            sectionInit ( sub, child );//为属性赋值
            addSectionElement ( readHtml, child.getHtmlPath (), sub );//从html中读取信息
            addChildSection ( readHtml, sub, child, listTitles );//递归调用
        }
    }
   
    /**
     * 获得所有的子章节
     *
     * @param parentHtmlToPdfHelper
     * @param list
     * @return
     * @throws FileNotFoundException
     * @throws IOException
     */
    public List<HtmlToPdfBean> cycleJudgeChildren ( HtmlToPdfBean parentHtmlToPdfHelper, List<HtmlToPdfBean> list )
            throws FileNotFoundException, IOException {
        List<HtmlToPdfBean> children = new ArrayList<HtmlToPdfBean> ();
        if (parentHtmlToPdfHelper.isIdHasChildren ()) {
            for (HtmlToPdfBean h : list) {
                if (h.getParentId () == parentHtmlToPdfHelper.getId ()) {
                    children.add ( h );
                }
            }
        }
        return children;
    }
   
    /**
     * 读取Html文件的信息,转换为List<Element>集合
     * @param htmlPath:html文件的路径
     * @return
     * @throws IOException
     * @throws IOException
     * @throws FileNotFoundException
     * @throws FileNotFoundException
     * @throws IOException
     * @throws IOException
     */
    public List<Element> getHtmlElement ( String htmlPath ) throws IOException {
        if (htmlPath.trim ().endsWith ( "html" )) {//html的路径必须以html结尾
            Reader reader = new FileReader ( htmlPath );
            List<Element> objects = HTMLWorker.parseToList ( reader, null, providers );
            return objects;
        }
        else {
            return null;
        }
    }
   
    /**
     * 图片
     * @author 姚腾蛟
     *  注意图片的大小(图片太大,会显示不了)
     * May 23, 2011
     */
    public static class MyImageFactory implements ImageProvider {
        public Image getImage ( String src, Map<String, String> h, ChainedProperties cprops, DocListener doc ) {
            try {
                Image image = Image.getInstance ( src );
                image.setTop ( 10 );
                return image;
            }
            catch (DocumentException e) {
                e.printStackTrace ();
            }
            catch (IOException e) {
                e.printStackTrace ();
            }
            return null;
        }
       
        public Image getImage ( String arg0, HashMap<String, String> arg1, ChainedProperties arg2, DocListener arg3 ) {
            return null;
        }
    }
   
    /**
     * 文字
     * 原有的方法不支持中文,需要重写getFont()方法
     * @author 姚腾蛟
     *
     * May 23, 2011
     */
    public static class MyFontFactory implements FontProvider {
        public Font getFont ( String fontname, String encoding, boolean embedded, float size, int style, BaseColor color ) {
            BaseFont baseFont = null;
            try {
                baseFont = BaseFont.createFont ( FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED );
            }
            catch (DocumentException e) {
                e.printStackTrace ();
            }
            catch (IOException e) {
                e.printStackTrace ();
            }
            return new Font ( baseFont, 12 );//12为字体大小
        }
       
        public boolean isRegistered ( String fontname ) {
            return false;
        }
    }
   
    /**
     * Sets the styles for the HTML to PDF conversion
     * @param styles a StyleSheet object
     */
    public void setStyles ( StyleSheet styles ) {
        this.styles = styles;
    }
   
    /**
     * Set some extra properties.
     * @param providers the properties map
     */
    public void setProviders ( HashMap<String, Object> providers ) {
        this.providers = providers;
    }
   
}

 

package com.util;

/**
 * Describes one title node used by the HTML-to-PDF conversion: its own id, the id of its
 * parent node, the title text, the path of the HTML file holding its content and a flag
 * telling whether it has child nodes.
 *
 * @author 姚腾蛟
 *
 * May 24, 2011
 */
public class HtmlToPdfBean {
    /** Id of this node. */
    private int id;
    /** Id of the parent node. */
    private int parentId;
    /** Title text; empty by default. */
    private String titleName = "";
    /** Path of the HTML file with this node's content; unset ({@code null}) by default. */
    private String htmlPath;
    /** Whether this node has child nodes; {@code false} by default. */
    private boolean idHasChildren;

    /** @return the id of this node */
    public int getId() {
        return this.id;
    }

    /** @param id the id of this node */
    public void setId(final int id) {
        this.id = id;
    }

    /** @return the id of the parent node */
    public int getParentId() {
        return this.parentId;
    }

    /** @param parentId the id of the parent node */
    public void setParentId(final int parentId) {
        this.parentId = parentId;
    }

    /** @return the title text */
    public String getTitleName() {
        return this.titleName;
    }

    /** @param titleName the title text */
    public void setTitleName(final String titleName) {
        this.titleName = titleName;
    }

    /** @return the path of the HTML file with this node's content */
    public String getHtmlPath() {
        return this.htmlPath;
    }

    /** @param htmlPath the path of the HTML file with this node's content */
    public void setHtmlPath(final String htmlPath) {
        this.htmlPath = htmlPath;
    }

    /** @return {@code true} when this node has child nodes */
    public boolean isIdHasChildren() {
        return this.idHasChildren;
    }

    /** @param idHasChildren whether this node has child nodes */
    public void setIdHasChildren(final boolean idHasChildren) {
        this.idHasChildren = idHasChildren;
    }
}

 


 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值