参考:https://www.showdoc.cc/243234797494858?page_id=1390154181784190
引入以下jar包:
D:\Workspaces\MyEclipse 2017 CI\ZXJY\WebRoot\WEB-INF\lib\itext-asian-5.2.0.jar
D:\Workspaces\MyEclipse 2017 CI\ZXJY\WebRoot\WEB-INF\lib\itextpdf-5.5.1.jar
D:\Workspaces\MyEclipse 2017 CI\ZXJY\WebRoot\WEB-INF\lib\xmlworker-5.5.4.jar
Java实现类:
package com.pro.utils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import javax.servlet.ServletOutputStream;
import javax.servlet.WriteListener;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpServletResponseWrapper;
import org.jsoup.Jsoup;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.Pipeline;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.exceptions.CssResolverException;
import com.itextpdf.tool.xml.html.CssAppliers;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
/**
 * Static helpers for turning HTML into PDF with iText 5 / XMLWorker.
 *
 * <p>Covers: extracting the printable {@code div.entry} fragment from a page (via jsoup),
 * rendering that fragment into a PDF file, capturing the output of a servlet forward as a
 * string, and rendering an HTML string into an in-memory PDF.
 */
public class CreatePdfDocumentUtil {

    /** Charset used for every String/byte conversion performed by this class. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /** CSS selector of the element whose inner HTML is rendered into the PDF. */
    private static final String ENTRY_SELECTOR = "div.entry";

    /**
     * Fetches the page at the given URL and returns the inner HTML of its first
     * {@code div.entry} element.
     *
     * <p>Original author's note: the URL must return the target HTML directly. The request is
     * issued by jsoup without the caller's session, so a URL protected by a login
     * filter (Struts 1 in the author's case) yields the login page instead.
     *
     * @param URL address of the page, e.g.
     *            {@code http://localhost:8080/scm/scm/po/gather/pdftest/pdftesthtml3.html}
     * @return a one-element array holding the extracted HTML; an array is used so that further
     *         pieces of information taken from the document can be returned later
     * @throws IllegalArgumentException if the page contains no {@code div.entry} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static String[] extractHtmlInfo(String URL) throws Exception {
        // Fetch the URL and parse the response straight into a jsoup document.
        org.jsoup.nodes.Document doc = Jsoup.connect(URL).get();
        return new String[] { selectEntryHtml(doc) };
    }

    /**
     * Returns the inner HTML of the first {@code div.entry} element of the given HTML text.
     *
     * @param html complete HTML source to search
     * @return a one-element array holding the extracted HTML
     * @throws IllegalArgumentException if the HTML contains no {@code div.entry} element
     * @throws Exception if the HTML cannot be parsed
     */
    public static String[] extractHtmlInfo2(String html) throws Exception {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        return new String[] { selectEntryHtml(doc) };
    }

    /** Returns the inner HTML of the first element matching {@link #ENTRY_SELECTOR}. */
    private static String selectEntryHtml(org.jsoup.nodes.Document doc) {
        org.jsoup.nodes.Element entry = doc.select(ENTRY_SELECTOR).first();
        if (entry == null) {
            // first() yields null when nothing matches; fail with a clear message instead of
            // the NullPointerException the previous code produced on entry.html().
            throw new IllegalArgumentException(
                    "HTML does not contain an element matching '" + ENTRY_SELECTOR + "'");
        }
        return entry.html();
    }

    /**
     * Wraps the UTF-8 bytes of a string in an {@link InputStream}.
     *
     * @param content text to expose as a stream; may be {@code null}
     * @return a stream over the UTF-8 encoding of {@code content}, or {@code null} when
     *         {@code content} is {@code null} (kept for backward compatibility)
     */
    public static InputStream parse2Stream(String content) {
        if (content == null) {
            // The previous implementation swallowed the NullPointerException and returned null;
            // keep that contract, but make it explicit.
            return null;
        }
        return new ByteArrayInputStream(content.getBytes(UTF_8));
    }

    /**
     * Renders the {@code div.entry} fragment of the given HTML into a PDF file.
     *
     * <p>Failures are reported on standard error and do not propagate to the caller.
     *
     * @param pdfFile path of the PDF file to create
     * @param html    complete HTML source; must contain a {@code div.entry} element
     */
    public static void parseURL2PDFFile(String pdfFile, String html) {
        try {
            // Extract the fragment first so that invalid input never truncates an existing file.
            String[] blogInfo = extractHtmlInfo2(html);

            // try-with-resources releases the file handle even when PDF generation fails.
            try (FileOutputStream out = new FileOutputStream(pdfFile)) {
                Document document = new Document(PageSize.A4);
                PdfWriter pdfwriter = PdfWriter.getInstance(document, out);
                pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
                document.open();
                // AsianFontProvider works around Chinese text not being rendered by
                // XMLWorkerHelper.parseXHtml (per the original author's note).
                XMLWorkerHelper.getInstance().parseXHtml(pdfwriter, document,parse2Stream(blogInfo[0]),null, new AsianFontProvider());
                document.close();
            }
        } catch (Exception e) {
            // Best-effort API with a void/no-throws signature: report and continue.
            e.printStackTrace();
        }
    }

    /**
     * Forwards the request to {@code src} and captures the generated output instead of sending
     * it to the client.
     *
     * <p>The response is wrapped so that both {@code getOutputStream()} and {@code getWriter()}
     * write into an in-memory buffer, which is then decoded as UTF-8.
     *
     * @param request  current request
     * @param response current response; only used as the delegate of the capturing wrapper
     * @param src      path handed to {@code request.getRequestDispatcher}
     * @return everything the forward target wrote, decoded as UTF-8
     * @throws RuntimeException wrapping any exception raised while forwarding
     */
    public static String forward(HttpServletRequest request, HttpServletResponse response, String src) {
        try {
            final ByteArrayOutputStream captured = new ByteArrayOutputStream();
            final ServletOutputStream capturingStream = new ServletOutputStream() {
                @Override
                public void write(int b) throws IOException {
                    captured.write(b);
                }

                @Override
                public void write(byte[] b, int off, int len) throws IOException {
                    // Bulk write avoids the byte-by-byte default implementation.
                    captured.write(b, off, len);
                }

                @Override
                public boolean isReady() {
                    // Writing to an in-memory buffer never blocks.
                    return true;
                }

                @Override
                public void setWriteListener(WriteListener writeListener) {
                    // Non-blocking I/O is not supported by this capturing stream.
                }
            };
            final PrintWriter capturingWriter = new PrintWriter(new OutputStreamWriter(captured, UTF_8));
            HttpServletResponse capturingResponse = new HttpServletResponseWrapper(response) {
                @Override
                public ServletOutputStream getOutputStream() {
                    return capturingStream;
                }

                @Override
                public PrintWriter getWriter() {
                    return capturingWriter;
                }
            };

            request.getRequestDispatcher(src).forward(request, capturingResponse);

            // The writer buffers encoded characters; without this flush, output produced via
            // getWriter() could be missing from the captured bytes.
            capturingWriter.flush();
            return new String(captured.toByteArray(), UTF_8);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts an HTML string into a PDF and returns the PDF as a byte array.
     *
     * @param html HTML (XHTML-compatible) source to render
     * @return bytes of the generated PDF
     * @throws IOException          if the HTML cannot be read by the parser
     * @throws DocumentException    if iText fails to build the document
     * @throws CssResolverException if the CSS resolver cannot be set up
     */
    public static byte[] html2pdf(String html) throws IOException, DocumentException,CssResolverException {
        Document document = new Document(PageSize.A4);
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        PdfWriter writer = PdfWriter.getInstance(document, os);
        document.open();

        // Font provider that falls back to the SimSun ("宋体") family when no font is requested.
        XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider() {
            @Override
            public Font getFont(String fontname, String encoding, float size, int style) {
                return super.getFont(fontname == null ? "宋体" : fontname, encoding, size, style);
            }
        };
        fontProvider.addFontSubstitute("lowagie", "garamond");
        fontProvider.setUseUnicode(true);

        // Use our font provider, switched to Unicode, for CSS-driven font selection.
        CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
        htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
        CSSResolver cssResolver = XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
        Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
                new HtmlPipeline(htmlContext, new PdfWriterPipeline(document, writer)));
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser(worker);

        // Feed the characters directly. The previous GBK-encode / platform-default-decode round
        // trip corrupted text on any platform whose default charset is not GBK and dropped
        // characters that GBK cannot represent.
        p.parse(new java.io.StringReader(html));
        document.close();
        return os.toByteArray();
    }
}
关于使用XMLWorkerHelper.getInstance().parseXHtml生成PDF时中文不显示或乱码问题