工具类:
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//import org.xhtmlrenderer.pdf.ITextFontResolver;
//import org.xhtmlrenderer.pdf.ITextRenderer;
/**
 * Utility that fills an HTML template with values and renders it to PDF.
 *
 * <p>Two rendering paths are offered: one through Flying Saucer's
 * {@code ITextRenderer} (output addressed by file path) and one through
 * iText's {@code XMLWorkerHelper} (output written to a caller-supplied stream).
 * Both share the same template pre-processing, see
 * {@link #operateHtml(String, Map)}.
 *
 * @author zql
 * @createTime 2020-12-01 00:06:50
 * @version 1.1
 * @modifyLog 1.1 code clean-up
 */
public class HtmlToPDF {

    private static final Logger logger = LogUtils.getLogger();

    /** Charset used both to read the template and to hand the HTML to the XML parser. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Location of the SimSun font, relative to the JVM working directory ({@code user.dir}). */
    private static final String FONT_RELATIVE_PATH = "src/top/zqlweb/fonts/simsun.ttc";

    /**
     * Renders an HTML template to a PDF file using Flying Saucer
     * (Maven artifact {@code org.xhtmlrenderer:core-renderer:R8}).
     *
     * <p>The output file is opened before the template is processed and is
     * always closed again, whether rendering succeeds or fails.
     *
     * @author zql
     * @createTime 2020-12-01 00:07:09
     *
     * @param htmlPath path of the HTML template file
     * @param map      placeholder names mapped to their replacement values
     * @param pdfPath  path of the PDF file to create
     * @throws Exception if the template cannot be read or the PDF cannot be written;
     *                   the original exception is rethrown unchanged
     */
    public static void generatePdf(String htmlPath, Map<String, String> map, String pdfPath) throws Exception {
        // try-with-resources releases the file handle even when rendering fails.
        try (OutputStream outputFile = new FileOutputStream(pdfPath)) {
            String htmlStr = HtmlToPDF.operateHtml(htmlPath, map);
            ITextRenderer renderer = new ITextRenderer();
            ITextFontResolver font = renderer.getFontResolver();
            // File(parent, child) produces the platform's own separators, so the same
            // relative location resolves on Windows and on Unix-like systems.
            String fontPath = new File(System.getProperty("user.dir"), FONT_RELATIVE_PATH).getPath();
            // Register SimSun so Chinese text can be rendered. On Linux a matching font
            // has to be supplied; if Chinese text is still missing, add
            // font-family:'SimSun'; to the affected elements.
            font.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
            renderer.setDocumentFromString(htmlStr);
            renderer.layout();
            renderer.createPDF(outputFile);
            renderer.finishPDF();
            logger.info("PDF is generate.");
        } catch (IOException e) {
            logger.error("HtmlToPDF.generatePdf occoured IOException! Message:{}", e.getMessage(), e);
            throw e;
        } catch (DocumentException e) {
            // NOTE(review): this is com.itextpdf's DocumentException; confirm whether the
            // Flying Saucer version in use throws this type or an older iText one.
            logger.error("HtmlToPDF.generatePdf occoured DocumentException! Message:{}", e.getMessage(), e);
            throw e;
        }
    }

    /**
     * Renders an HTML template to PDF using iText's XML worker and writes the
     * result to the given stream.
     *
     * <p>The document is closed exactly once. NOTE(review): with iText's default
     * writer settings closing the document presumably also closes
     * {@code outputFile}; confirm before reusing the stream afterwards.
     *
     * @author zql
     * @createTime 2020-12-01 00:09:04
     *
     * @param htmlPath   path of the HTML template file
     * @param map        placeholder names mapped to their replacement values
     * @param outputFile stream that receives the PDF bytes
     * @throws Exception if the template cannot be read or the PDF cannot be produced;
     *                   the original exception is rethrown unchanged
     */
    public static void generatePdf(String htmlPath, Map<String, String> map, OutputStream outputFile) throws Exception {
        Document document = null;
        try {
            String htmlStr = HtmlToPDF.operateHtml(htmlPath, map);
            document = new Document();
            try (InputStream in = HtmlToPDF.stringToInputStream(htmlStr)) {
                PdfWriter writer = PdfWriter.getInstance(document, outputFile);
                document.open();
                // If Chinese text is missing, add font-family:'SimSun'; to the affected elements.
                XMLWorkerHelper.getInstance().parseXHtml(writer, document, in, UTF_8);
            }
            document.close();
            logger.info("PDF is generate.");
        } catch (IOException e) {
            logger.error("HtmlToPDF.generatePdf occoured IOException! Message:{}", e.getMessage(), e);
            throw e;
        } catch (DocumentException e) {
            logger.error("HtmlToPDF.generatePdf occoured DocumentException! Message:{}", e.getMessage(), e);
            throw e;
        } finally {
            // Only reached with an open document when something above failed.
            // A failure while closing must not hide that original exception.
            if (document != null && document.isOpen()) {
                try {
                    document.close();
                } catch (RuntimeException closeFailure) {
                    logger.error("HtmlToPDF.generatePdf failed to close document! Message:{}",
                            closeFailure.getMessage(), closeFailure);
                }
            }
        }
    }

    /**
     * Returns the lower-cased {@code os.name} system property and logs it.
     *
     * @author zql
     * @createTime 2020-12-01 00:10:34
     *
     * @return the operating system name in lower case
     */
    public static String getCurrentOperatingSystem() {
        // Locale.ROOT keeps the result stable regardless of the default locale
        // (for example the Turkish dotless i).
        String os = System.getProperty("os.name").toLowerCase(java.util.Locale.ROOT);
        logger.info("---------当前操作系统是:{}---------", os);
        return os;
    }

    /**
     * Wraps a string in an {@link InputStream} of its UTF-8 bytes.
     *
     * <p>The bytes are encoded as UTF-8 because the XML parser that consumes the
     * stream is told to decode UTF-8; the platform default charset would corrupt
     * non-ASCII text on systems where it differs.
     *
     * @author zql
     * @createTime 2020-12-01 00:09:27
     *
     * @param str text to expose as a stream
     * @return stream over the UTF-8 encoding of {@code str}
     */
    private static InputStream stringToInputStream(String str) {
        return new ByteArrayInputStream(str.getBytes(UTF_8));
    }

    /**
     * Reads a stream to its end and decodes the bytes as UTF-8.
     *
     * <p>Best effort: a read failure is logged and whatever was read up to that
     * point is returned.
     *
     * @author zql
     * @createTime 2020-12-01 00:09:37
     *
     * @param in stream to drain; it is not closed by this method
     * @return the decoded content read so far
     */
    @SuppressWarnings("unused")
    private static String inputStreamToString(InputStream in) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        try {
            int read;
            while ((read = in.read(buffer)) != -1) {
                baos.write(buffer, 0, read);
            }
        } catch (Exception e) {
            logger.error("InputStream转String处理失败:{}", e.getMessage(), e);
        }
        return new String(baos.toByteArray(), UTF_8);
    }

    /**
     * Loads the HTML template, normalises its markup and substitutes placeholders.
     *
     * <p>NOTE(review): the template file is deleted once it has been processed,
     * on success and on failure alike. That is kept because callers may rely on
     * it, but it means a template can be used only once; confirm it is intended.
     *
     * @author zql
     * @createTime 2020-12-01 00:10:02
     *
     * @param htmlPath path of the HTML template file
     * @param map      placeholder names mapped to their replacement values; may be {@code null}
     * @return the processed HTML
     * @throws Exception if the template cannot be read or processed
     */
    private static String operateHtml(String htmlPath, Map<String, String> map) throws Exception {
        File htmlFile = null;
        try {
            htmlFile = new File(htmlPath);
            // jsoup is used rather than a raw read because it adds missing closing tags.
            org.jsoup.nodes.Document html = Jsoup.parse(htmlFile, "UTF-8");
            String htmlStr = normalizeMarkup(html.html());
            htmlStr = fillPlaceholders(htmlStr, map);
            // Drop any styling on <body> and force SimSun so Chinese text is displayed.
            return htmlStr.replaceAll("<body[^>]*>", "<body style=\"font-family:'SimSun';\">");
        } catch (Exception e) {
            logger.error("HtmlToPDF.operateHtml occoured Exception! Message:{}", e.getMessage(), e);
            throw e;
        } finally {
            if (htmlFile != null && htmlFile.exists() && !htmlFile.delete()) {
                logger.warn("HtmlToPDF.operateHtml could not delete template file:{}", htmlPath);
            }
        }
    }

    /**
     * Applies the fixed markup and style rewrites the PDF renderers need.
     *
     * @param source HTML as serialised by jsoup
     * @return HTML with fonts forced to SimSun, layout rules stripped and meta tags closed
     */
    private static String normalizeMarkup(String source) {
        // Force every declared font to SimSun so Chinese glyphs are available.
        String htmlStr = source.replaceAll("font-family:[^;]*;", "font-family:'SimSun';");
        // Strip width/margin declarations and p./span. class rules; the original
        // author added these for HTML converted from docx.
        htmlStr = htmlStr.replaceAll("width:[^;]*;", "");
        htmlStr = htmlStr.replaceAll("margin-bottom:[^;]*;", "");
        htmlStr = htmlStr.replaceAll("margin-left:[^;]*;", "");
        htmlStr = htmlStr.replaceAll("margin-right:[^;]*;", "");
        htmlStr = htmlStr.replaceAll("margin-top:[^;]*;", "");
        htmlStr = htmlStr.replaceAll("p\\.[^\\}]*\\}", "");
        htmlStr = htmlStr.replaceAll("span\\.[^\\}]*\\}", "");
        htmlStr = htmlStr.replaceAll("<p>", "<p style = \"font-family:'SimSun';\">");
        // Self-close <meta> tags that were left open.
        htmlStr = HtmlToPDF.expectedClosingTag(htmlStr, "(<META[^>]*[^/]>)|(<meta[^>]*[^/]>)");
        // Table border fix. An earlier attempt to rewrite every border-* rule was
        // abandoned because it made the table lines bolder.
        return htmlStr.replaceAll("thin solid black", "1");
    }

    /**
     * Replaces <code>${name}</code> placeholders with the mapped values and blanks
     * out the ones left over.
     *
     * <p>Each key is matched literally. Besides the exact form, any placeholder
     * whose body merely contains the key is replaced too, as in the original
     * implementation. NOTE(review): that loose match presumably exists for
     * placeholders split by converter-inserted markup, but it also means a key
     * can claim a placeholder of which it is only a substring; confirm.
     *
     * @param htmlStr HTML containing placeholders
     * @param map     placeholder names mapped to values; {@code null} means no values.
     *                Null or empty keys are skipped and null values become empty text
     * @return HTML without any remaining placeholder of that form
     */
    private static String fillPlaceholders(String htmlStr, Map<String, String> map) {
        String result = htmlStr;
        if (map != null) {
            for (Map.Entry<String, String> entry : map.entrySet()) {
                String key = entry.getKey();
                if (key == null || key.isEmpty()) {
                    // An empty key would match every placeholder in the loose pattern below.
                    continue;
                }
                String value = entry.getValue() == null ? "" : entry.getValue();
                // Quote both sides: keys may contain regex metacharacters and values
                // may contain '$' or '\', which replaceAll would otherwise interpret.
                String quotedKey = Pattern.quote(key);
                String replacement = Matcher.quoteReplacement(value);
                result = result.replaceAll("\\$\\{" + quotedKey + "\\}", replacement);
                result = result.replaceAll("\\$\\{[^\\}]*" + quotedKey + "[^\\}]*\\}", replacement);
            }
        }
        // Blank out placeholders that received no value.
        return result.replaceAll("\\$\\{[^\\}]*\\}", " ");
    }

    /**
     * Turns every match of {@code regex} into a self-closing tag by replacing its
     * last character with {@code "/>"}.
     *
     * <p>The text is rewritten in a single pass. Matches of one character or less
     * are left unchanged.
     *
     * @author zql
     * @createTime 2020-12-01 00:10:24
     *
     * @param htmlStr HTML to process
     * @param regex   expression matching the tags that are missing their closing slash
     * @return the HTML with the matched tags self-closed
     */
    private static String expectedClosingTag(String htmlStr, String regex) {
        Matcher m = Pattern.compile(regex).matcher(htmlStr);
        StringBuffer out = new StringBuffer(htmlStr.length() + 16);
        while (m.find()) {
            String tag = m.group();
            String fixed = tag;
            if (tag.length() > 1) {
                logger.info("In need of replacement:{}", tag);
                fixed = tag.substring(0, tag.length() - 1) + "/>";
            }
            // quoteReplacement keeps '$' and '\' inside the tag literal.
            m.appendReplacement(out, Matcher.quoteReplacement(fixed));
        }
        m.appendTail(out);
        return out.toString();
    }
}
测试类:
import org.junit.Test;
import java.io.FileOutputStream;
import java.util.HashMap;
import java.util.Map;
/**
 * Manual tests for the HtmlToPDF utility.
 *
 * <p>These tests read templates from and write PDFs to fixed locations under
 * {@code E:/test}, so they only run on a machine where those files exist.
 * HtmlToPDF deletes the template file after processing it, so each template
 * has to be put back before a test is run again.
 *
 * @author zql
 * @version 1.0
 * @createTime 2021-01-02 15:43:00
 * @modifyLog
 */
public class HtmlToPDFTest {

    /**
     * Builds the placeholder values shared by the generation tests.
     *
     * @return a fresh map of placeholder names to replacement text
     */
    private static Map<String, String> sampleValues() {
        Map<String, String> map = new HashMap<String, String>();
        map.put("test1", "测试1");
        map.put("test2", "测试2");
        map.put("test3", "测试3");
        map.put("test4", "测试4");
        return map;
    }

    /** Generates a PDF through the overload that takes an output file path. */
    @Test
    public void generatePdf() throws Exception {
        String htmlFile1 = "E:/test/test1.html";
        String pdfFile1 = "E:/test/test1.pdf";
        HtmlToPDF.generatePdf(htmlFile1, sampleValues(), pdfFile1);
    }

    /** Generates a PDF through the overload that takes an output stream. */
    @Test
    public void generatePdf2() throws Exception {
        String htmlFile2 = "E:/test/test2.html";
        String pdfFile2 = "E:/test/test2.pdf";
        // Close the stream here as well, so the file handle is released even
        // when generation fails before the PDF document is closed.
        try (FileOutputStream out = new FileOutputStream(pdfFile2)) {
            HtmlToPDF.generatePdf(htmlFile2, sampleValues(), out);
        }
    }

    /** Prints the operating system name reported by the utility. */
    @Test
    public void getCurrentOperatingSystem() {
        System.out.println(HtmlToPDF.getCurrentOperatingSystem());
    }
}
maven依赖:
<!-- Flying Saucer renderer: provides org.xhtmlrenderer.pdf.ITextRenderer and
     ITextFontResolver, used by HtmlToPDF.generatePdf(String, Map, String). -->
<!-- NOTE(review): this release is believed to be built against the older
     com.lowagie iText 2.x API, while HtmlToPDF imports com.itextpdf classes;
     confirm the exception and constant types line up. -->
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>core-renderer</artifactId>
<version>R8</version>
</dependency>
<!-- itextpdf: provides com.itextpdf.text.Document, DocumentException,
     pdf.BaseFont and pdf.PdfWriter imported by HtmlToPDF. -->
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.4.0</version>
</dependency>
<!-- xmlworker: provides com.itextpdf.tool.xml.XMLWorkerHelper, used by
     HtmlToPDF.generatePdf(String, Map, OutputStream). -->
<!-- https://mvnrepository.com/artifact/com.itextpdf.tool/xmlworker -->
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.4.0</version>
</dependency>
<!-- SLF4J with the log4j 1.2 binding: HtmlToPDF logs through org.slf4j.Logger. -->
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.2</version>
</dependency>
<!-- jsoup: org.jsoup.Jsoup parses the HTML template in HtmlToPDF.operateHtml. -->
<!-- NOTE(review): HtmlToPDFTest imports org.junit.Test but no JUnit dependency
     appears in this fragment; presumably it is declared elsewhere. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.2</version>
</dependency>