html转PDF工具类

最新推荐文章于 2024-07-26 16:25:12 发布

似水流年风萧兮

最新推荐文章于 2024-07-26 16:25:12 发布

阅读量969

点赞数

分类专栏： # java工具类；文章标签： java html pdf maven

本文链接：https://blog.csdn.net/mr_zql/article/details/107577901

版权

java工具类专栏收录该内容

50 篇文章 2 订阅

订阅专栏

工具类：

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

//import org.xhtmlrenderer.pdf.ITextFontResolver;
//import org.xhtmlrenderer.pdf.ITextRenderer;

/**
 * Utility class that renders an HTML template to a PDF document.
 * <p>
 * Two rendering paths are offered: one through {@code ITextRenderer} that writes to a
 * file path, and one through {@code XMLWorkerHelper} that writes to a caller-supplied
 * stream. Both first run the template through {@link #operateHtml(String, Map)}, which
 * rewrites a number of style declarations and fills in <code>${name}</code> placeholders.
 *
 * @author zql
 * @createTime 2020-12-01 00:06:50
 * @version 1.1
 * @modifyLog 1.1 code clean-up
 *
 */
public class HtmlToPDF {

	private static final Logger logger = LogUtils.getLogger();

	/** Charset used both to encode the processed HTML and to tell the parser how to decode it. */
	private static final Charset UTF_8 = Charset.forName("UTF-8");

	/**
	 * Renders an HTML template to a PDF file.
	 * <pre>
	 * This method needs the following maven dependency:
	 * &lt;dependency>
	 *   &lt;groupId>org.xhtmlrenderer&lt;/groupId>
	 *   &lt;artifactId>core-renderer&lt;/artifactId>
	 *   &lt;version>R8&lt;/version>
	 * &lt;/dependency>
	 * </pre>
	 * The SimSun font is loaded from {@code src/top/zqlweb/fonts/simsun.ttc} below the
	 * directory named by the {@code user.dir} system property.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:07:09
	 *
	 * @param htmlPath path of the html template file (see {@link #operateHtml(String, Map)}
	 *                 for what happens to that file)
	 * @param map placeholder names mapped to the values that replace them
	 * @param pdfPath path of the pdf file to write
	 * @throws Exception if the template cannot be processed, the font cannot be loaded,
	 *                   or the pdf cannot be written
	 */
	public static void generatePdf(String htmlPath, Map<String, String> map, String pdfPath) throws Exception {
		// try-with-resources releases the file handle on every path, including failures.
		try (OutputStream outputFile = new FileOutputStream(pdfPath)) {
			// Template content as a string, placeholders already substituted.
			String htmlStr = HtmlToPDF.operateHtml(htmlPath, map);

			ITextRenderer renderer = new ITextRenderer();
			ITextFontResolver font = renderer.getFontResolver();

			// Built with File.separator so the same relative location resolves on every OS.
			String fontPath = String.join(File.separator,
					System.getProperty("user.dir"), "src", "top", "zqlweb", "fonts", "simsun.ttc");
			// Registers SimSun so Chinese text can be rendered. On Linux supply a matching
			// font file; if Chinese text is still missing, add font-family:'SimSun'; to the
			// style of the affected tags.
			font.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

			renderer.setDocumentFromString(htmlStr);
			renderer.layout();
			renderer.createPDF(outputFile);
			renderer.finishPDF();
			logger.info("PDF is generate.");
		} catch (IOException e) {
			logger.error("HtmlToPDF.generatePdf occoured IOException! Message:{}", e.getMessage(), e);
			throw e;
		} catch (DocumentException e) {
			// NOTE(review): the renderer from core-renderer R8 presumably throws the
			// com.lowagie DocumentException rather than the com.itextpdf one imported
			// here, in which case this branch never runs for renderer failures - confirm.
			logger.error("HtmlToPDF.generatePdf occoured DocumentException! Message:{}", e.getMessage(), e);
			throw e;
		}
	}

	/**
	 * Renders an HTML template to PDF and writes it to the given stream.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:09:04
	 *
	 * @param htmlPath path of the html template file (see {@link #operateHtml(String, Map)}
	 *                 for what happens to that file)
	 * @param map placeholder names mapped to the values that replace them
	 * @param outputFile stream that receives the pdf bytes
	 * @throws Exception if the template cannot be processed or the pdf cannot be produced
	 */
	public static void generatePdf(String htmlPath, Map<String, String> map, OutputStream outputFile) throws Exception {
		Document document = null;

		try {
			// Template content as a string, placeholders already substituted.
			String htmlStr = HtmlToPDF.operateHtml(htmlPath, map);

			document = new Document();
			PdfWriter writer = PdfWriter.getInstance(document, outputFile);
			document.open();

			// If Chinese text does not show up, add font-family:'SimSun'; to the style of
			// the affected tags.
			try (InputStream in = HtmlToPDF.stringToInputStream(htmlStr)) {
				XMLWorkerHelper.getInstance().parseXHtml(writer, document, in, UTF_8);
			}

			document.close();
			logger.info("PDF is generate.");
		} catch (IOException e) {
			logger.error("HtmlToPDF.generatePdf occoured IOException! Message:{}", e.getMessage(), e);
			throw e;
		} catch (DocumentException e) {
			logger.error("HtmlToPDF.generatePdf occoured DocumentException! Message:{}", e.getMessage(), e);
			throw e;
		} finally {
			// The document is still open here only when the try body failed before reaching
			// its own close(); close it, but never let a secondary close failure replace
			// the exception that is already propagating.
			if (Objects.nonNull(document) && document.isOpen()) {
				try {
					document.close();
				} catch (RuntimeException closeFailure) {
					logger.warn("HtmlToPDF.generatePdf could not close the document after a failure: {}",
							closeFailure.getMessage(), closeFailure);
				}
			}
		}
	}

	/**
	 * Returns the name of the current operating system.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:10:34
	 *
	 * @return the {@code os.name} system property in lower case; the value is also logged
	 */
	public static String getCurrentOperatingSystem() {
		// Locale.ROOT keeps the lower-casing independent of the default locale.
		String os = System.getProperty("os.name").toLowerCase(java.util.Locale.ROOT);
		logger.info("---------当前操作系统是：{}---------", os);
		return os;
	}

	/**
	 * Wraps a string in an {@link InputStream}.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:09:27
	 *
	 * @param str text to expose as a stream
	 * @return stream over the UTF-8 bytes of {@code str}
	 */
	private static InputStream stringToInputStream(String str) {
		// Encode explicitly: the consumer decodes these bytes as UTF-8, so relying on the
		// platform default charset would corrupt non-ASCII text on non-UTF-8 platforms.
		return new ByteArrayInputStream(str.getBytes(UTF_8));
	}

	/**
	 * Reads a stream to its end and returns the content as a string.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:09:37
	 *
	 * @param in stream to read; it is not closed by this method
	 * @return the bytes read, decoded as UTF-8; if reading fails the failure is logged and
	 *         whatever was read up to that point is returned
	 */
	@SuppressWarnings("unused")
	private static String inputStreamToString(InputStream in) {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		byte[] buffer = new byte[8192];
		try {
			int read;
			while ((read = in.read(buffer)) != -1) {
				baos.write(buffer, 0, read);
			}
		} catch (Exception e) {
			logger.error("InputStream转String处理失败：{}", e.getMessage(), e);
		}
		return new String(baos.toByteArray(), UTF_8);
	}

	/**
	 * Reads the html template, normalises its styles and substitutes placeholder values.
	 * <p>
	 * Steps, in order:
	 * <ol>
	 *   <li>parse the file with jsoup (as UTF-8) and serialise it back to a string;</li>
	 *   <li>force every {@code font-family} declaration to {@code 'SimSun'};</li>
	 *   <li>drop {@code width} and {@code margin-*} declarations, drop text running from
	 *       {@code p.} or {@code span.} up to the next closing brace, and give bare
	 *       {@code <p>} tags a SimSun style;</li>
	 *   <li>self-close {@code <meta>} tags;</li>
	 *   <li>replace {@code thin solid black} with {@code 1};</li>
	 *   <li>replace every <code>${...}</code> placeholder whose content contains a map key
	 *       with that key's value (keys and values are taken literally);</li>
	 *   <li>replace every remaining placeholder with four {@code &nbsp;} entities;</li>
	 *   <li>replace the {@code <body>} tag with one carrying only a SimSun style.</li>
	 * </ol>
	 * <b>Side effect:</b> the file at {@code htmlPath} is deleted when this method
	 * finishes, whether it succeeded or not.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:10:02
	 *
	 * @param htmlPath path of the html file
	 * @param map placeholder names mapped to replacement values; may be {@code null},
	 *            and entries with a {@code null} key or value are skipped so their
	 *            placeholders are blanked like any other unassigned one
	 * @return the processed html
	 * @throws Exception if the file cannot be read or parsed
	 */
	private static String operateHtml(String htmlPath, Map<String, String> map) throws Exception {
		String htmlStr = "";
		File htmlFile = null;
		try {
			htmlFile = new File(htmlPath);

			// Read through jsoup rather than raw IO; per the original author it fills in
			// missing closing tags.
			org.jsoup.nodes.Document html = Jsoup.parse(htmlFile, "UTF-8");
			htmlStr = html.html();

			// Chinese text support: force a font that contains the glyphs.
			htmlStr = htmlStr.replaceAll("font-family:[^;]*;", "font-family:'SimSun';");

			// Clean-up for html that was produced from docx.
			htmlStr = htmlStr.replaceAll("width:[^;]*;", "");
			htmlStr = htmlStr.replaceAll("margin-bottom:[^;]*;", "");
			htmlStr = htmlStr.replaceAll("margin-left:[^;]*;", "");
			htmlStr = htmlStr.replaceAll("margin-right:[^;]*;", "");
			htmlStr = htmlStr.replaceAll("margin-top:[^;]*;", "");
			htmlStr = htmlStr.replaceAll("p\\.[^\\}]*\\}", "");
			htmlStr = htmlStr.replaceAll("span\\.[^\\}]*\\}", "");
			htmlStr = htmlStr.replaceAll("<p>", "<p style = \"font-family:'SimSun';\">");

			// Close the <meta> tags that are left unclosed.
			htmlStr = HtmlToPDF.expectedClosingTag(htmlStr, "(<META[^>]*[^/]>)|(<meta[^>]*[^/]>)");

			// Table border fix.
			htmlStr = htmlStr.replaceAll("thin solid black", "1");

			// Fill in placeholders of the form ${test}.
			if (Objects.nonNull(map)) {
				for (Map.Entry<String, String> entry : map.entrySet()) {
					String key = entry.getKey();
					String value = entry.getValue();
					if (Objects.isNull(key) || Objects.isNull(value)) {
						continue;
					}
					// Quote both sides: the key must not act as a regex and '$' or '\' in
					// the value must not be read as a group reference.
					String quotedKey = Pattern.quote(key);
					String replacement = Matcher.quoteReplacement(value);
					// Matches the exact placeholder as well as any placeholder that merely
					// contains the key, so no separate exact-match pass is needed.
					htmlStr = htmlStr.replaceAll("\\$\\{[^\\}]*" + quotedKey + "[^\\}]*\\}", replacement);
				}
			}

			// Blank out placeholders that received no value.
			htmlStr = htmlStr.replaceAll("\\$\\{[^\\}]*\\}", "&nbsp;&nbsp;&nbsp;&nbsp;");
			// Drop whatever attributes the body tag carried and give it the SimSun font so
			// Chinese text is displayed.
			htmlStr = htmlStr.replaceAll("<body[^>]*>", "<body style=\"font-family:'SimSun';\">");
		} catch (Exception e) {
			logger.error("HtmlToPDF.operateHtml occoured Exception! Message:{}", e.getMessage(), e);
			throw e;
		} finally {
			// NOTE(review): this removes the caller's template file. Kept because existing
			// callers may rely on it (presumably the html is a temporary conversion
			// result) - confirm that this is intended.
			if (Objects.nonNull(htmlFile)) {
				htmlFile.delete();
			}
		}
		return htmlStr;
	}

	/**
	 * Turns every tag matched by {@code regex} into a self-closing tag by replacing its
	 * final character with {@code />}.
	 *
	 * @author zql
	 * @createTime 2020-12-01 00:10:24
	 *
	 * @param htmlStr html text to process
	 * @param regex pattern matching the unclosed tags, each match ending in {@code >}
	 * @return {@code htmlStr} with every match longer than one character rewritten;
	 *         shorter matches are left untouched
	 */
	private static String expectedClosingTag(String htmlStr, String regex) {
		Matcher matcher = Pattern.compile(regex).matcher(htmlStr);
		StringBuffer rewritten = new StringBuffer(htmlStr.length() + 16);
		while (matcher.find()) {
			String tag = matcher.group();
			String closed = tag;
			if (tag.length() > 1) {
				logger.info("In need of replacement:{}", tag);
				closed = tag.substring(0, tag.length() - 1) + "/>";
			}
			// quoteReplacement keeps '$' and '\' inside the tag from being interpreted.
			matcher.appendReplacement(rewritten, Matcher.quoteReplacement(closed));
		}
		matcher.appendTail(rewritten);
		return rewritten.toString();
	}

}

测试类：

import org.junit.Test;

import java.io.FileOutputStream;
import java.util.HashMap;
import java.util.Map;

/**
 * Tests for the html-to-PDF utility.
 * <p>
 * The conversion tests read their templates from and write their output to fixed
 * paths under {@code E:/test}.
 *
 * @author zql
 * @version 1.0
 * @createTime 2021-01-02 15:43:00
 * @modifyLog
 */
public class HtmlToPDFTest {

    /**
     * Builds the placeholder values shared by the conversion tests.
     *
     * @return a fresh map with the keys {@code test1} to {@code test4}
     */
    private static Map<String, String> sampleValues() {
        Map<String, String> map = new HashMap<String, String>();
        map.put("test1", "测试1");
        map.put("test2", "测试2");
        map.put("test3", "测试3");
        map.put("test4", "测试4");
        return map;
    }

    /** Converts a template to a pdf addressed by file path. */
    @Test
    public void generatePdf() throws Exception {
        String htmlFile1 = "E:/test/test1.html";
        String pdfFile1 = "E:/test/test1.pdf";

        HtmlToPDF.generatePdf(htmlFile1, sampleValues(), pdfFile1);
    }

    /** Converts a template to a pdf written through a caller-owned stream. */
    @Test
    public void generatePdf2() throws Exception {
        String htmlFile2 = "E:/test/test2.html";
        String pdfFile2 = "E:/test/test2.pdf";

        // The test opens the stream, so the test is responsible for closing it.
        try (FileOutputStream out = new FileOutputStream(pdfFile2)) {
            HtmlToPDF.generatePdf(htmlFile2, sampleValues(), out);
        }
    }

    /** Prints the operating system name reported by the utility. */
    @Test
    public void getCurrentOperatingSystem() {
        System.out.println(HtmlToPDF.getCurrentOperatingSystem());
    }
}

maven依赖：

<!-- PDF rendering: HtmlToPDF imports org.xhtmlrenderer.pdf.ITextRenderer and ITextFontResolver -->
<dependency>
    <groupId>org.xhtmlrenderer</groupId>
    <artifactId>core-renderer</artifactId>
    <version>R8</version>
</dependency>
      
<!-- itextpdf: HtmlToPDF imports com.itextpdf.text.Document, DocumentException, pdf.BaseFont and pdf.PdfWriter -->
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.4.0</version>
</dependency>
<!-- xmlworker: HtmlToPDF imports com.itextpdf.tool.xml.XMLWorkerHelper -->
<!-- https://mvnrepository.com/artifact/com.itextpdf.tool/xmlworker -->
<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.4.0</version>
</dependency>
      
<!-- Logging: HtmlToPDF imports org.slf4j.Logger -->
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.2</version>
</dependency>

<!-- HTML parsing: HtmlToPDF imports org.jsoup.Jsoup to read the template file -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.2</version>
</dependency>