java生成pdf(word模板转html，html再转pdf)

最新推荐文章于 2022-09-09 15:06:31 发布

小明哥哥的哥哥

最新推荐文章于 2022-09-09 15:06:31 发布

阅读量1.5k

点赞数

分类专栏：技术文章标签： pdf word转pdf word转html转pdf

本文链接：https://blog.csdn.net/qq1021623362/article/details/120194276

版权

技术专栏收录该内容

18 篇文章 0 订阅

订阅专栏

场景描述：

公司需要做一个打印功能，采用生成PDF的形式。由于PDF需要动态生成某些行，不能采用制作PDF表单的方式；而word文档直接转PDF又会导致排版不美观。另外，项目需要部署在linux服务器上，依赖windows中间件的框架也不方便使用，因此最后选择了动态操作word文档，先由word文档生成html，再把html转为pdf的方案。

中间也经历了不少坑，下面慢慢一一道来。

一、加pom依赖

操作word文档的依赖
       <!-- Apache POI artifacts used to manipulate the Word (.docx) template. -->
       <!-- NOTE(review): the XHTMLConverter class used by the code below is not provided by any
            artifact listed in this article; presumably an xdocreport XWPF-to-XHTML converter
            dependency is also required. Confirm against the real project POM. -->
       <dependency>
           <groupId>org.apache.poi</groupId>
           <artifactId>poi</artifactId>
           <version>3.15</version>
       </dependency>
       <dependency>
           <groupId>org.apache.poi</groupId>
           <artifactId>poi-ooxml</artifactId>
           <version>3.15</version>
       </dependency>

pdf依赖

<!-- PDF-side dependencies: iText 5 core, itext-asian and XML Worker. -->
<dependency>
           <groupId>com.itextpdf</groupId>
           <artifactId>itextpdf</artifactId>
           <version>5.5.10</version>
       </dependency>
       <dependency>
           <groupId>com.itextpdf</groupId>
           <artifactId>itext-asian</artifactId>
           <version>5.2.0</version>
       </dependency>
       <dependency>
           <groupId>com.itextpdf.tool</groupId>
           <artifactId>xmlworker</artifactId>
           <version>5.5.11</version>
       </dependency>

<!-- jsoup: used to parse the HTML produced from the Word document (Jsoup.parse). -->
<dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
       <version>1.14.2</version>
       </dependency>

<!-- NOTE(review): com.itextpdf.tool:xmlworker is declared a second time here (5.5.0) after the
     5.5.11 declaration above. A POM should declare each artifact once; keep a single version,
     presumably the one that matches the itextpdf version in use. Confirm before changing. -->
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.0</version>
</dependency>

<!-- Flying Saucer with the iText 5 binding; supplies the ITextRenderer used by html2pdf. -->
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>flying-saucer-pdf-itext5</artifactId>
<version>9.1.22</version>
</dependency>

二、代码

// Build the Word document object from the template on the classpath.
XWPFDocument filledTemplate =
        getWord(new ClassPathResource("你的word文档模板.docx"), rhPersonTargetList);

// Convert the Word document to HTML, buffering the markup in memory.
ByteArrayOutputStream htmlBuffer = new ByteArrayOutputStream();
XHTMLConverter.getInstance().convert(filledTemplate, htmlBuffer, null);
String rawHtml = htmlBuffer.toString("UTF-8");

// Generate the barcode image. The generator itself is not part of this article;
// treat it as "some image that gets inserted".
byte[] barcodeBytes = BarCodeUtils.generateBarCode128(target.getCardNo(), false);

// Clean up the HTML tags and styles.
String cleanedHtml = processHtml(barcodeBytes, rawHtml, map);
logger.debug(cleanedHtml);

// Render the cleaned HTML to PDF and hand back the PDF bytes.
ByteArrayOutputStream pdfBuffer = new ByteArrayOutputStream();
html2pdf(cleanedHtml, pdfBuffer);
return pdfBuffer.toByteArray();

/**
 * Renders an HTML string to PDF with Flying Saucer, strips blank pages and writes the
 * resulting bytes to {@code os}.
 *
 * <p>A SimSun font is registered so that Chinese text can be rendered. The font file is
 * looked up at a fixed location: {@code /usr/share/fonts/chiness/SIMSUN.TTC} when
 * {@code os.name} is "linux" (case-insensitive), otherwise the relative path
 * {@code SIMSUN.TTC}. The font file must be provided by the deployment.
 *
 * @param html the markup to render; it is handed to
 *             {@code ITextRenderer.setDocumentFromString} unchanged
 * @param os   destination for the PDF bytes; it is closed by this method once the write
 *             has been attempted
 * @throws Exception if parsing, font registration, layout, rendering or writing fails
 */
public static void html2pdf(String html, OutputStream os) throws Exception {
    ITextRenderer renderer = new ITextRenderer();
    renderer.setDocumentFromString(html);

    // Chinese text support. The earlier classpath lookup of font/SIMSUN.TTC was dead code:
    // its result was overwritten immediately, yet it could still fail when the resource was
    // missing. Locale.ROOT keeps the OS check independent of the JVM's default locale.
    // NOTE(review): "chiness" is kept byte-for-byte because it is a deployment path.
    String osName = System.getProperty("os.name", "");
    String fontPath = "linux".equals(osName.toLowerCase(java.util.Locale.ROOT))
            ? "/usr/share/fonts/chiness/SIMSUN.TTC"
            : "SIMSUN.TTC";
    renderer.getFontResolver().addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

    // Render into memory first so blank pages can be removed before anything is written out.
    renderer.layout();
    ByteArrayOutputStream rendered = new ByteArrayOutputStream();
    renderer.createPDF(rendered);

    try {
        os.write(removeBlankPdfPages(rendered.toByteArray()));
    } finally {
        // Callers rely on this method closing the stream; do it even when the write fails.
        os.close();
    }
}

/**
 * Removes blank pages from a PDF.
 *
 * <p>A page is kept only when its content stream is longer than 120 bytes; shorter pages
 * are treated as blank and left out of the copy. If anything goes wrong while reading or
 * copying, the failure is logged and the original bytes are returned unchanged, so this
 * method never makes PDF generation fail.
 *
 * @param source the bytes of the PDF to filter
 * @return the bytes of a new PDF holding only the non-blank pages, or {@code source}
 *         itself when filtering failed
 */
public static byte[] removeBlankPdfPages(byte[] source) {
    PdfReader reader = null;
    RandomAccessFileOrArray raf = null;
    try {
        // Step 1: open a reader over the source PDF.
        reader = new PdfReader(source);
        raf = new RandomAccessFileOrArray(source);
        Document document = new Document(reader.getPageSizeWithRotation(1));

        // Step 2: create a copier that writes the kept pages into memory.
        ByteArrayOutputStream pdfOs = new ByteArrayOutputStream();
        PdfCopy writer = new PdfCopy(document, pdfOs);

        // Step 3: open the target document.
        document.open();

        // Step 4: copy every page whose content stream exceeds the blank-page threshold.
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            int contentLength = reader.getPageContent(i, raf).length;
            logger.debug("页面内容页面长度" + i + "=" + contentLength);
            if (contentLength > 120) {
                writer.addPage(writer.getImportedPage(reader, i));
            }
        }

        document.close();
        writer.close();
        return pdfOs.toByteArray();
    } catch (Exception e) {
        // Best effort: keep the unfiltered PDF, but do not hide the reason.
        logger.warn("Failed to remove blank PDF pages; returning the original document", e);
        return source;
    } finally {
        if (raf != null) {
            try {
                raf.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if the in-memory source fails to close.
            }
        }
        if (reader != null) {
            reader.close();
        }
    }
}

/**

* 生成业务对应的word文档

* @param resource word文档模板的资源

* @param

* @return 生成的word文档

* @throws IOException

private XWPFDocument getWord(ClassPathResource resource, List<?> list)

throws IOException {

XWPFDocument wordDocument = new XWPFDocument(resource.getInputStream());

//如果是业务表格需要某行自动拓展多行，根据list动态生成行 Iterator<XWPFTable> itTable = wordDocument.getTablesIterator();//获得Word的表格

if (list!= null && list.size() != 0) {

while (itTable.hasNext()) { //遍历表格

int trailing = 0;

XWPFTableRow trailingRow = null;

XWPFTable table = (XWPFTable) itTable.next();

int count = table.getNumberOfRows();//获得表格总行数

for (int i = 0; i < count; i++) { //遍历表格的每一行

XWPFTableRow row = table.getRow(i);//获得表格的行

List<XWPFTableCell> cells = row.getTableCells();//在行元素中，获得表格的单元格

int cellSize = cells.size();

for (int j = 0; j < cellSize; j++) {

XWPFTableCell cell = cells.get(j);

//包含了关键字的一行

if (!StringUtils.isEmpty(cell.getText()) && "关键字".equals(cell.getText())) {

trailingRow = row;

trailing = i;

}

int size = list.size();

for (int i = 0; i < size; i++) {

Object obj = list.get(i);

copy(table, trailingRow, trailing + i + 1, obj);

}

table.removeRow(trailing);

}

return wordDocument;

}

/**

* 复制随迁人对应的行，并进行赋值

* @param table

* @param sourceRow

* @param rowIndex

* @param rhPersonTarget

public void copy(XWPFTable table, XWPFTableRow sourceRow, int rowIndex, Object obj) {

//在表格指定位置新增一行

XWPFTableRow targetRow = table.insertNewTableRow(rowIndex);

//复制行属性

targetRow.getCtRow().setTrPr(sourceRow.getCtRow().getTrPr());

List<XWPFTableCell> cellList = sourceRow.getTableCells();

if (cellList == null) {

return;

}

//复制列及其属性和内容

XWPFTableCell targetCell = null;

int cellListSize = cellList.size();

for (int i = 0; i < cellListSize; i++) {

XWPFTableCell sourceCell = cellList.get(i);

targetCell = targetRow.addNewTableCell();

//列属性

targetCell.getCTTc().setTcPr(sourceCell.getCTTc().getTcPr());

//段落属性

if (sourceCell.getParagraphs() != null && sourceCell.getParagraphs().size() > 0) {

targetCell.getParagraphs().get(0).getCTP().setPPr(sourceCell.getParagraphs().get(0).getCTP().getPPr());

if (sourceCell.getParagraphs().get(0).getRuns() != null && sourceCell.getParagraphs().get(0).getRuns().size() > 0) {

XWPFRun cellR = targetCell.getParagraphs().get(0).createRun();

cellR.setFontSize(9);

//根据obj进行赋值，请自行处理

cellR.setText( );

cellR.setBold(sourceCell.getParagraphs().get(0).getRuns().get(0).isBold());

} else {

targetCell.setText(sourceCell.getText());

}

} else {

targetCell.setText(sourceCell.getText());

}

坑一：poi的版本建议使用3.15或者之前的版本，过高的版本在运行时会报一堆类找不到的错误，经过实测，3.15之后的版本都会报错。

坑二：操作word的时候，由于是读取模板，setText方法无法直接覆盖原值：如果word文档表格的单元格中已有值，setText会把新内容追加到原来的值后面。动态插入的表格行则可以直接赋值。由于屏蔽了业务代码，需要自己进行改动使用。

本人采用的方式是不走setText方法进行赋值，在word文档的对应的表格设置关键字，直接在html进行关键字替换

坑三：word生成html未必是良好格式的，需要使用JSOUP框架进行校验：org.jsoup.nodes.Document htmlDocument = Jsoup.parse(html);

校验完成后得到的html也可能存在问题，需要自己查看html找出问题，再手动处理对应的标签，比如<br>标签等。

坑四：word文档插入图片问题。由于需要插入条形码，word文档虽然可以用poi框架直接插入图片，不过试过多次效果都不理想。本人的做法是在word文档模板中放一张占位图片，把条形码生成为base64数据，再在html中替换对应的<img>；如果存在多张图片，还需要自行判断替换哪一个。

String image = Base64.encodeBase64String(barcode);

image = "data:image/jpg;base64," + image;

坑五：html转成pdf的时候，可能会出现空白页：该页没有内容，但仍有几十到一百多个字节，导致pdf多出一页空白页。所以本人根据pdf每一页内容的字节长度来判断是否移除该页，对应的方法是removeBlankPdfPages。

小明哥哥的哥哥

关注

0
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
java生成pdf(word模板转html，html再转pdf)

场景描述：公司需要做一个打印功能，采用生成PDF的形式。由于PDF需要动态生成某些行，不能采用制作PDF表单的形式，word文档直接转PDF的形式会导致排版不美观。由于需要部署在linux服务器，采用windows的中间件的框架也不方便采用，最后选择了动态操作word文档，word文档生成html，再把html转为pdf的形式。中间也经历了不少坑，下面慢慢一一道来。一、加pom依赖操作word文档的依赖 <dependency> ...
复制链接

扫一扫

专栏目录