前言
仅记录学习笔记,如有错误欢迎指正。
最近项目需要把用户上传的pdf裁剪出一部分,和另外一个pdf拼接,基于目前的代码,记录一下pdf和html相互转换的demo。网上大多数的转换都有问题。
pdf转html
maven引入jar包:
这个jar还会依赖其他jar包,maven会自动一起导入的。
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>1.7</version>
</dependency>
注释的代码可以输出为html文件
path为:“D:\xxx\abc.html”
/**
 * Converts a PDF file to HTML with pdf2dom's {@code PDFDomTree}.
 *
 * <p>The PDF is loaded from {@code inPdfPath}. When {@code outputHtmlPath} is
 * non-blank the generated HTML is written to that file as UTF-8; otherwise the
 * HTML is printed to standard output.
 *
 * <p>Any failure is reported with {@code printStackTrace()} and not propagated,
 * so callers never see an exception from this method.
 *
 * @param inPdfPath      path of the PDF file to convert
 * @param outputHtmlPath path of the HTML file to create, e.g. {@code D:\xxx\abc.html};
 *                       {@code null} or blank to print the HTML to standard output
 */
public static void pdfToHtmlTest(String inPdfPath, String outputHtmlPath) {
    // try-with-resources closes the loaded document even when the conversion fails.
    try (PDDocument document = PDDocument.load(new File(inPdfPath))) {
        PDFDomTree pdfDomTree = new PDFDomTree();
        if (outputHtmlPath == null || outputHtmlPath.trim().isEmpty()) {
            // No target file requested: render into memory and print the result.
            Writer out = new StringWriter();
            pdfDomTree.writeText(document, out);
            System.out.println(out.toString());
        } else {
            // Fully-qualified names keep this snippet independent of extra imports.
            try (Writer out = java.nio.file.Files.newBufferedWriter(
                    java.nio.file.Paths.get(outputHtmlPath),
                    java.nio.charset.StandardCharsets.UTF_8)) {
                pdfDomTree.writeText(document, out);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
这种方法转为html后,css样式就确定了,所以如果要拼接其他的html内容,如何在不影响其他css样式的情况下去拼接并且成功显示?很麻烦。暂时未能实现。
html转pdf
maven
<!-- HTML TO PDF-->
<!-- itext7html转pdf -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>3.0.2</version>
</dependency>
<!-- 中文字体支持 -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>font-asian</artifactId>
<version>7.1.13</version>
</dependency>
demo:
package com.test.util.HtmlToPdf;
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.styledxmlparser.jsoup.helper.StringUtil;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
 * Helper for converting HTML to PDF with iText 7 ({@code html2pdf}), registering
 * fonts that can render Chinese text.
 */
public class Html2PdfUtil {

    /** Name of the built-in font registered for Chinese text. */
    private static final String DEFAULT_CJK_FONT = "STSongStd-Light";

    /** Encoding string used together with {@link #DEFAULT_CJK_FONT}. */
    private static final String DEFAULT_CJK_ENCODING = "UniGB-UCS2-H";

    /**
     * Converts HTML to an A4-sized PDF.
     *
     * <p>The built-in {@code STSongStd-Light} font is always registered. When
     * {@code fontPath} is non-blank, that font is registered as well.
     *
     * <p>{@code outputStream} is wrapped by the PDF writer and is closed when this
     * method returns, whether it completes normally or throws.
     *
     * @param inputStream  stream supplying the HTML to convert
     * @param fontPath     path of an additional font (for example Microsoft YaHei);
     *                     {@code null} or blank to skip it. Fonts with a {@code .ttc}
     *                     suffix need {@code ,0} appended to the path
     * @param outputStream stream receiving the generated PDF
     * @throws IOException if a font cannot be loaded or the PDF cannot be written
     */
    public static void convertToPdf(InputStream inputStream, String fontPath, OutputStream outputStream) throws IOException {
        // Resources are closed in reverse order: the document first, then its writer.
        try (PdfWriter pdfWriter = new PdfWriter(outputStream);
             PdfDocument pdfDocument = new PdfDocument(pdfWriter)) {
            // Use A4 as the page size.
            pdfDocument.setDefaultPageSize(PageSize.A4);
            // A watermark could be attached here by registering an END_PAGE event
            // handler on pdfDocument.

            // Register fonts able to render Chinese text.
            ConverterProperties properties = new ConverterProperties();
            FontProvider fontProvider = new FontProvider();
            PdfFont sysFont = PdfFontFactory.createFont(DEFAULT_CJK_FONT, DEFAULT_CJK_ENCODING, false);
            fontProvider.addFont(sysFont.getFontProgram(), DEFAULT_CJK_ENCODING);
            // Optional custom font supplied by the caller.
            if (!StringUtil.isBlank(fontPath)) {
                PdfFont customFont = PdfFontFactory.createFont(fontPath, PdfEncodings.IDENTITY_H, false);
                fontProvider.addFont(customFont.getFontProgram(), PdfEncodings.IDENTITY_H);
            }
            properties.setFontProvider(fontProvider);
            HtmlConverter.convertToPdf(inputStream, pdfDocument, properties);
        }
    }

    /**
     * Demo entry point: renders a hard-coded HTML fragment into {@code test.pdf}
     * in the working directory.
     *
     * @param args unused
     * @throws IOException if the PDF cannot be produced
     */
    public static void main(String[] args) throws IOException {
        String htmlText = " <head>\n" +
                "</head>\n" +
                "<body>\n" +
                " <p>iText</p>\n" +
                " <div><b>大发发发萨法发发呆发呆东方大厦发</b></div>\n" +
                " <div style=\"font-family:SimSun;weight:normal\">Your developer here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的journey with iText begins here法大大是地方大发顺丰萨法发顺丰的</div>\n" +
                " \n" +
                " <div style=\"font-family:SimSun;font-weight:bold\">Your developer here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的journey with iText begins here法大大是地方大发顺丰萨法发顺丰的</div>\n" +
                " \n" +
                " <p>iText</p>\n" +
                " <div><b>大发发发萨法发发呆发呆东方大厦发</b></div>\n" +
                " <div style=\"font-family:宋体;weight:normal\">Your developer here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的journey with iText begins here法大大是地方大发顺丰萨法发顺丰的</div>\n" +
                " \n" +
                " <div style=\"font-family:宋体;font-weight:bold\">Your developer here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的here法大大是地方大发顺丰萨法发顺丰的journey with iText begins here法大大是地方大发顺丰萨法发顺丰的</div>\n" +
                " </body>";
        File file = new File("test.pdf");
        // Both streams are released even if the conversion throws.
        try (InputStream html = convertStringToInputStream(htmlText);
             OutputStream pdf = new FileOutputStream(file)) {
            convertToPdf(html, "", pdf);
        }
    }

    /**
     * Wraps the UTF-8 bytes of a string in an in-memory input stream.
     *
     * @param name text to expose as a stream
     * @return stream over the UTF-8 encoding of {@code name}
     */
    private static InputStream convertStringToInputStream(String name) {
        return new ByteArrayInputStream(name.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Reads a file (for example a PDF) fully into a byte array.
     *
     * @param filePath path of the file to read
     * @return the file content, or {@code null} if the file is missing or cannot
     *         be read (the error is printed, not thrown)
     */
    private byte[] getBytes(String filePath) {
        // try-with-resources closes the file stream even when read() fails.
        try (FileInputStream fis = new FileInputStream(new File(filePath));
             ByteArrayOutputStream bos = new ByteArrayOutputStream(1000)) {
            byte[] chunk = new byte[1000];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bos.write(chunk, 0, n);
            }
            return bos.toByteArray();
        } catch (IOException e) {
            // FileNotFoundException is an IOException; both cases yield null as before.
            e.printStackTrace();
            return null;
        }
    }
}