1、引入 Maven 依赖
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>1.9</version>
</dependency>
2、Java 实现
/**
 * Converts a PDF file to an HTML file.
 *
 * <p>The PDF is loaded with {@code PDDocument.load}, rendered through a
 * {@code PDFDomTree}, and written to {@code outFilename} using a UTF-8
 * {@code PrintWriter}. Both the document and the writer are closed before
 * this method returns, whether it completes normally or throws.
 *
 * @param inputFilename path of the PDF file to read
 * @param outFilename path of the HTML file to write
 * @throws IOException if loading the PDF, opening the output file, or the conversion fails
 * @throws ParserConfigurationException declared for the {@code PDFDomTree} setup
 */
public static void generateHTMLFromPDF(String inputFilename, String outFilename) throws IOException, ParserConfigurationException {
    // The PDF is opened first so that a load failure does not create or
    // truncate the output file. try-with-resources closes the writer and then
    // the document, even when the conversion throws.
    try (PDDocument pdf = PDDocument.load(new File(inputFilename));
         Writer output = new PrintWriter(outFilename, "utf-8")) {
        PDFDomTree tree = new PDFDomTree();
        tree.writeText(pdf, output);
    }
}
/**
 * Manual test entry point: converts one hard-coded sample PDF to HTML.
 *
 * @param args command-line arguments (not used)
 * @throws IOException propagated from {@code PdfUtil.generateHTMLFromPDF}
 * @throws ParserConfigurationException propagated from {@code PdfUtil.generateHTMLFromPDF}
 */
public static void main(String[] args) throws IOException, ParserConfigurationException {
    // Source PDF and target HTML sit side by side in the same folder.
    final String pdfPath = "C:\\Users\\ysx\\Desktop\\fileTest\\DB521危险化学品企业可查验防护措施管理规范.pdf";
    final String htmlPath = "C:\\Users\\ysx\\Desktop\\fileTest\\DB521危险化学品企业可查验防护措施管理规范.html";

    PdfUtil.generateHTMLFromPDF(pdfPath, htmlPath);
}