1:添加依赖
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the pdfToWord sample project.
  Declares the libraries needed to extract text from a PDF with Apache PDFBox.
  NOTE(review): all versions are pinned to the releases current when this sample
  was written; check for newer compatible releases before reusing it.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates -->
    <groupId>pdfToWord</groupId>
    <artifactId>pdfToWord</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- Logging facade -->
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>

        <!-- PDFBox font library; kept at the same version as pdfbox-tools below -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.11</version>
        </dependency>

        <!-- ImageIO plugin for JBIG2-encoded images -->
        <dependency>
            <groupId>com.levigo.jbig2</groupId>
            <artifactId>levigo-jbig2-imageio</artifactId>
            <version>2.0</version>
        </dependency>

        <!--
          PDFBox tools module. Presumably this is what brings the core pdfbox
          classes (PDDocument, PDFTextStripper) onto the classpath transitively;
          verify with `mvn dependency:tree`.
        -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.11</version>
        </dependency>

        <!-- Apache Commons IO utilities -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.6</version>
        </dependency>
    </dependencies>
</project>
2:编写转换的方法
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

/**
 * Extracts the text of a PDF and writes it to a sibling file with a {@code .doc} extension.
 *
 * <p>The output is the UTF-8 text produced by {@link PDFTextStripper}; it is not a real
 * Word document, only the file extension is {@code .doc}.
 *
 * @author Angin
 * @date 2019/3/18 0018.
 */
public class PdfToWord {

    /**
     * Converts the PDF at {@code pdfPath} into a text file next to it, named like the PDF
     * but with its extension replaced by {@code .doc}.
     *
     * <p>This is best-effort: an {@link IOException} while reading the PDF or writing the
     * output is reported on standard error and not rethrown. {@code "end.."} is printed
     * to standard output in either case.
     *
     * @param pdfPath path of the PDF file to convert; must not be {@code null}
     * @throws NullPointerException if {@code pdfPath} is {@code null}
     */
    public void convertText(String pdfPath) {
        Objects.requireNonNull(pdfPath, "pdfPath");
        String docPath = toDocPath(pdfPath);

        // The PDF is loaded before the output stream is opened, so an unreadable PDF
        // does not leave an empty .doc file behind. try-with-resources closes both
        // resources (in reverse order) even when extraction fails.
        try (PDDocument doc = PDDocument.load(new File(pdfPath));
             Writer writer = new OutputStreamWriter(
                     new FileOutputStream(docPath), StandardCharsets.UTF_8)) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(doc.getNumberOfPages());
            stripper.writeText(doc, writer);
        } catch (IOException e) {
            System.err.println("Failed to convert " + pdfPath + " to " + docPath);
            e.printStackTrace();
        }
        System.out.println("end..");
    }

    /**
     * Builds the output path by replacing the file extension of {@code pdfPath} with
     * {@code .doc}.
     *
     * <p>Only a dot inside the file name counts as the extension separator. Dots in
     * directory names are ignored, and a path without any extension simply gets
     * {@code .doc} appended.
     */
    private static String toDocPath(String pdfPath) {
        int lastSeparator = Math.max(pdfPath.lastIndexOf('/'), pdfPath.lastIndexOf('\\'));
        int lastDot = pdfPath.lastIndexOf('.');
        String base = lastDot > lastSeparator ? pdfPath.substring(0, lastDot) : pdfPath;
        return base + ".doc";
    }
}
3:main方法中进行测试
/** * main方法测试 * @author Angin * @date 2019/3/18 0018. */ public class MainClass { public static void main(String[] args) { PdfToWord convert=new PdfToWord(); convert.convertText("E:\\pdfToWord.pdf"); } }
注意:此方法只适合文本型(文档型)的 PDF。如果 PDF 的内容是图片(例如扫描件),则无法从中提取文字,转换后的文件没有可读内容。另外,生成的 .doc 文件实际上只包含提取出的纯文本,不会保留原 PDF 的排版和图片。