源代码
package pdftoword;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Scanner;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Command-line helper that extracts the text of a PDF with PDFBox and writes
 * it, UTF-8 encoded, to a sibling file carrying a {@code .doc} extension.
 *
 * <p>The output contains only the text produced by {@code PDFTextStripper};
 * no layout, images or Word-specific formatting is written.
 */
public class PdfToWord {

    /**
     * Extracts the text of every page of the given PDF and writes it next to
     * the source file, replacing the file name's extension with {@code .doc}
     * (or appending {@code .doc} when the file name has no extension).
     *
     * <p>Failures are reported on standard error via a stack trace and are
     * not propagated to the caller.
     *
     * @param url file-system path of the PDF to convert
     */
    public static void pdf(String url) {
        try {
            File pdfFile = new File(url);
            PDDocument doc = PDDocument.load(pdfFile);
            try {
                int pageCount = doc.getNumberOfPages();

                // Look for the extension in the file name only, so that a dot
                // inside a directory name is never mistaken for it, and a
                // name without any dot no longer triggers substring(0, -1).
                String name = pdfFile.getName();
                int dot = name.lastIndexOf('.');
                String baseName = dot >= 0 ? name.substring(0, dot) : name;
                File target = new File(pdfFile.getParentFile(), baseName + ".doc");

                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setSortByPosition(true);
                stripper.setStartPage(1);
                stripper.setEndPage(pageCount);

                // FileOutputStream creates the file when it is missing, and
                // try-with-resources closes the writer (and the stream it
                // wraps) even if text extraction fails part-way through.
                try (Writer writer =
                        new OutputStreamWriter(new FileOutputStream(target), "utf-8")) {
                    stripper.writeText(doc, writer);
                }
            } finally {
                // Explicit close() rather than try-with-resources: it does not
                // depend on PDDocument being declared Closeable in the PDFBox
                // version on the classpath.
                doc.close();
            }
        } catch (Exception e) {
            // Top-level boundary of a console tool: report and carry on
            // instead of propagating, as callers expect no exception.
            e.printStackTrace();
        }
    }

    /**
     * Prompts for a PDF path on standard input and converts that file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("请输入文件路径");
        // The scanner is deliberately left open: closing it would also close
        // System.in for the rest of the process.
        Scanner scanner = new Scanner(System.in);
        if (!scanner.hasNextLine()) {
            return;
        }
        // Read the whole line so that paths containing spaces stay intact.
        String path = scanner.nextLine().trim();
        if (!path.isEmpty()) {
            pdf(path);
        }
    }
}
各种 jar 包的下载地址：https://pdfbox.apache.org/download.cgi
下载以下的包运行即可