// 程序源代码 (program source code):
import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.OutputStreamWriter;import org.apache.pdfbox.pdfparser.PDFParser;import org.apache.pdfbox.pdmodel.PDDocument;import org.apache.pdfbox.util.PDFTextStripper;public class PdfExtracter { public PdfExtracter() { } public String GetTextFromPdf(String filename) throws Exception { String content = null; PDDocument pdfdocument = null; FileInputStream is = new FileInputStream(filename); PDFParser parser = new PDFParser(is); parser.parse(); pdfdocument = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); content = stripper.getText(pdfdocument); return content; } public static void main(String args[]) { PdfExtracter pf = new PdfExtracter(); try { String ts = pf.GetTextFromPdf("c:/a.pdf"); //System.out.println(ts); OutputStreamWriter osw = new OutputStreamWriter( new FileOutputStream("c:/aa.txt")); osw.write(ts); osw.flush(); osw.close(); } catch (Exception e) { e.printStackTrace(); } }}