// Note: not applicable to scanned PDFs or to images.
package dsa;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Small command-line utility that extracts the text of a fixed page range
 * from a PDF file and writes it to a UTF-8 text file using Apache PDFBox.
 *
 * <p>Input path, output path and page range are fixed constants below.
 */
public class pdfbox {

    /** PDF file to read. */
    private static final String INPUT_PATH = "d:/00.pdf";

    /** Text file to write (created or overwritten). */
    private static final String OUTPUT_PATH = "d:/00.txt";

    /** First page passed to {@code PDFTextStripper.setStartPage}. */
    private static final int START_PAGE = 100;

    /** Last page passed to {@code PDFTextStripper.setEndPage}. */
    private static final int END_PAGE = 155;

    /**
     * Loads the PDF, prints its page count to standard output, and writes the
     * text of pages {@code START_PAGE} through {@code END_PAGE} to the output file.
     *
     * @param args ignored
     * @throws Exception if the PDF cannot be loaded or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File file = new File(INPUT_PATH);

        // try-with-resources guarantees the document is closed even when
        // extraction or writing fails part-way through.
        try (PDDocument doc = PDDocument.load(file)) {
            int numberOfPages = doc.getNumberOfPages();
            System.out.println("页数:" + numberOfPages + " ");

            // The output file is opened only after the PDF loaded successfully,
            // so a failed load does not create or truncate it. The stream and
            // the writer are declared separately so the stream is closed even
            // if constructing the writer throws.
            try (FileOutputStream fos = new FileOutputStream(new File(OUTPUT_PATH));
                 Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
                PDFTextStripper pdfTextStripper = new PDFTextStripper();
                pdfTextStripper.setStartPage(START_PAGE);
                pdfTextStripper.setEndPage(END_PAGE);
                pdfTextStripper.writeText(doc, writer);
            }
        }
    }
}