package com.leixinhui.test;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Small command-line example that extracts the text of a PDF file with
 * Apache PDFBox and prints it to standard output.
 */
public class Test {

    /**
     * Program entry point: prints the text of a hard-coded PDF file.
     *
     * @param args command-line arguments (ignored)
     * @throws Exception if the PDF cannot be opened, parsed, or written out
     */
    public static void main(String[] args) throws Exception {
        String strFile = "F:\\代码\\Java\\Java操作PDF文档\\PDFbox使用教程.pdf";
        new Test().getText(strFile);
    }

    /**
     * Prints the text content of a PDF document to {@code System.out}.
     *
     * <p>All pages are extracted (page 1 through {@code Integer.MAX_VALUE})
     * and position-based sorting of the text is disabled.
     *
     * @param strFile path of the PDF file to read
     * @throws Exception if the file cannot be opened, the PDF cannot be
     *                   parsed, or the text cannot be written
     */
    private void getText(String strFile) throws Exception {
        boolean sort = false;
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        InputStream inputStream = null;
        PDDocument document = null;
        // Uses the platform default charset, as the original code did.
        Writer outWriter = new OutputStreamWriter(System.out);
        try {
            inputStream = new FileInputStream(strFile);
            document = PDDocument.load(inputStream);

            PDFTextStripper textStripper = new PDFTextStripper();
            textStripper.setSortByPosition(sort);
            textStripper.setStartPage(startPage);
            textStripper.setEndPage(endPage);
            textStripper.writeText(document, outWriter);
        } finally {
            // Nested finally blocks so each cleanup step runs even if an
            // earlier one throws. Every resource is null-checked because the
            // try body may fail before it is assigned (e.g. file not found);
            // an unguarded close() would throw a NullPointerException that
            // hides the real error.
            try {
                // Flush only: closing the writer would also close System.out
                // and silence any later output from the process.
                outWriter.flush();
            } finally {
                try {
                    if (document != null) {
                        document.close();
                    }
                } finally {
                    if (inputStream != null) {
                        inputStream.close();
                    }
                }
            }
        }
    }
}
// Note: this example uses Apache PDFBox 1.6.0.