1、用到的jar有:pdfbox-1.8.4.jar、fontbox-1.8.4.jar、pdfbox-app-1.8.4.jar、preflight-1.8.4.jar、preflight-app-1.8.4.jar、jempbox-1.8.4.jar、xmpbox-1.8.4.jar
第一次写Java,不知道哪些jar是要用的,全部都添加进去了。下载地址:http://pdfbox.apache.org/downloads.html
2、将jar放在工程目录下,右键工程→Build Path→Configure Build Path→Libraries→Add JARs/Add External JARs
3、代码
参考:http://blog.csdn.net/meifage/article/details/6963985
参考:http://daning.iteye.com/blog/165284
参考:http://blog.csdn.net/weijie_search/article/details/2662189
package pdfReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Command-line utility that extracts all text from a PDF file with PDFBox and
 * writes it, UTF-8 encoded, to a ".txt" file next to the source PDF.
 */
public class pdfReaderClass {

    /** PDF that is converted when no path is given on the command line. */
    private static final String DEFAULT_PDF_PATH = "F:\\Eclipseworkspace\\read2.pdf";

    /** Number of trailing characters (".pdf") that are replaced by ".txt". */
    private static final int EXTENSION_LENGTH = 4;

    /**
     * Converts one PDF file to a text file.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to convert.
     *             When absent, {@link #DEFAULT_PDF_PATH} is used.
     */
    public static void main(String[] args) {
        System.out.println("Start...");
        String pdfFilePath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        System.out.println("The pdf file path is: " + pdfFilePath);

        // The output path is the input path with its last four characters
        // replaced by ".txt"; a shorter path cannot carry a ".pdf" extension.
        if (pdfFilePath.length() <= EXTENSION_LENGTH) {
            System.out.println("The pdf file path is invalid!");
            return; // stop here instead of failing later on a null output path
        }
        String txtFilePath =
                pdfFilePath.substring(0, pdfFilePath.length() - EXTENSION_LENGTH) + ".txt";

        String str = getTextFromPDF(pdfFilePath);
        if (str == null) {
            // getTextFromPDF has already printed the underlying exception.
            System.out.println("Fail to extract text from the pdf file!");
            return;
        }

        if (!writeTextFile(txtFilePath, str)) {
            System.out.println("Fail to write the output file: " + txtFilePath);
            return;
        }
        System.out.println("The output file path is: " + txtFilePath);
        System.out.println("Over!");
    }

    /**
     * Writes {@code text} to {@code txtFilePath} using UTF-8, replacing any existing file.
     *
     * @param txtFilePath destination file path
     * @param text        content to write
     * @return {@code true} if the text was written and flushed, {@code false} on any I/O error
     */
    private static boolean writeTextFile(String txtFilePath, String text) {
        Writer txtWriter = null;
        try {
            txtWriter = new OutputStreamWriter(new FileOutputStream(new File(txtFilePath)), "UTF-8");
            txtWriter.write(text);
            // Flush inside the try block so a write error is reported as a failure
            // rather than surfacing only when the writer is closed.
            txtWriter.flush();
            return true;
        } catch (IOException e) {
            // Also covers FileNotFoundException and UnsupportedEncodingException.
            e.printStackTrace();
            return false;
        } finally {
            if (txtWriter != null) {
                try {
                    txtWriter.close();
                } catch (IOException e) {
                    System.out.println("Fail to close the output file!");
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Extracts the text of every page of a PDF file.
     *
     * @param pdfFilePath file path of the PDF to read
     * @return all text in the pdf file, or {@code null} if the path is {@code null}
     *         or the file cannot be opened or parsed (the exception is printed to stderr)
     */
    public static String getTextFromPDF(String pdfFilePath) {
        if (pdfFilePath == null) {
            System.out.println("The pdf file path is invalid!");
            return null;
        }

        String result = null;
        FileInputStream is = null;
        PDDocument document = null;
        try {
            is = new FileInputStream(pdfFilePath);
            PDFParser parser = new PDFParser(is);
            parser.parse();
            document = parser.getPDDocument();
            PDFTextStripper stripper = new PDFTextStripper();
            result = stripper.getText(document);
        } catch (IOException e) {
            // Covers FileNotFoundException as well; callers see the failure as null.
            e.printStackTrace();
        } finally {
            // Close the document before the stream it was parsed from.
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    System.out.println("Fail to close the pdf document!");
                    e.printStackTrace();
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    System.out.println("Fail to close the FileInputStream!");
                    e.printStackTrace();
                }
            }
        }
        return result;
    }
}
遇到的问题:http://blog.csdn.net/lin9118/article/details/9309445