下载地址:http://pdfbox.apache.org/downloads.html
下载所需jar包如下:
bcprov-jdk16-140.jar
commons-logging-1.1.3.jar
fontbox-1.8.7.jar
pdfbox-1.8.7.jar
代码实现如下:
package com.util;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
* @author www.yoodb.com
*/
public class PdfParser {
/**
* 文件名集合
*/
private static Map fileNames = new HashMap();
/**
* 文件夹地址
*/
private static String path = "D:\\Test\\";
/**
*
*/
public static void main(String[] args) throws Exception {
getFile(path);
FileInputStream fis = null;
BufferedWriter writer = null;
for(Entry entry: fileNames.entrySet()) {
fis = new FileInputStream(path + entry.getValue());
writer = new BufferedWriter(new FileWriter(path + entry.getKey()));
PDFParser p = new PDFParser(fis);
p.parse();
PDFTextStripper ts = new PDFTextStripper();
String ss = ts.getText(p.getPDDocument());
writer.write(ss);
fis.close();
writer.close();
}
}
private static void getFile(String path){
File file = new File(path);
File[] array = file.listFiles();
for(int i=0;i
if(array[i].isFile()){
String hz = array[i].getName().substring(array[i].getName().lastIndexOf("."),array[i].getName().length());
if(hz.equals(".pdf")){
String mz = array[i].getName().substring(0,array[i].getName().lastIndexOf("."));
fileNames.put(mz+".txt", array[i].getName());
}
}else if(array[i].isDirectory()){
getFile(array[i].getPath());
}
}
}
}
如果报如下异常错误:
Exception in thread "main" java.lang.NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider
at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1594)
at org.apache.pdfbox.pdmodel.PDDocument.decrypt(PDDocument.java:942)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:337)
at org.apache.pdfbox.util.PDFTextStripper.getText(PDFTextStripper.java:257)
at com.hkm.TankWar.pdf2.getText(pdf2.java:18)
at com.hkm.TankWar.pdf2.main(pdf2.java:67)
Caused by: java.lang.ClassNotFoundException: org.bouncycastle.jce.provider.BouncyCastleProvider
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 6 more
解决方案:
BouncyCastle可以从www.bouncycastle.org下载(对应JDK版本的BouncyCastle)
1、将下载的bcprov-jdk16-140.jar包放在F:\tools\Java\jdk1.7.0_51\jre\lib\ext目录下;
2、打开F:\tools\Java\jdk1.7.0_51\jre\lib\security目录下的java.security文件,在# List of providers and their preference orders (see above):下面添加如下一行(其中的x请替换为现有provider列表中下一个未使用的序号):
security.provider.x=org.bouncycastle.jce.provider.BouncyCastleProvider
(以上是网上流传的解决方法,实际上不进行第2步的配置操作也可以解决该异常。我个人的解决方式是直接把bcprov-jdk16-140.jar当成普通jar包加入项目的classpath,同样解决了该异常,不需要其他配置。)