首先导入以下 jar 包：pdfbox-app-1.6.0.jar、fontbox-1.6.0.jar、pdfbox-1.6.0.jar 和 jempbox-1.6.0.jar。
同时还要导入 Lucene 3.0 的包：lucene-core-3.0.0.jar。
下载网址：http://pdfbox.apache.org/download.html
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.lucene.document.Document;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
public class PdfLuceneTest {
/**
 * Program entry point: runs {@code geText} on a fixed sample PDF path and
 * prints the stack trace of any exception that call throws.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final String pdfPath = "D:\\test\\small\\1.pdf";
    try {
        geText(pdfPath);
    } catch (Exception failure) {
        // Any failure is only reported via its stack trace; nothing is rethrown.
        failure.printStackTrace();
    }
}
public static void geText(String file) throws E