import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.OutputStreamWriter;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
 * Small utility that extracts the text content of a PDF file using PDFBox
 * ({@code PDDocument} + {@code PDFTextStripper}).
 */
public class Pdftext {

    /**
     * Charset used both to encode the stripper output into bytes and to decode
     * those bytes back into a String. Using one explicit charset on both sides
     * keeps the round trip lossless regardless of the platform default.
     */
    private static final String TEXT_CHARSET = "UTF-8";

    /** Path used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_PDF_PATH = "d:/hello.pdf";

    /**
     * Extracts the text of the given PDF file.
     *
     * <p>Failures are handled best-effort: any {@link Exception} raised while
     * loading or stripping the document is printed to standard error and an
     * empty string is returned. As a side effect, the file name and the number
     * of encoded bytes are printed to standard output on success.
     *
     * @param f the PDF file to read
     * @return the extracted text, or {@code ""} if extraction failed
     * @throws Exception declared for backward compatibility with existing
     *         callers; exceptions raised during extraction are caught here
     */
    public static String getTxt(File f) throws Exception {
        String ts = "";
        PDDocument pdfdocument = null;
        try {
            pdfdocument = PDDocument.load(f);
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter(out, TEXT_CHARSET);
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText(pdfdocument.getDocument(), writer);
            // Closing the writer flushes any buffered characters into 'out'
            // before the bytes are read back.
            writer.close();
            byte[] contents = out.toByteArray();
            ts = new String(contents, TEXT_CHARSET);
            System.out.println(f.getName() + "length is:" + contents.length
                    + "\n");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the document, even when stripping failed.
            if (pdfdocument != null) {
                try {
                    pdfdocument.close();
                } catch (Exception closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
        // Returned outside 'finally' so that Errors (e.g. OutOfMemoryError)
        // are not silently discarded by an abrupt return.
        return ts;
    }

    /**
     * Prints the text of a PDF file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read.
     *        When absent, {@code d:/hello.pdf} is used.
     * @throws Exception propagated from {@link #getTxt(File)}
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        File file = new File(path);
        System.out.println(Pdftext.getTxt(file));
    }
}
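/*
 * Metadata carried over from the web page this listing was copied from:
 * Downloads: 174
 * Share to:
 * Posted: 2011-07-04 20:59
 * Views: 5267
 * Comments
 */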