// Enough talk; here is the code.
/**
 * Reads a legacy Word ({@code .doc}) file and prints its text to standard output.
 *
 * <p>The input path is hard-coded to {@code E:/test.doc}. The stream, the document and
 * the extractor are opened in a try-with-resources statement so that each of them is
 * closed even when opening, parsing or text extraction throws.
 *
 * @throws Exception propagated from opening the file, parsing the document,
 *         extracting its text, or closing the resources
 */
@Test
public void readWord2003() throws Exception {
    // Resources are closed in reverse order of declaration:
    // extractor first, then the document, then the underlying stream.
    try (InputStream is = new FileInputStream("E:/test.doc");
         HWPFDocument doc2003 = new HWPFDocument(is);
         WordExtractor word2003 = new WordExtractor(doc2003)) {
        System.out.println(word2003.getText());
    }
}
/**
 * Reads an Office Open XML Word ({@code .docx}) file and prints its text to standard output.
 *
 * <p>The input path is hard-coded to {@code E:/test.docx}. The stream, the document and
 * the extractor are opened in a try-with-resources statement so that each of them is
 * closed even when opening, parsing or text extraction throws.
 *
 * @throws Exception propagated from opening the file, parsing the document,
 *         extracting its text, or closing the resources
 */
@Test
public void readWord2007() throws Exception {
    // Resources are closed in reverse order of declaration:
    // extractor first, then the document, then the underlying stream.
    try (InputStream is = new FileInputStream("E:/test.docx");
         XWPFDocument doc2007 = new XWPFDocument(is);
         XWPFWordExtractor word2007 = new XWPFWordExtractor(doc2007)) {
        System.out.println(word2007.getText());
    }
}
/**
 * Reads a PDF file and prints its extracted text to standard output.
 *
 * <p>The input path is hard-coded to {@code E:/test.pdf}. The stream and the loaded
 * document are opened in a try-with-resources statement so that both are closed even
 * when loading or text extraction throws.
 *
 * @throws Exception propagated from opening the file, loading the document,
 *         extracting its text, or closing the resources
 */
@Test
public void readPDF() throws Exception {
    // The document is closed before the stream it was loaded from
    // (reverse order of declaration).
    try (InputStream is = new FileInputStream("E:/test.pdf");
         PDDocument document = PDDocument.load(is)) {
        PDFTextStripper stripper = new PDFTextStripper();
        String content = stripper.getText(document);
        System.out.println(content);
    }
}