/**
 * Created by yan.shi on 2017/9/25.
 */
4 importorg.apache.pdfbox.pdfparser.PDFParser;5 importorg.apache.pdfbox.pdmodel.PDDocument;6 importorg.apache.pdfbox.util.PDFTextStripper;7
8 importorg.apache.poi.POIXMLDocument;9 importorg.apache.poi.POIXMLTextExtractor;10 importorg.apache.poi.hwpf.extractor.WordExtractor;11 importorg.apache.poi.openxml4j.exceptions.OpenXML4JException;12 importorg.apache.poi.openxml4j.opc.OPCPackage;13 importorg.apache.poi.xwpf.extractor.XWPFWordExtractor;14 importorg.apache.xmlbeans.XmlException;15
16 importjava.io.File;17 importjava.io.FileInputStream;18 importjava.io.IOException;19
/**
 * Uses PDFBox to parse PDF documents and
 * POI to parse doc and docx documents.
 */
24 public classExtractText {25
26 public static voidmain(String[] args) {27 ExtractText text=newExtractText();28 String filePath="文件";29 String content=text.getText(filePath);30 if(null!