解析文件
doc、docx 文件的解析方法(使用 Apache POI):
public static String parseFile(File docFile) {
String fileName = docFile.getName();
String docContent = null;
FileInputStream fis = null;
try {
fis = new FileInputStream(docFile);
if (fileName.toLowerCase().endsWith(doc)) {
HWPFDocument doc = new HWPFDocument(fis);
docContent = doc.getDocumentText();
} else if (fileName.toLowerCase().endsWith(docx)) {
XWPFDocument xdoc = new XWPFDocument(fis);
XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc);
docContent = extractor.getText();
} else {
logger.info("文件名为:" + fileName + " 格式有误");
}
} catch (Except