需要引用 Apache POI 开源项目的 jar 包,下载地址:http://poi.apache.org/download.html
需要引入以下 6 个 jar 包:poi-3.8-20120326.jar、poi-ooxml-3.8-20120326.jar、poi-ooxml-schemas-3.8-20120326.jar、poi-scratchpad-3.8-20120326.jar、xmlbeans-2.3.0.jar、dom4j-1.6.1.jar。
/**
 * Extracts the plain text of a Word document.
 *
 * <p>Files whose name ends with {@code .doc} (Word 97-2003 binary format) are read
 * with {@code WordExtractor}; files ending with {@code .docx} (Word 2007+ OOXML
 * format) are read with {@code XWPFWordExtractor}. The extension is matched
 * case-insensitively. Runs of two or more consecutive line breaks in the extracted
 * text are collapsed into a single line break.
 *
 * <p>This method is best-effort: any failure while opening or parsing the file is
 * printed to standard error via {@code printStackTrace()} and is not propagated.
 *
 * @param filePath path of the Word file to read; may be {@code null}
 * @return the extracted text, or an empty string when {@code filePath} is
 *         {@code null}, the extension is neither {@code .doc} nor {@code .docx},
 *         or the file could not be read
 */
public static String readWord(String filePath){
    String text = "";
    if (filePath == null) {
        return text;
    }
    File file = new File(filePath);
    String fileName = file.getName();

    if (hasExtensionIgnoreCase(fileName, ".doc")) {
        // Word 97-2003 binary format.
        FileInputStream stream = null;
        try {
            stream = new FileInputStream(file);
            WordExtractor word = new WordExtractor(stream);
            text = collapseBlankLines(word.getText());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the handle is released even when parsing fails.
            if (stream != null) {
                try {
                    stream.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    } else if (hasExtensionIgnoreCase(fileName, ".docx")) {
        // Word 2007+ OOXML format.
        OPCPackage oPCPackage = null;
        try {
            oPCPackage = POIXMLDocument.openPackage(filePath);
            XWPFDocument xwpf = new XWPFDocument(oPCPackage);
            POIXMLTextExtractor ex = new XWPFWordExtractor(xwpf);
            text = collapseBlankLines(ex.getText());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the package with revert() rather than close(): the package is
            // opened from a path, and close() on a writable package saves it back to
            // disk, whereas this method must only read the file.
            if (oPCPackage != null) {
                try {
                    oPCPackage.revert();
                } catch (RuntimeException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    return text;
}

/** Returns true when {@code fileName} ends with {@code extension}, ignoring case. */
private static boolean hasExtensionIgnoreCase(String fileName, String extension) {
    int offset = fileName.length() - extension.length();
    // regionMatches returns false for a negative offset, i.e. a name shorter than the extension.
    return fileName.regionMatches(true, offset, extension, 0, extension.length());
}

/** Collapses runs of two or more consecutive CRLF or LF line breaks into a single one. */
private static String collapseBlankLines(String raw) {
    String collapsed = raw.replaceAll("(\\r\\n){2,}", "\r\n");
    return collapsed.replaceAll("(\\n){2,}", "\n");
}