Java 读取 Word 文件需要使用 Apache POI(poi.jar),可以从 Apache POI 官方网站下载。
代码如下:
package com.ssh.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
 * Utility for extracting the plain text of Microsoft Word documents using Apache POI.
 *
 * <p>Files whose name ends in {@code docx} are read through the XWPF (OOXML) extractor,
 * files whose name ends in {@code doc} through the HWPF (binary) extractor. All failures
 * are reported by returning {@code null}; the stack trace is printed to standard error.
 */
public class WordExcel {

    /**
     * Extracts the text of a Word document, choosing the extractor from the file name.
     *
     * @param file the document to read; may be {@code null}
     * @return the extracted text, or {@code null} if {@code file} is {@code null}, its
     *         name ends in neither {@code doc} nor {@code docx} (compared ignoring case),
     *         or reading the document failed
     */
    public static String readWord(File file) {
        if (file == null) {
            return null;
        }
        String name = file.getName();
        if (hasSuffixIgnoreCase(name, "docx")) {
            // Word 2007+ (OOXML) document
            return readWord2007(file);
        }
        if (hasSuffixIgnoreCase(name, "doc")) {
            // Word 97-2003 (binary) document
            return readWord2003(file);
        }
        return null;
    }

    /** Returns whether {@code name} ends with {@code suffix}, ignoring case. */
    private static boolean hasSuffixIgnoreCase(String name, String suffix) {
        int offset = name.length() - suffix.length();
        return offset >= 0 && name.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /**
     * Reads a binary {@code .doc} file with {@link WordExtractor}.
     *
     * @param file the document to read
     * @return the extracted text, or {@code null} if any exception occurred
     */
    private static String readWord2003(File file) {
        InputStream is = null;
        try {
            is = new FileInputStream(file);
            WordExtractor ex = new WordExtractor(is);
            return ex.getText();
        } catch (Exception e) {
            // Best-effort API: report the problem and signal failure with null.
            e.printStackTrace();
            return null;
        } finally {
            // Always release the file handle, on success and on failure alike.
            if (is != null) {
                try {
                    is.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Reads an OOXML {@code .docx} file with {@link XWPFWordExtractor}.
     *
     * @param file the document to read
     * @return the extracted text, or {@code null} if any exception occurred
     */
    private static String readWord2007(File file) {
        OPCPackage opcPackage = null;
        try {
            opcPackage = POIXMLDocument.openPackage(file.getAbsolutePath());
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            return extractor.getText();
        } catch (Exception e) {
            // Best-effort API: report the problem and signal failure with null.
            e.printStackTrace();
            return null;
        } finally {
            if (opcPackage != null) {
                // revert() releases the package without saving. close() is avoided
                // because the package is opened by path and, per the POI docs, closing
                // a writable package may write it back to disk.
                opcPackage.revert();
            }
        }
    }

    /**
     * Prints the text of a Word document to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the document to read. When
     *             absent, the placeholder path {@code "****.doc"} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "****.doc";
        String text = WordExcel.readWord(new File(path));
        System.out.println(text);
    }
}