POI读取Word文件(支持HWPF和XWPF两种方式,分别对应.doc和.docx格式)
参考:HSSF,XSSF,SXSSF三种方式
1.引入Maven依赖(以下三个POI组件的版本必须一致)
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.1</version>
</dependency>
2.读取word
/**
 * Utility for extracting plain text from Microsoft Word files with Apache POI.
 *
 * <p>A file is first opened as an OOXML document via {@code XWPFDocument}. If POI rejects it
 * with {@code OLE2NotOfficeXmlFileException}, the file is re-opened as a legacy binary
 * document via {@code HWPFDocument}.
 */
public class POIUtil {

    /** Sample document read by {@link #main(String[])} when no path argument is supplied. */
    private static final String DEFAULT_PATH = "/Users/jj/Desktop/胜多负少的范德萨.doc";

    /**
     * Reads the text content of a Word document.
     *
     * @param path file-system path of the document to read
     * @return the text reported by the POI extractor, or {@code null} when {@code path} does
     *     not refer to an existing regular file
     * @throws IOException if the file cannot be opened, read, or closed
     * @throws Exception any other failure raised by POI while parsing the document
     */
    public static String readWord(String path) throws Exception {
        File file = new File(path);
        // isFile() is false for a missing path, so this also covers the exists() check.
        if (!file.isFile()) {
            return null;
        }
        try {
            return readOoxml(file);
        } catch (OLE2NotOfficeXmlFileException notOoxml) {
            // The first stream has already been closed by readOoxml's try-with-resources,
            // so the fallback opens its own fresh stream from the start of the file.
            return readOle2(file);
        }
    }

    /** Extracts the text of {@code file} by parsing it with {@code XWPFDocument}. */
    private static String readOoxml(File file) throws IOException {
        // Resources close in reverse order: extractor, document, then the raw stream.
        try (InputStream in = new FileInputStream(file);
             XWPFDocument document = new XWPFDocument(in);
             XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
            return extractor.getText();
        }
    }

    /** Extracts the text of {@code file} by parsing it with {@code HWPFDocument}. */
    private static String readOle2(File file) throws IOException {
        try (InputStream in = new FileInputStream(file);
             HWPFDocument document = new HWPFDocument(in);
             WordExtractor extractor = new WordExtractor(document)) {
            return extractor.getText();
        }
    }

    /**
     * Demo entry point: prints the text of a Word document to standard output.
     *
     * @param args optional; {@code args[0]} is the document path. When absent, the built-in
     *     sample path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            System.out.println(readWord(path));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}