首先引入 Maven 依赖，如下：
<!-- ************word************** -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>openxml4j</artifactId>
    <version>1.0-beta</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.geronimo.specs</groupId>
    <artifactId>geronimo-stax-api_1.0_spec</artifactId>
    <version>1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.3.0</version>
</dependency>
下面编写 Java 工具类，代码如下：
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Utility that opens a Word document from a URL and extracts its plain text.
 *
 * <p>Documents whose URL ends in {@code doc} are read with {@link WordExtractor};
 * documents whose URL ends in {@code docx} are read with {@link XWPFWordExtractor}.
 */
public class WordReader {

    /**
     * Extracts the text of the Word document located at {@code url}.
     *
     * <p>The reader is chosen from the URL suffix, compared case-insensitively.
     *
     * @param url location of the document, in a form accepted by {@link URL#URL(String)}
     * @return the extracted text; an empty string when {@code url} is {@code null} or does not
     *         end in {@code doc}/{@code docx}; {@code null} when opening or parsing the document
     *         fails with an {@link IOException} (including a malformed URL)
     */
    public static synchronized String read(String url) {
        if (url == null) {
            return "";
        }
        if (endsWithIgnoreCase(url, "docx")) {
            return readWord2007(url);
        }
        if (endsWithIgnoreCase(url, "doc")) {
            return readWord2003(url);
        }
        return "";
    }

    /**
     * Reads an OOXML ({@code .docx}) document.
     *
     * @param url location of the document
     * @return the extracted text, or {@code null} if an {@link IOException} occurred
     */
    private static String readWord2007(String url) {
        POIXMLTextExtractor ex = null;
        XWPFDocument xwpf = null;
        InputStream is = null;
        try {
            is = new URL(url).openStream();
            xwpf = new XWPFDocument(is);
            ex = new XWPFWordExtractor(xwpf);
            return ex.getText();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            // Any of these may still be null if an earlier step failed; closeSafely skips nulls
            // so a download/parse failure is not turned into a NullPointerException.
            closeSafely(ex);
            closeSafely(xwpf);
            closeSafely(is);
        }
    }

    /**
     * Reads a legacy binary ({@code .doc}) document.
     *
     * @param url location of the document
     * @return the extracted text, or {@code null} if an {@link IOException} occurred
     */
    private static String readWord2003(String url) {
        WordExtractor wordExtractor = null;
        InputStream fis = null;
        try {
            fis = new URL(url).openStream();
            wordExtractor = new WordExtractor(fis);
            return wordExtractor.getText();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            // Either reference may still be null if opening the stream failed.
            closeSafely(wordExtractor);
            closeSafely(fis);
        }
    }

    /** Returns whether {@code text} ends with {@code suffix}, ignoring case. */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        // regionMatches returns false for a negative offset, i.e. when text is shorter than suffix.
        int offset = text.length() - suffix.length();
        return text.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /** Closes {@code resource} if it is non-null, printing (not propagating) any close failure. */
    private static void closeSafely(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Demo entry point: prints the text of one {@code .doc} and one {@code .docx} sample. */
    public static void main(String[] args) {
        System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F236.doc"));
        System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F260.docx"));
    }
}