使用 POI 获取 Word(docx)文档内容
1.添加依赖
<!-- Apache POI core artifact, pinned to version 4.1.2. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<!-- Apache POI OOXML artifact, pinned to the same version (4.1.2) as the core artifact. -->
<!-- NOTE(review): presumably this is what supplies the XWPF classes used by the code below; confirm. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
2.上代码
/**
 * Extracts the plain text of a local .docx file.
 *
 * <p>The file stream and the document are both closed before this method
 * returns, whether or not reading succeeds. I/O failures are not propagated:
 * the stack trace is printed and {@code null} is returned.
 *
 * @param docUrl path of the local .docx file
 * @return the text of the document, or {@code null} if the file could not be
 *         opened or read
 */
public static String parseDoc(String docUrl) {
    String content = null;
    // try-with-resources releases the file handle and the document on every
    // path. The extractor is not closed separately: it is built on
    // `document`, which is already managed here.
    try (FileInputStream fis = new FileInputStream(new File(docUrl));
         XWPFDocument document = new XWPFDocument(fis)) {
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        content = extractor.getText();
    } catch (IOException e) {
        // FileNotFoundException is a subclass of IOException, so this single
        // handler covers both cases the original caught separately.
        e.printStackTrace();
    }
    // Declared outside the try so text that was already extracted is still
    // returned if only the close step fails.
    return content;
}
/**
 * Extracts the plain text of a .docx file fetched from a URL.
 *
 * <p>The URL stream and the document are both closed before this method
 * returns, whether or not reading succeeds. I/O failures (including a
 * malformed URL) are not propagated: the stack trace is printed and
 * {@code null} is returned.
 *
 * @param docUrl URL from which the .docx file can be downloaded
 * @return the text of the document, or {@code null} if the URL could not be
 *         opened or the content could not be read
 */
public static String parseDocFromChain(String docUrl) {
    String content = null;
    // try-with-resources releases the URL stream and the document on every
    // path. The extractor is not closed separately: it is built on
    // `document`, which is already managed here.
    try (InputStream in = new URL(docUrl).openStream();
         XWPFDocument document = new XWPFDocument(in)) {
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        content = extractor.getText();
    } catch (IOException e) {
        // FileNotFoundException is a subclass of IOException, so this single
        // handler covers both cases the original caught separately.
        e.printStackTrace();
    }
    // Declared outside the try so text that was already extracted is still
    // returned if only the close step fails.
    return content;
}
3.获取内容后就可按需进行操作了