1. 需求
读取 Word 文档(.doc / .docx)中的文本内容,以便后续用正则表达式匹配、搜索相关内容。
2. 测试 jar 包
gradle 依赖
compile group: 'org.apache.poi', name: 'poi', version: '3.13'// Word/Excel parsing
compile group: 'org.apache.poi', name: 'poi-ooxml', version: '3.13'// Word/Excel parsing
compile group: 'org.apache.poi', name: 'poi-scratchpad', version: '3.13'// Word/Excel parsing
3. 代码示例
Util 类:
/**
 * Extracts the plain text of a Word document.
 *
 * <p>The format is chosen from the file extension, compared case-insensitively:
 * {@code .docx} is read through {@code XWPFDocument} and {@code XWPFWordExtractor},
 * {@code .doc} through {@code HWPFDocument} and its {@code Range}.
 *
 * @param path path of the Word file to read; must not be {@code null}
 * @return the text produced by the extractor for the document
 * @throws Exception with message "文件格式不正确" when the path does not end in
 *     {@code .doc} or {@code .docx} (this includes paths without any extension);
 *     failures while opening or parsing the file propagate unchanged
 */
public static String getDocContent(String path) throws Exception {
    // lastIndexOf yields -1 when there is no dot; guard before substring so an
    // extension-less path is reported as an unsupported format instead of crashing.
    int dotIndex = path.lastIndexOf('.');
    String extension = dotIndex < 0 ? "" : path.substring(dotIndex);
    boolean isDocx = ".docx".equalsIgnoreCase(extension);
    boolean isDoc = ".doc".equalsIgnoreCase(extension);
    if (!isDocx && !isDoc) {
        // Reject before opening the file so no handle is acquired for unsupported input.
        throw new Exception("文件格式不正确");
    }
    // try-with-resources releases the file handle on every exit path, including parse errors.
    try (FileInputStream in = new FileInputStream(new File(path))) {
        if (isDocx) {
            XWPFDocument document = new XWPFDocument(in);
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(document);
            return wordExtractor.getText();
        }
        HWPFDocument document = new HWPFDocument(in);
        Range range = document.getRange();
        return range.text();
    }
}
测试类:
package com.ycit.poi;
import com.ycit.utils.POIUtils;
import org.junit.Test;
/**
 * Created by xlch on 2017/1/11.
 *
 * <p>Manual check that prints whatever {@code POIUtils.getDocContent} returns for a
 * Word file on the local disk. It makes no assertions; the output is inspected by eye.
 */
public class DocContentTest {

    /**
     * Reads the sample document and writes its content to standard output.
     *
     * @throws Exception if {@code POIUtils.getDocContent} fails
     */
    @Test
    public void docTest() throws Exception {
        // To try the .docx variant, point this at the sibling file 5812-1.docx instead.
        System.out.println(
                POIUtils.getDocContent("D:\\about project\\perp-service\\5812-1.doc"));
    }
}