代码
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Counts how often each Chinese character occurs in a Word (.docx) document.
 *
 * <p>The program extracts the document text, tallies every character matched by
 * {@link #CHINESE_CHAR}, sorts the tallies by ascending count, prints each one as
 * {@code character:count} and finally passes the same lines to
 * {@code ExcelUnit.toExcel} under the name {@code "字符统计"}.
 */
public class Duilie {

    /** Document that is read when no path is given on the command line. */
    private static final String DEFAULT_DOCUMENT_PATH = "D:\\文件\\工作记录.docx";

    /**
     * Matches a single character in the ranges U+4E00..U+9FA5 or U+F900..U+FA2D.
     * Compiled once and reused, since a {@link Pattern} is immutable.
     */
    private static final Pattern CHINESE_CHAR = Pattern.compile("[\\u4E00-\\u9FA5\\uf900-\\ufa2d]");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} overrides the path of the .docx file to
     *             analyse. Without arguments {@link #DEFAULT_DOCUMENT_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DOCUMENT_PATH;
        try {
            String text = readDocumentText(path);
            Map<String, Integer> counts = countCharacters(text);
            List<Node> sortList = toSortedNodes(counts);

            List<List<Object>> content = new ArrayList<>();
            for (Node node : sortList) {
                String line = node.getStr() + ":" + node.getCount();
                // One single-cell row per character.
                List<Object> row = new ArrayList<>();
                row.add(line);
                content.add(row);
                System.out.println(line);
            }
            // NOTE(review): ExcelUnit is defined outside this file; presumably it
            // writes the rows to a spreadsheet named after the second argument.
            ExcelUnit.toExcel(content, "字符统计");
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit normally.
            e.printStackTrace();
        }
    }

    /**
     * Reads the full text of a .docx file.
     *
     * <p>The stream and the document are opened in a try-with-resources statement so
     * that both are closed even when extraction fails.
     *
     * @param path location of the .docx file
     * @return the text returned by {@code XWPFWordExtractor.getText()}
     * @throws java.io.IOException if the file cannot be opened, read or closed
     */
    private static String readDocumentText(String path) throws java.io.IOException {
        try (InputStream is = new FileInputStream(new File(path));
             XWPFDocument xwpfDocument = new XWPFDocument(is)) {
            // The extractor only wraps the document opened above, which is closed
            // by the try-with-resources statement.
            XWPFWordExtractor xwpfWordExtractor = new XWPFWordExtractor(xwpfDocument);
            return xwpfWordExtractor.getText();
        }
    }

    /**
     * Tallies every match of {@link #CHINESE_CHAR} in the given text.
     *
     * @param text text to scan
     * @return map from matched character (as a one-character string) to its number
     *         of occurrences; empty when nothing matches
     */
    private static Map<String, Integer> countCharacters(String text) {
        Map<String, Integer> counts = new HashMap<>();
        Matcher matcher = CHINESE_CHAR.matcher(text);
        while (matcher.find()) {
            // merge() inserts 1 for a new key and adds 1 to an existing count.
            counts.merge(matcher.group(), 1, Integer::sum);
        }
        return counts;
    }

    /**
     * Converts the tallies to {@code Node} objects ordered by ascending count.
     *
     * @param counts character tallies
     * @return a new list of nodes sorted by {@code Node::getCount}; the sort is
     *         stable, so equal counts keep the map's iteration order
     */
    private static List<Node> toSortedNodes(Map<String, Integer> counts) {
        List<Node> nodeList = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            String key = entry.getKey();
            Integer value = entry.getValue();
            nodeList.add(new Node(value, key));
        }
        return nodeList.stream()
                .sorted(Comparator.comparing(Node::getCount))
                .collect(Collectors.toList());
    }
}