/**
 * Counts how many times each word occurs in the given text and returns the
 * most (or least) frequent entries.
 *
 * <p>A word is a maximal run of characters matching the regex {@code \w+}.
 * Words with the same count keep the order in which they first appeared in the
 * text: counts are collected in a {@link LinkedHashMap} and
 * {@link Collections#sort} is stable.
 *
 * @param text       the text to analyse; {@code null} is treated as empty text
 * @param ignoreCase if {@code true}, words are lower-cased (locale-independently)
 *                   before counting, so {@code "Foo"} and {@code "foo"} are the same word
 * @param sortFlag   {@code "asc"} (case-insensitive) sorts by ascending count;
 *                   any other value, including {@code null}, sorts by descending count
 * @param topN       maximum number of entries to return; values larger than the
 *                   number of distinct words return all of them, values of zero
 *                   or less return an empty list
 * @return a new, independent list of at most {@code topN} word/count entries in
 *         the requested order; never {@code null}
 */
public static List<Map.Entry<String, Integer>> countEachWorld(String text, boolean ignoreCase, final String sortFlag, int topN) {
    // Nothing to count or nothing requested: avoid the NPE from matcher(null)
    // and the IllegalArgumentException from subList(0, negative).
    if (text == null || topN <= 0) {
        return new ArrayList<Map.Entry<String, Integer>>();
    }

    // LinkedHashMap keeps first-appearance order, which becomes the tie-break order.
    Map<String, Integer> counts = new LinkedHashMap<>();
    Matcher matcher = Pattern.compile("\\w+").matcher(text);
    while (matcher.find()) {
        String word = matcher.group();
        if (ignoreCase) {
            // Locale.ROOT keeps the folding independent of the JVM default locale
            // (e.g. Turkish would otherwise map 'I' to dotless 'ı').
            word = word.toLowerCase(java.util.Locale.ROOT);
        }
        Integer seen = counts.get(word);
        counts.put(word, seen == null ? 1 : seen + 1);
    }

    List<Map.Entry<String, Integer>> entries = new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
    final boolean ascending = "asc".equalsIgnoreCase(sortFlag);
    Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
        @Override
        public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
            int byCount = left.getValue().compareTo(right.getValue());
            return ascending ? byCount : -byCount;
        }
    });

    int limit = Math.min(topN, entries.size());
    // Copy the slice so the result does not keep the full sorted list alive.
    return new ArrayList<Map.Entry<String, Integer>>(entries.subList(0, limit));
}