Java 中如何统计一段文本中每个汉字的出现频率

最新推荐文章于 2022-10-03 16:42:57 发布

qingxili

最新推荐文章于 2022-10-03 16:42:57 发布

阅读量1.8k

点赞数

本文链接：https://blog.csdn.net/qingxili/article/details/44427449

版权

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often each Chinese character occurs in a text file.
 *
 * <p>A "Chinese character" here is any single char in the range
 * U+4E00..U+9FA5; everything else (Latin letters, digits, punctuation,
 * whitespace) is ignored.
 */
public class WordCount {

    /**
     * Matches exactly one char in the range U+4E00..U+9FA5. Compiled once
     * and shared, because the expression never changes between lines.
     */
    private static final Pattern HAN_CHARACTER = Pattern.compile("[\\u4e00-\\u9fa5]");

    /**
     * Reads {@code file} line by line and tallies every matching character.
     *
     * <p>The total number of matched characters is printed to standard
     * output. The file is decoded with the platform default charset, because
     * {@link FileReader} is constructed without an explicit one.
     *
     * <p>This method is best-effort: if the file cannot be opened or read
     * (including a {@code null} argument), {@code "File error"} is printed
     * and whatever was tallied up to that point is returned. It never throws.
     *
     * @param file the text file to analyse
     * @return a map from each distinct matched character (as a one-char
     *         string) to the number of times it occurred; empty if nothing
     *         matched or the file could not be read
     */
    public HashMap<String, Integer> count(File file) {
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        long count = 0;

        // try-with-resources guarantees the reader is closed on every path;
        // previously the file handle was never released.
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            for (String line = in.readLine(); line != null; line = in.readLine()) {
                Matcher m = HAN_CHARACTER.matcher(line);
                while (m.find()) {
                    count++;
                    String word = m.group();
                    // Single lookup instead of containsKey + get.
                    Integer seen = map.get(word);
                    map.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method not throwing.
            System.out.println("File error");
            // Keep the cause visible instead of discarding it.
            System.err.println("File error: " + e);
        }

        System.out.println("字数： " + count);
        return map;
    }

    /**
     * Prints the total and the distinct number of Chinese characters in a file.
     *
     * @param args optional; {@code args[0]} is the path of the file to
     *             analyse. Defaults to {@code D:\dou.txt} when omitted.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "D:\\dou.txt";
        File file = new File(path);
        WordCount wc = new WordCount();
        System.out.println("使用汉字数目： " + wc.count(file).size());
    }
}