import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts how often each character in the range U+4E00..U+9FA5 occurs in a text file.
 *
 * <p>The total number of matched characters is printed to standard output by
 * {@link #count(File)}; the per-character tallies are returned to the caller.
 */
public class WordCount {

    /** Matches one character in the range U+4E00..U+9FA5; compiled once and reused. */
    private static final Pattern HAN_CHAR = Pattern.compile("[\\u4e00-\\u9fa5]");

    /** File analysed by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\dou.txt";

    /**
     * Reads {@code file} line by line and tallies every character matched by
     * {@link #HAN_CHAR}.
     *
     * <p>The file is decoded as UTF-8 so that results do not depend on the platform
     * default charset. If the file is {@code null} or cannot be read, {@code "File error"}
     * is printed and whatever was tallied before the failure is returned; no exception
     * propagates for I/O failures.
     *
     * @param file the text file to analyse
     * @return a map from each matched character (as a one-character string) to the number
     *         of times it occurred; empty if nothing matched or the file was unreadable
     */
    public HashMap<String, Integer> count(File file) {
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        long count = 0;

        if (file == null) {
            // Keep the historical "report and return an empty result" behaviour for null input.
            System.out.println("File error");
        } else {
            // try-with-resources guarantees the reader is closed on every path.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    Matcher m = HAN_CHAR.matcher(line);
                    while (m.find()) {
                        count++;
                        String word = m.group();
                        // Single lookup instead of containsKey + get.
                        Integer seen = map.get(word);
                        map.put(word, seen == null ? 1 : seen + 1);
                    }
                }
            } catch (IOException e) {
                System.out.println("File error");
                // Surface the cause on stderr instead of discarding it.
                System.err.println("Cannot read " + file + ": " + e);
            }
        }

        System.out.println("字数: " + count);
        return map;
    }

    /**
     * Prints the total and the distinct number of matched characters in a file.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse, and
     *             {@link #DEFAULT_PATH} is used when no argument is supplied
     */
    public static void main(String[] args) {
        File file = new File(args.length > 0 ? args[0] : DEFAULT_PATH);
        WordCount wc = new WordCount();
        System.out.println("使用汉字数目: " + wc.count(file).size());
    }
}