import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.TreeSet;
public class CountWords {
final static int show_word_Count = 10;//
最后显示输出的高频单词的个数,10个
final static String[] F_Words = { "a", "an", "the", "of",
"in", "on", "and", "or","to","be","do",
"oh", "but" };//定义虚词
public static void main(String[] args) {
String filePath = "D:\\text.txt";
StringBuffer sb = new StringBuffer();
File file = new File(filePath);
try {
FileReader fileReader = new FileReader(file);
BufferedReader reader = new BufferedReader(fileReader);
String line;
while ((line = reader.readLine()) != null) {
sb.append(line);
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// 用StringTokenizer对象根据分隔符分离单词,分隔符有, . ! ? 空格 \n(换行)
StringTokenizer stringTokenizer = new
StringTokenizer(sb.toString(),
",.!? \n");
HashMap map = new HashMap();
while (stringTokenizer.hasMoreTokens()) {
String word = stringTokenizer.nextToken();
int count;
if (map.get(word) == null) {
count = 1; // 如果map中没有那个单词,就把它出现的次数设置为1
} else {
count = map.get(word).intValue() + 1; //
如果map中有那个单词,就把出现的次数+1
}
map.put(word, count); // 向map中添加数据
}
// 创建TreeSet对象 wordEntity类继承Comparable类用于自定义排序
TreeSet set = new TreeSet();
// 遍历map,向TreeSet中添加数据
for (String word : map.keySet()) {
set.add(new WordEntity(word, map.get(word)));
}
java.util.Iterator it = set.iterator();
System.out.println("单词出现频率由高到低的前" + show_word_Count +
"个为:(ps:已去掉虚词)");
// 输出频率最高的前10个单词
for (int i = 0; i < show_word_Count;) {
WordEntity entity = it.next();
if (entity != null) {
if(!isF_Words(entity.getName())){
System.out.println("单词:" + entity.getName() + "\t出现次数为:"
+ entity.getCount());
i++;
}
} else {
System.out.println("单词不足需要显示的个数");
break;
}
}
}
// 判断该单词是否属于虚词
private static boolean isF_Words(String word) {
for (String thisW : F_Words)
if (thisW.equals(word))
return true;
return false;
}
}
// Run screenshot: (caption from the original write-up; the image is not part of this source file)