代码如下
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
/*
* 统计英文文件中各单词出现的次数,并按出现次数降序输出前 10 个单词
*/
public class WordCount {
public static void main(String[] args) throws Exception {
Map<String, Integer> map = new HashMap<String, Integer>();
File file = new File("word.txt");
String str = FileUtils.readFileToString(file, "utf8");//读文件将内容转化为字符串
Pattern pattern = Pattern.compile("\\b[a-zA-Z]+\\b");//表示单词的正则表达式
Matcher matcher = pattern.matcher(str);
//统计单词出现次数
while (matcher.find()) {
String msStr = matcher.group();
if (map.containsKey(msStr)) {
map.put(msStr, map.get(msStr) + 1);
} else {
map.put(msStr, 1);
}
}