java相关的英语论文_用Java统计英文文章中的前十个高频词-CSDN博客

本文链接：https://blog.csdn.net/weixin_30551437/article/details/114560316

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeSet;

/**
 * Reads a text file, counts how often each word occurs and prints the most
 * frequent words, skipping a fixed list of function words.
 */
public class CountWords {

    /** Number of high-frequency words printed at the end (10). */
    final static int show_word_Count = 10;

    /** Function words that are excluded from the ranking. */
    final static String[] F_Words = { "a", "an", "the", "of",
            "in", "on", "and", "or", "to", "be", "do",
            "oh", "but" };

    /** Characters that separate words: comma, period, '!', '?', space and newline. */
    private static final String DELIMITERS = ",.!? \n";

    /**
     * Counts the words of {@code D:\text.txt} and prints the
     * {@link #show_word_Count} most frequent ones that are not function words.
     * If the file holds fewer such words, all of them are printed followed by
     * a notice.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String filePath = "D:\\text.txt";

        String text;
        try {
            text = readText(new File(filePath));
        } catch (IOException e) {
            // Without the input there is nothing meaningful to rank.
            System.err.println("Cannot read " + filePath + ": " + e);
            return;
        }

        List<Map.Entry<String, Integer>> top = topWords(countWords(text), show_word_Count);

        System.out.println("单词出现频率由高到低的前" + show_word_Count
                + "个为:(ps:已去掉虚词)");
        for (Map.Entry<String, Integer> entry : top) {
            System.out.println("单词:" + entry.getKey() + "\t出现次数为:"
                    + entry.getValue());
        }
        if (top.size() < show_word_Count) {
            System.out.println("单词不足需要显示的个数");
        }
    }

    /**
     * Reads the whole file into one string.
     *
     * @param file the file to read
     * @return the file content, each line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    static String readText(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the
                // last word of a line is not fused with the first word of the next.
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Splits {@code text} on the delimiter characters and counts every token.
     * Counting is case-sensitive.
     *
     * @param text the text to analyse
     * @return a map from each word to its number of occurrences
     */
    static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<>();
        StringTokenizer tokenizer = new StringTokenizer(text, DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            String word = tokenizer.nextToken();
            Integer seen = counts.get(word);
            // First occurrence starts at 1, later ones increment.
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Returns the most frequent words that are not function words.
     *
     * @param counts word-to-occurrence map, e.g. from {@link #countWords(String)}
     * @param limit  maximum number of entries to return
     * @return at most {@code limit} entries, highest count first; words with
     *         the same count are ordered alphabetically
     */
    static List<Map.Entry<String, Integer>> topWords(Map<String, Integer> counts, int limit) {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (!isF_Words(entry.getKey())) {
                ranked.add(entry);
            }
        }
        // A list is sorted instead of filling a sorted set: a set ordered only
        // by count would treat words with equal counts as duplicates.
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                int byCount = b.getValue().compareTo(a.getValue());
                return byCount != 0 ? byCount : a.getKey().compareTo(b.getKey());
            }
        });
        int end = Math.max(0, Math.min(limit, ranked.size()));
        return new ArrayList<>(ranked.subList(0, end));
    }

    /**
     * Tells whether {@code word} is one of the function words in
     * {@link #F_Words}. The comparison ignores case so that a capitalised
     * sentence-initial word such as "The" is filtered too.
     *
     * @param word the word to test
     * @return {@code true} if the word is a function word
     */
    private static boolean isF_Words(String word) {
        for (String functionWord : F_Words) {
            if (functionWord.equalsIgnoreCase(word)) {
                return true;
            }
        }
        return false;
    }
}

运行截图: