java读取文件统计一串英文出现频率最高的单词或字母
1.java统计一串英文出现频率最高的单词
package count;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Reads a text file and prints the word that occurs most often in it,
 * followed by the elapsed wall-clock time in milliseconds.
 *
 * <p>Words are the non-empty pieces of text between runs of whitespace,
 * commas and full stops. Matching is case-sensitive.
 */
public class Count {
    /** Location of the text file to analyse. */
    private static final String INPUT_PATH = "D:\\main.txt";

    /** Regular expression for one separator run: whitespace, commas, full stops. */
    private static final String WORD_SEPARATOR = "[\\s,.]+";

    /**
     * Program entry point.
     *
     * @param args ignored
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        String text = readAll(INPUT_PATH);
        // Prints "null" when the file contains no words at all.
        System.out.println(mostFrequentWord(text));
        long end = System.currentTimeMillis();
        System.out.println("共耗时:" + (end - start) + "毫秒");
    }

    /**
     * Reads the whole file into one string, keeping a line break between lines.
     *
     * @param path file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the last
                // word of this line does not fuse with the first word of the next.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            // Close even when reading fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Finds the most frequent word in the given text.
     *
     * @param text text to analyse
     * @return the word with the highest number of occurrences; when several words
     *         share that number, the one that appears first in the text; or
     *         {@code null} when the text contains no words
     */
    private static String mostFrequentWord(String text) {
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        // Distinct words in order of first appearance, used to resolve ties.
        List<String> firstSeenOrder = new ArrayList<String>();
        for (String word : text.split(WORD_SEPARATOR)) {
            if (word.length() == 0) {
                // split() yields an empty leading piece when the text starts
                // with a separator; that is not a word.
                continue;
            }
            Integer seen = occurrences.get(word);
            if (seen == null) {
                occurrences.put(word, 1);
                firstSeenOrder.add(word);
            } else {
                occurrences.put(word, seen + 1);
            }
        }

        String best = null;
        int bestCount = 0;
        // No sorting needed: a single scan remembers the current leader.
        for (String word : firstSeenOrder) {
            int current = occurrences.get(word);
            if (current > bestCount) {
                best = word;
                bestCount = current;
            }
        }
        return best;
    }
}
2. Java统计一串英文中出现频率最高的5个单词(按正则 [a-zA-Z]+ 提取单词)
package com.algorithm.interview;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts the words of a text file and prints the most frequent ones.
 *
 * <p>A word is a maximal run of ASCII letters ({@code [a-zA-Z]+}); matching is
 * case-sensitive. The ordering is defined inside this class, so it does not
 * depend on a separately declared comparator.
 */
public class Frequency {
    /** Location of the text file to analyse. */
    private static final String INPUT_PATH = "D:\\main.txt";

    /** Maximum number of words printed by {@link #count()}. */
    private static final int TOP_N = 5;

    /** Compiled once; a word is a run of ASCII letters. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /**
     * Reads the input file, counts every word and prints up to {@code TOP_N}
     * lines of the form {@code "<word> <count>"}, most frequent first. Words with
     * the same count are printed in ascending alphabetical order. Fewer lines
     * are printed when the file holds fewer distinct words.
     *
     * @throws Exception if the input file cannot be opened or read
     */
    public void count() throws Exception {
        String content = readAll(new File(INPUT_PATH));
        Map<String, Integer> occurrences = countWords(content);

        List<Map.Entry<String, Integer>> ranked =
                new LinkedList<Map.Entry<String, Integer>>(occurrences.entrySet());
        Collections.sort(ranked, new ByCountDescending());

        int printed = 0;
        for (Map.Entry<String, Integer> entry : ranked) {
            if (printed == TOP_N) {
                break;
            }
            System.out.println(entry.getKey() + " " + entry.getValue());
            printed++;
        }
    }

    /**
     * Reads the whole file into one string, keeping a line break between lines.
     *
     * @param file file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static String readAll(File file) throws java.io.IOException {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder builder = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() drops the line terminator; restore one so words on
                // adjacent lines are not glued together into a single match.
                builder.append(line).append('\n');
            }
            return builder.toString();
        } finally {
            // Close even when reading fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Counts how often each word occurs in the text.
     *
     * @param content text to scan
     * @return map from word to its number of occurrences
     */
    private static Map<String, Integer> countWords(String content) {
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        Matcher matcher = WORD.matcher(content);
        while (matcher.find()) {
            String word = matcher.group();
            Integer seen = occurrences.get(word);
            occurrences.put(word, seen == null ? 1 : seen + 1);
        }
        return occurrences;
    }

    /** Orders entries by count, highest first; equal counts by word, ascending. */
    private static final class ByCountDescending
            implements java.util.Comparator<Map.Entry<String, Integer>> {
        @Override
        public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
            int byCount = right.getValue().compareTo(left.getValue());
            if (byCount != 0) {
                return byCount;
            }
            return left.getKey().compareTo(right.getKey());
        }
    }

    /**
     * Program entry point: runs {@link #count()} and prints the stack trace of
     * any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            Frequency frequency = new Frequency();
            frequency.count();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
3. Java统计一串英文中各单词的出现次数并按次数从高到低排序
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Map.Entry;
/**
 * Splits a fixed sample sentence into words, prints the total number of words
 * and then every distinct word with its number of occurrences, most frequent
 * first.
 */
public class wordCount {
    /**
     * Characters that separate words: comma, space, question mark, full stop,
     * exclamation mark, colon, double quote, single quote and newline.
     */
    private static final String DELIMITERS = ", ?.!:\"\"''\n";

    /**
     * Program entry point.
     *
     * @param arg ignored
     */
    public static void main(String arg[]){
        String sentence="hello,my name is Tom,what is your name?he said:\"my name is John\"";
        Map<String,Integer> occurrences = new HashMap<String,Integer>();
        int totalWords = 0;
        // Give the tokenizer its delimiters up front so hasMoreTokens() and
        // nextToken() always agree on what counts as a token.
        StringTokenizer tokens = new StringTokenizer(sentence, DELIMITERS);
        while (tokens.hasMoreTokens()) {
            String word = tokens.nextToken();
            totalWords++;
            Integer seen = occurrences.get(word);
            occurrences.put(word, seen == null ? 1 : seen + 1);
        }
        System.out.println("总共单词数:"+totalWords);
        sort(occurrences);
    }

    /**
     * Prints every entry of the map as {@code "<key>:<value>"}, one per line,
     * ordered by value from highest to lowest. Entries with equal values are
     * printed in ascending key order. The map itself is not modified.
     *
     * @param map word-to-count map to print
     */
    public static void sort(Map<String,Integer> map){
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // compareTo instead of subtraction: a difference of two ints can
                // overflow and flip the sign of the result.
                int byCount = o2.getValue().compareTo(o1.getValue());
                if (byCount != 0) {
                    return byCount;
                }
                return o1.getKey().compareTo(o2.getKey());
            }
        });
        for (Entry<String, Integer> entry : ranked) {
            System.out.println(entry.getKey()+":"+entry.getValue());
        }
    }
}