本文主要是通过Java来实现统计txt文件当中的字符出现频率。
- 主要使用的是map来进行存储(map访问速度很快,去重方便)
- 另外实现了对map按value排序的功能(TreeMap等有序map默认只按key排序)
- 当然也涉及到文件的读取等,直接上代码:
import java.io.File;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
/**
 * Counts how often each character occurs in a UTF-8 text file and prints the
 * most frequent ones.
 *
 * <p>Every character is stored as a {@code String} holding exactly one Unicode
 * code point, so it can be used directly as a map key.
 */
public class StringTest1 {

    /** Charset used to decode the input file. */
    private static final String ENCODING = "UTF-8";

    /** File analysed by {@link #main} when no path is passed on the command line. */
    private static final String DEFAULT_FILE_PATH = "D:\\eclipse\\WebJava2\\poem.txt";

    /** Maximum number of ranking lines printed by {@link #main}. */
    private static final int TOP_N = 20;

    /**
     * Keys removed by {@link #ClearClip}: ASCII digits, a few punctuation marks and
     * the empty string. Keys are trimmed before the lookup, so the empty string is
     * what whitespace-only keys (space, tab, ...) end up matching.
     *
     * <p>NOTE(review): the original list contained "," and "?" twice each; the
     * duplicates may once have been full-width variants - confirm and add them here
     * if they should be filtered as well.
     */
    private static final Set<String> CLIP = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "\u3002", // ideographic full stop
                    ",", "?", ".", "")));

    /**
     * Reads the file line by line and counts the occurrences of every character.
     * Line terminators are consumed by {@code readLine} and therefore not counted.
     *
     * @param filePath path of the UTF-8 encoded text file
     * @return a mutable map from character to occurrence count; empty when the path
     *         is {@code null} or does not denote an existing regular file, and
     *         possibly partial when an I/O error interrupts reading
     */
    public static Map<String, Integer> ComputeString(String filePath) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        if (filePath == null || !new File(filePath).isFile()) {
            System.out.println("cant find the file!");
            return map;
        }

        InputStream in = null;
        try {
            in = new FileInputStream(filePath);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
            String line;
            while ((line = reader.readLine()) != null) {
                countCharacters(line, map);
            }
        } catch (IOException e) {
            System.out.println("read file error!");
            e.printStackTrace();
        } finally {
            // Always release the file handle, including when reading fails midway.
            closeQuietly(in);
        }
        return map;
    }

    /** Adds one to the count of every code point in {@code line}, including the last one. */
    private static void countCharacters(String line, Map<String, Integer> counts) {
        int i = 0;
        while (i < line.length()) {
            // Step by code point so a surrogate pair is counted as one character.
            int width = Character.charCount(line.codePointAt(i));
            String key = line.substring(i, i + width);
            Integer seen = counts.get(key);
            counts.put(key, seen == null ? 1 : seen + 1);
            i += width;
        }
    }

    /** Closes {@code stream} if it is non-null, ignoring any failure of the close itself. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }

    /**
     * Returns a copy of the map whose iteration order is by descending value.
     * Entries with equal values keep the relative order in which the input map
     * iterates them.
     *
     * @param oriMap the map to sort; it is not modified
     * @return a new insertion-ordered map sorted by value, largest first; an empty
     *         map (never {@code null}) when {@code oriMap} is {@code null} or empty
     */
    public static Map<String, Integer> sortMapByvalue(Map<String, Integer> oriMap) {
        Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
        if (oriMap == null || oriMap.isEmpty()) {
            return sortedMap;
        }
        List<Map.Entry<String, Integer>> entryList =
                new ArrayList<Map.Entry<String, Integer>>(oriMap.entrySet());
        Collections.sort(entryList, new MapValueComparator());
        for (Map.Entry<String, Integer> entry : entryList) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }
        return sortedMap;
    }

    /** Orders map entries by value, largest value first. */
    public static class MapValueComparator implements Comparator<Map.Entry<String, Integer>> {
        @Override
        public int compare(Entry<String, Integer> me1, Entry<String, Integer> me2) {
            // Arguments are swapped on purpose to obtain descending order.
            return me2.getValue().compareTo(me1.getValue());
        }
    }

    /**
     * Removes digits, selected punctuation and whitespace-only keys from the map.
     * Each key is trimmed before it is compared against the filter set.
     *
     * @param oriMap the map to filter; it is modified in place
     * @return the same map instance, after filtering
     * @throws NullPointerException if {@code oriMap} is {@code null}
     */
    public static Map<String, Integer> ClearClip(Map<String, Integer> oriMap) {
        // Removal goes through the iterator; removing via the map while iterating
        // its key set would throw ConcurrentModificationException.
        Iterator<String> it = oriMap.keySet().iterator();
        while (it.hasNext()) {
            String key = it.next();
            if (key != null && CLIP.contains(key.trim())) {
                it.remove();
            }
        }
        return oriMap;
    }

    /**
     * Prints the most frequent characters of a text file, at most {@code TOP_N} lines.
     *
     * @param args optional; {@code args[0]} is the file to analyse, otherwise the
     *             built-in default path is used
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        Map<String, Integer> answerMap = ClearClip(sortMapByvalue(ComputeString(filePath)));

        int rank = 1;
        for (Map.Entry<String, Integer> entry : answerMap.entrySet()) {
            if (rank > TOP_N) {
                break;
            }
            System.out.println("我的出现次数排名为: " + rank + " key= " + entry.getKey()
                    + " value= " + entry.getValue());
            rank++;
        }
    }
}