以前经常在CSDN博客上看别人的文章,有时是遇到难题为了寻找思路,有时则是为了偷懒,直接复制别人的源代码,所以对于写这些博客的技术大神,我都是佩服且感激的。感激于他们把我的问题分析得如此透彻明白,佩服于他们遇到问题不但自己能解决,还把思路分享给我,使我也能够受益。今天,这些“雷锋”或者“红领巾”传递的正能量终于感动了我(实际上是teacher的作业…哎),我也要写自己的技术博客啦,好吧,废话不多说,开始写思路!
统计字符串在某一文本中出现的频率,思路上比较简单,分为以下几步:
1.读取项目的文件位置,将读出的字节流转化为字符串;
2.字符串切分,利用空格符把读出的字符串切分为字符串数组;
3.字符串统计,遍历生成的字符串数组,将其中的字符串存储为HashMap形式,key为字符串,value为出现次数;
4.对常见的虚词或者介词进行过滤
5.对HashMap的value排序,即可得到相应结果。
OK ,就是这么简单,剩下的可能就是有些具体代码大家会遇到点问题,所以直接贴代码:
package com.zhongcai.demo;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Reports the most frequent words of a text file.
 *
 * <p>{@link #main(String[])} reads {@code in.txt} from the directory {@code File}
 * (resolved against the working directory), splits the text on single spaces,
 * counts every token, sorts the counts in descending order, skips a fixed list
 * of stop words and writes the first ten remaining words to {@code out.txt} in
 * the same directory as well as to standard output.
 *
 * <p>Author: zhangjw<br>
 * Date: 2014/9/28 16:37
 */
public class Count {

    /** Maximum number of words written to the report. */
    private static final int TOP_N = 10;

    /** Tokens that are never reported (common function words, the dash and blank tokens). */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList(
            "would", "when", "they", "so", "I", "if", "up", "who",
            "been", "upon", "which", "from", "an", "my", "very", "Mr",
            "My", "have", "him", "no", "The", "were", "this", "all",
            "or", "by", "she", "as", "but", "—", "on", "be",
            "her", "his", "had", "for", "that", "with", "was", "at",
            "he", "it", "in", "of", "is", "not", "you", "me",
            "He", "", "and", "the", "to", " ", "a"));

    /**
     * Reads {@code File/in.txt}, counts its words and writes the report to
     * {@code File/out.txt} and to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Count c = new Count();
        File dir = new File("File").getAbsoluteFile();
        System.out.println(dir.getPath());

        // Build child paths with File(parent, child) so the separator is correct on every OS.
        File input = new File(dir, "in.txt");
        String source = c.getStringFrom(input.getPath());
        if (source == null) {
            // getStringFrom has already reported the cause; there is nothing to count.
            System.err.println("No input available from " + input.getPath());
            return;
        }

        Map<String, Integer> sorted = c.sortByValue(countWords(source.split(" ")));
        writeReport(new File(dir, "out.txt"), sorted);
    }

    /**
     * Returns a copy of {@code map} whose iteration order is by descending value.
     * The sort is stable, so entries with equal values keep the relative order in
     * which {@code map} iterates them.
     *
     * @param map word counts; must not be {@code null} and must not contain {@code null} values
     * @return a new {@link LinkedHashMap} with the same entries, highest value first
     */
    public Map<String, Integer> sortByValue(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                // Reversed operands give descending order.
                return right.getValue().compareTo(left.getValue());
            }
        });

        Map<String, Integer> result = new LinkedHashMap<String, Integer>();
        for (Map.Entry<String, Integer> entry : entries) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /**
     * Reads a text file using the platform's default charset and joins its lines
     * into one string, appending a single space after every line.
     *
     * @param filepath path of the file to read
     * @return the joined text, or {@code null} if the file is missing or cannot be read
     */
    public String getStringFrom(String filepath) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(new File(filepath)));
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append(' ');
            }
            return text.toString();
        } catch (IOException e) {
            // Also covers FileNotFoundException; callers rely on null to signal failure.
            System.err.println("Failed to read " + filepath + ": " + e);
            return null;
        } finally {
            closeAndReport(reader, filepath);
        }
    }

    /** Counts how many times each token occurs in {@code words}. */
    private static Map<String, Integer> countWords(String[] words) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String word : words) {
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Writes the header, up to {@link #TOP_N} non-stop-word entries of {@code sorted}
     * (in iteration order) and the footer to {@code target}; each entry line is
     * echoed to standard output. The footer is written and the file is closed even
     * when fewer than {@link #TOP_N} words qualify.
     */
    private static void writeReport(File target, Map<String, Integer> sorted) {
        FileWriter writer = null;
        try {
            writer = new FileWriter(target);
            writer.write("*************************" + "\n");
            writer.write("***频率最高的10个词语为***" + "\n");
            writer.write("*************************" + "\n");

            int written = 0;
            for (Map.Entry<String, Integer> entry : sorted.entrySet()) {
                if (STOP_WORDS.contains(entry.getKey())) {
                    continue;
                }
                String line = " " + entry.getKey() + " " + entry.getValue() + "次" + "\n";
                writer.write(line);
                System.out.println(line);
                written++;
                if (written == TOP_N) {
                    break;
                }
            }

            writer.write("************************" + "\n");
            writer.write("****0491201:张俊伟******" + "\n");
            writer.write("************************" + "\n");
        } catch (IOException e) {
            System.err.println("Failed to write " + target.getPath() + ": " + e);
        } finally {
            closeAndReport(writer, target.getPath());
        }
    }

    /** Closes {@code resource} if it is non-null, reporting (not throwing) a close failure. */
    private static void closeAndReport(Closeable resource, String description) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close " + description + ": " + e);
        }
    }
}
原文链接:http://blog.csdn.net/unexisted_/article/details/40351073