文章借鉴于:http://blog.csdn.net/u014204432/article/details/40348839
需求:
统计一个英文文档中各个单词出现的频率(由大到小排序),单词之间用逗号、句号或空格隔开,将结果输出至控制台。
思路:
1、利用输入流实现对文件内容的输入;
2、将文件内容存入StringBuffer中;
3、利用String的split()方法将字符串分隔,并将其存入数组中;
4、遍历数组将其存入Map<String, Integer>中。其中,key中存放单词,value中存放key中单词出现的次数。
5、利用Collections的sort()方法对TreeMap的value进行排序(很多时候TreeMap是根据key的值来进行排序的,但是有时我们需要根据TreeMap的value来进行排序。对value排序我们就需要借助于Collections的sort(List<T> list, Comparator<? super T> c)方法,该方法根据指定比较器产生的顺序对指定列表进行排序,因此需要先将map.entrySet()转换成List。TreeMap默认按key升序排列,如果我们需要改变排序方式,则需要使用比较器:Comparator。Comparator是可以对集合对象或者数组进行排序的比较器接口,实现该接口的public int compare(T o1, T o2)方法即可实现排序,该方法根据第一个参数o1小于、等于或者大于o2分别返回负整数、0或者正整数。但是有一个前提条件,那就是所有的元素都必须能够根据所提供的比较器来进行比较。)
/**
 * Counts how often each word occurs in an English text file and prints the
 * result to the console twice: first in key (alphabetical) order, then ordered
 * by descending frequency.
 *
 * <p>Words are compared case-insensitively and are separated by commas, full
 * stops or any whitespace (including line breaks).
 */
public class SortWords1 {

    /** Path of the text file to analyse. */
    private static final String INPUT_FILE = "e:/test.txt";

    /** One or more consecutive delimiters (comma, full stop, whitespace). */
    private static final String DELIMITERS = "[,.\\s]+";

    /**
     * Reads {@link #INPUT_FILE}, counts its words and prints the statistics
     * followed by the elapsed time. I/O failures (including a missing input
     * file) are reported via a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long t1 = System.currentTimeMillis();
        try {
            Map<String, Integer> counts = countWords(readAll(INPUT_FILE));

            // The map is a TreeMap, so despite the wording of the header this
            // listing actually comes out sorted by word.
            System.out.println("直接遍历Map输出(无序):");
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                System.out.println(entry.getKey() + ":" + entry.getValue());
            }

            System.out.println("对频率从大到小排序,然后输出:");
            for (Map.Entry<String, Integer> entry : sortByFrequencyDescending(counts)) {
                System.out.println(entry.getKey() + ":" + entry.getValue());
            }

            System.out.println("耗时:" + (System.currentTimeMillis() - t1) + "ms");
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so this covers both cases.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file into a string, always closing the reader.
     *
     * @param fileName path of the file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the
                // last word of a line is not glued to the first word of the next.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Counts the occurrences of every word in {@code text}, ignoring case.
     *
     * @param text the text to analyse
     * @return a map from lower-cased word to its number of occurrences, sorted by word
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // A fixed locale keeps lower-casing independent of the machine's default.
        String normalized = text.toLowerCase(java.util.Locale.ENGLISH);
        for (String word : normalized.split(DELIMITERS)) {
            if (word.length() == 0) {
                // split() yields a leading empty token when the text starts
                // with a delimiter (or is empty); that is not a word.
                continue;
            }
            Integer previous = counts.get(word);
            counts.put(word, previous == null ? 1 : previous + 1);
        }
        return counts;
    }

    /**
     * Returns the entries of {@code counts} ordered by descending count. The
     * sort is stable, so words with equal counts keep the map's key order.
     *
     * @param counts word counts
     * @return a new list of the map's entries, most frequent first
     */
    private static List<Map.Entry<String, Integer>> sortByFrequencyDescending(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return entries;
    }
}
若要将频率最高的前十个单词输出至指定的文本文件,代码为:
/**
 * Counts how often each word occurs in an English text file and writes the
 * most frequent words (at most {@link #TOP_COUNT}) to a result file.
 *
 * <p>Words are compared case-insensitively and are separated by commas, full
 * stops or any whitespace (including line breaks).
 */
public class SortWords2 {

    /** Maximum number of words written to the result file. */
    private static final int TOP_COUNT = 10;

    /** One or more consecutive delimiters (comma, full stop, whitespace). */
    private static final String DELIMITERS = "[,.\\s]+";

    /**
     * Reads {@code e:/test.txt}, writes the most frequent words and the elapsed
     * time to {@code e:/result.txt}, then prints a confirmation and the elapsed
     * time to the console. I/O failures (including a missing input file) are
     * reported via a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long t1 = System.currentTimeMillis();
        String fileName1 = "e:/test.txt";
        String fileName2 = "e:/result.txt";
        try {
            // Read and count first so the result file is only touched once the
            // input has been processed successfully.
            Map<String, Integer> counts = countWords(readAll(fileName1));
            List<Map.Entry<String, Integer>> ranked = sortByFrequencyDescending(counts);
            writeTopWords(fileName2, ranked, t1);

            // Announce the result only after it has actually been written.
            System.out.println("单词统计的结果请见当前目录result.txt文件");
            System.out.println("耗时:" + (System.currentTimeMillis() - t1) + "ms");
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so this covers both cases.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file into a string, always closing the reader.
     *
     * @param fileName path of the file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readAll(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the
                // last word of a line is not glued to the first word of the next.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Counts the occurrences of every word in {@code text}, ignoring case.
     *
     * @param text the text to analyse
     * @return a map from lower-cased word to its number of occurrences, sorted by word
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // A fixed locale keeps lower-casing independent of the machine's default.
        String normalized = text.toLowerCase(java.util.Locale.ENGLISH);
        for (String word : normalized.split(DELIMITERS)) {
            if (word.length() == 0) {
                // split() yields a leading empty token when the text starts
                // with a delimiter (or is empty); that is not a word.
                continue;
            }
            Integer previous = counts.get(word);
            counts.put(word, previous == null ? 1 : previous + 1);
        }
        return counts;
    }

    /**
     * Returns the entries of {@code counts} ordered by descending count. The
     * sort is stable, so words with equal counts keep the map's key order.
     *
     * @param counts word counts
     * @return a new list of the map's entries, most frequent first
     */
    private static List<Map.Entry<String, Integer>> sortByFrequencyDescending(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return entries;
    }

    /**
     * Writes the first {@link #TOP_COUNT} entries of {@code ranked}, one per
     * line, followed by the elapsed time, and always closes the writer.
     *
     * @param fileName    path of the result file (overwritten if it exists)
     * @param ranked      entries ordered most frequent first
     * @param startMillis start time used to compute the elapsed time line
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeTopWords(String fileName, List<Map.Entry<String, Integer>> ranked,
            long startMillis) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
        try {
            int limit = Math.min(TOP_COUNT, ranked.size());
            for (int i = 0; i < limit; i++) {
                Map.Entry<String, Integer> entry = ranked.get(i);
                writer.write("出现次数第" + (i + 1) + "的单词为:" + entry.getKey()
                        + ",出现频率为" + entry.getValue() + "次");
                writer.newLine();
            }
            writer.write("耗时:" + (System.currentTimeMillis() - startMillis) + "ms");
        } finally {
            // close() also flushes the buffered output.
            writer.close();
        }
    }
}