方法一
import java.io.*;
import java.util.*;
/**
 * Counts how often each identifier-like word occurs in a text file and prints
 * the counts as a map sorted by word.
 *
 * <p>A "word" is a whitespace-separated token that starts with an ASCII letter
 * and continues with word characters only (letters, digits, underscore).
 * Counting is case-insensitive: every line is lower-cased before it is split.
 */
public class Test {
    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "d:\\src\\Test.java";

    /** Tokens are separated by runs of whitespace. */
    private static final String SEPARATOR_REGEX = "\\s+";

    /** A countable token: an ASCII letter followed by any number of word characters. */
    private static final String WORD_REGEX = "^[a-zA-Z]\\w*";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise the built-in default path is used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String args[]) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        display(new File(path));
    }

    /**
     * Reads {@code file} line by line, counts every matching word and prints the
     * resulting word-to-count map to standard output in ascending word order.
     *
     * @param file the text file to analyse
     * @throws Exception if the file cannot be opened or read
     */
    public static void display(File file) throws Exception {
        // TreeMap keeps the keys sorted, so the printed result is alphabetical.
        TreeMap<String, Integer> tm = new TreeMap<String, Integer>();
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // Strings are immutable: the lower-cased copy has to be assigned back,
                // otherwise "Word" and "word" are counted separately.
                line = line.toLowerCase(Locale.ROOT);
                for (String s : line.split(SEPARATOR_REGEX)) {
                    // Skips empty tokens (leading whitespace) and anything that is
                    // not a plain word, e.g. "2bad" or "x,".
                    if (!s.matches(WORD_REGEX)) {
                        continue;
                    }
                    Integer seen = tm.get(s);
                    tm.put(s, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Release the file handle even when reading fails half-way.
            br.close();
        }
        System.out.println(tm);
    }
}
方法二 思路:
1、将文件内容逐行读入一个字符串缓冲区(如StringBuffer)中。
2、按分隔符(“,”、“.”、“!”、空格、换行)把字符串切分成单词;下面的代码使用StringTokenizer,也可以用split()函数得到一个数组。
3、遍历这些单词,将其放入一个Map中,key=单词,value=单词出现的次数。
4、如要求出文件中出现频率最高的几个单词,则要对Map中的统计结果按出现次数进行排序。
//以下是实现一个文件中出现频率最高的单词的统计
//FileWordCount.java主函数所在文件
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * Reports how often each word occurs in a text file, most frequent word first.
 *
 * <p>The file is read completely, split on the delimiters {@code , . !},
 * space and newline, and the occurrences of every token are counted. The
 * counts are then wrapped in {@code WordEntity} objects and stored in a
 * {@link TreeSet}, which orders them through {@code WordEntity.compareTo}.
 * The result is printed in three formats: a hand-built line per word, the
 * {@code toString()} form of each entry, and the top three entries only.
 *
 * @author FLY (created 11-9-13)
 */
public class FileWordCount {
    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "D:\\test.txt";

    /** Characters that separate words: comma, full stop, exclamation mark, space, newline. */
    private static final String DELIMITERS = ",.! \n";

    /** Number of entries printed by the third ("top N") output format. */
    private static final int TOP_N = 3;

    /**
     * Program entry point. I/O problems are reported on standard output rather
     * than propagated.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise the built-in default path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            Map<String, Integer> map = countWords(readText(path));

            // The TreeSet sorts the entries using WordEntity's compareTo.
            Set<WordEntity> set = new TreeSet<WordEntity>();
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                set.add(new WordEntity(entry.getKey(), entry.getValue()));
            }

            // Format 1: build the output line by hand.
            System.out.println("输出形式一:");
            for (WordEntity w : set) {
                System.out.println("单词:" + w.getKey() + " 出现的次数为: " + w.getCount());
            }

            // Format 2: print the entry itself; the text comes from WordEntity.toString().
            System.out.println("输出形式二:");
            for (WordEntity w : set) {
                System.out.println(w);
            }

            // Format 3: only the first TOP_N entries, each prefixed with its rank.
            System.out.println("输出形式三:");
            int rank = 1;
            for (WordEntity w : set) {
                System.out.println("第" + rank + "名为单词:" + w.getKey() + " 出现的次数为: "
                        + w.getCount());
                if (rank == TOP_N) { // stop once TOP_N entries have been printed
                    break;
                }
                rank++;
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件读异常~!");
        }
    }

    /**
     * Reads the whole file into one string.
     *
     * <p>{@code readLine()} strips the line terminator, so a newline is appended
     * after every line; without it the last word of one line would be glued to
     * the first word of the next.
     *
     * @param path path of the file to read
     * @return the file content with each line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            StringBuilder sb = new StringBuilder();
            String s;
            while ((s = br.readLine()) != null) {
                sb.append(s).append('\n');
            }
            return sb.toString();
        } finally {
            // Release the file handle even when reading fails half-way.
            br.close();
        }
    }

    /**
     * Counts the occurrences of every token in {@code text}.
     *
     * @param text the text to tokenize
     * @return a map from token to the number of times it occurs
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        StringTokenizer st = new StringTokenizer(text, DELIMITERS);
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}
//WordEntity.java文件
/**
 * An immutable (word, occurrence count) pair that sorts by descending count.
 *
 * <p>The natural ordering puts the entry with the higher count first; entries
 * with equal counts are ordered by word in ascending order. Sorted collections
 * such as {@code TreeSet} call {@link #compareTo(WordEntity)} to position their
 * elements, so a set of entries iterates from the most to the least frequent
 * word. {@code equals} and {@code hashCode} are consistent with that ordering.
 *
 * @author FLY (created 11-9-13)
 */
public class WordEntity implements Comparable<WordEntity> {
    private final String key;
    private final Integer count;

    /**
     * @param key   the word; must not be {@code null} if the entry is to be compared
     * @param count how often the word occurs; must not be {@code null} if the
     *              entry is to be compared
     */
    public WordEntity(String key, Integer count) {
        this.key = key;
        this.count = count;
    }

    /**
     * Orders by count descending, then by word ascending.
     *
     * @param o the entry to compare with
     * @return a negative value if this entry sorts before {@code o}, a positive
     *         value if it sorts after, {@code 0} if word and count are both equal
     */
    @Override
    public int compareTo(WordEntity o) {
        // Comparing o's count with ours (instead of ours with o's) yields descending
        // order; swap the operands for ascending order. Integer.compareTo is used
        // rather than subtraction because a difference of two ints can overflow.
        int byCount = o.count.compareTo(count);
        return byCount != 0 ? byCount : key.compareTo(o.key);
    }

    /** Two entries are equal when both their word and their count are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof WordEntity)) {
            return false;
        }
        WordEntity other = (WordEntity) obj;
        boolean sameKey = (key == null) ? other.key == null : key.equals(other.key);
        boolean sameCount = (count == null) ? other.count == null : count.equals(other.count);
        return sameKey && sameCount;
    }

    @Override
    public int hashCode() {
        int result = (key == null) ? 0 : key.hashCode();
        return 31 * result + ((count == null) ? 0 : count.hashCode());
    }

    /** Returns the word followed by its count in a printable form. */
    @Override
    public String toString() {
        return key + " 出现的次数为:" + count;
    }

    /** @return the word */
    public String getKey() {
        return key;
    }

    /** @return the number of occurrences of the word */
    public Integer getCount() {
        return count;
    }
}
// References: [1].
http://bbs.csdn.net/topics/380250029 [2].
http://blog.csdn.net/love_sophie/article/details/6771452