// Features: counts the total number of words (duplicates are not counted)
// and prints the words sorted by frequency.
package exam.b;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Word-frequency analyser for a whitespace-separated text file.
 *
 * <p>After {@link #analyse()} has run, the instance exposes the number of
 * distinct words, the words that occur exactly once, and all words ordered
 * by descending frequency.
 *
 * <p>The class is not designed for concurrent use: only the per-word counter
 * increment is synchronized, the collections are not.
 *
 * @author 小e
 *
 * 2010-5-27 10:08:29 PM
 */
public class TextAnalyse {

    /**
     * A distinct word together with the number of times it has been seen.
     *
     * <p>Note that {@link #equals(Object)} and {@link #hashCode()} take the
     * mutable counter and the enclosing {@code TextAnalyse} instance into
     * account, so a {@code Word} must not be used as a hash key while its
     * counter can still change.
     */
    class Word {
        String str;
        int num;

        /**
         * Creates a word that has been seen once.
         *
         * @param str the text of the word
         */
        public Word(String str) {
            this.str = str;
            num = 1;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + getOuterType().hashCode();
            result = prime * result + num;
            result = prime * result + ((str == null) ? 0 : str.hashCode());
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            Word other = (Word) obj;
            if (!getOuterType().equals(other.getOuterType())) {
                return false;
            }
            if (num != other.num) {
                return false;
            }
            if (str == null) {
                if (other.str != null) {
                    return false;
                }
            } else if (!str.equals(other.str)) {
                return false;
            }
            return true;
        }

        /** Returns the {@code TextAnalyse} instance this word belongs to. */
        private TextAnalyse getOuterType() {
            return TextAnalyse.this;
        }

        /** Adds one to the occurrence counter of this word. */
        public void increase() {
            synchronized (this) {
                num++;
            }
        }

        /** Renders the word as {@code text[count]}. */
        @Override
        public String toString() {
            return str + "[" + num + "]";
        }
    }

    private String path;
    private int wordNums; // number of distinct words seen
    private Map<String, Word> wordsMap;
    private List<String> onceWords; // lazily built cache, reset by analyse()
    private List<Word> allWord;

    /**
     * Creates an analyser for the given file. The file is not touched until
     * {@link #analyse()} is called.
     *
     * @param path path of the text file to analyse
     */
    public TextAnalyse(String path) {
        this.path = path;
        wordsMap = new HashMap<String, Word>();
        allWord = new ArrayList<Word>();
    }

    /** Orders words by descending occurrence count. */
    Comparator<Word> wordComparator = new Comparator<Word>() {
        @Override
        public int compare(Word w1, Word w2) {
            // Explicit comparison instead of subtraction: no overflow concerns.
            if (w1.num == w2.num) {
                return 0;
            }
            return (w1.num < w2.num) ? 1 : -1;
        }
    };

    /**
     * Reads the file, counts every whitespace-separated word and sorts the
     * collected words by descending frequency.
     *
     * <p>Each call starts from scratch, so calling it repeatedly does not
     * accumulate counts. The file is opened read-only and is therefore never
     * created or modified. I/O failures are reported on standard error via
     * {@code printStackTrace()} and leave whatever was counted so far.
     *
     * <p>Lines are read with {@link RandomAccessFile#readLine()}, which maps
     * each byte to one character and does not decode multi-byte encodings.
     */
    public void analyse() {
        // Discard results of any previous run, including the once-words cache.
        wordsMap = new HashMap<String, Word>();
        allWord = new ArrayList<Word>();
        wordNums = 0;
        onceWords = null;

        RandomAccessFile rf = null;
        try {
            rf = new RandomAccessFile(path, "r");
            String line;
            while ((line = rf.readLine()) != null) {
                add2Set(line.split("\\s+"));
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (rf != null) {
                try {
                    rf.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Stable sort: words with equal counts keep first-appearance order.
        Collections.sort(allWord, wordComparator);
    }

    /**
     * Returns the number of distinct words found by the last analysis.
     *
     * @return the distinct word count, {@code 0} before any analysis
     */
    public int getWordsNum() {
        return wordNums;
    }

    /**
     * Returns the words that occur exactly once.
     *
     * <p>The list is built on first use and cached until the next call to
     * {@link #analyse()}.
     *
     * @return the words whose occurrence count is one
     */
    public List<String> getOnceWords() {
        if (onceWords == null) {
            List<String> singles = new ArrayList<String>();
            for (Word word : allWord) {
                if (word.num == 1) {
                    singles.add(word.str);
                }
            }
            onceWords = singles;
        }
        return onceWords;
    }

    /**
     * Returns all distinct words; after {@link #analyse()} they are ordered
     * by descending frequency.
     *
     * @return the internal list of words
     */
    public List<Word> getAllWord() {
        return allWord;
    }

    /**
     * Counts every token of one split line.
     *
     * @param words tokens produced by splitting a line on whitespace
     */
    private void add2Set(String[] words) {
        for (String str : words) {
            // split() yields "" for blank lines and leading whitespace.
            if (str.length() == 0) {
                continue;
            }
            Word known = wordsMap.get(str);
            if (known != null) {
                known.increase();
            } else {
                Word fresh = new Word(str);
                wordsMap.put(str, fresh);
                allWord.add(fresh);
                wordNums++;
            }
        }
    }

    /**
     * Analyses {@code words.txt} in the working directory and prints the
     * distinct word count, the words seen once and the frequency ranking.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TextAnalyse ta = new TextAnalyse("words.txt");
        ta.analyse();
        System.out.format("文中共出现单词%d次\n", ta.getWordsNum());
        System.out.println("出现一次的单词");
        for (String word : ta.getOnceWords()) {
            System.out.print(word + " ");
        }
        // Terminate the once-words line so the next heading starts on its own line.
        System.out.println();
        System.out.println("单词按频率从高到底排序");
        for (Word word : ta.getAllWord()) {
            System.out.println(word);
        }
    }
}