Java统计文本高频词汇

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeSet;



/**
 * Counts how often each token occurs in a text file and prints the most
 * frequent ones to standard output.
 */
public class CountWords {

	/** Input file read when no path is supplied on the command line. */
	private static final String DEFAULT_PATH = "C:\\2.txt";

	/** Number of entries printed when no limit is supplied on the command line. */
	private static final int DEFAULT_LIMIT = 3;

	/** Token separators: comma, period, exclamation mark, space and tab. */
	private static final String DELIMITERS = ",.! \t";

	/**
	 * Program entry point.
	 *
	 * @param args optional; {@code args[0]} is the path of the file to analyse
	 *             and {@code args[1]} the number of entries to print. Missing
	 *             values fall back to {@code C:\2.txt} and {@code 3}.
	 */
	public static void main(String[] args) {
		try {
			String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
			int limit = (args != null && args.length > 1) ? Integer.parseInt(args[1]) : DEFAULT_LIMIT;
			countLetter(new File(path), limit);
		} catch (Exception e) {
			// Top-level boundary: report the failure and let the program end.
			e.printStackTrace();
		}
	}

	/**
	 * Reads {@code file} line by line, splits every line into tokens and prints
	 * up to {@code frequency} entries as {@code token:count}, one per line.
	 * Entries are printed in the order defined by {@link WordBean#compareTo}.
	 * Despite the method name, whole tokens are counted, not single letters;
	 * tokens are compared case-sensitively.
	 *
	 * @param file      the text file to analyse; read with {@link FileReader}'s
	 *                  default character encoding
	 * @param frequency maximum number of entries to print; nothing is printed
	 *                  when it is zero or negative
	 * @throws Exception if the file cannot be opened or read
	 */
	public static void countLetter(File file, int frequency) throws Exception {
		BufferedReader br = new BufferedReader(new FileReader(file));
		Map<String, Integer> counts = new HashMap<String, Integer>();
		try {
			String line;
			// Walk the text and accumulate token -> occurrence count.
			while ((line = br.readLine()) != null) {
				StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
				while (tokens.hasMoreTokens()) {
					String word = tokens.nextToken();
					Integer seen = counts.get(word);
					counts.put(word, seen == null ? 1 : seen + 1);
				}
			}
		} finally {
			br.close();
		}

		// A TreeSet keeps the beans sorted by WordBean's natural ordering.
		TreeSet<WordBean> ranked = new TreeSet<WordBean>();
		for (Map.Entry<String, Integer> entry : counts.entrySet()) {
			ranked.add(new WordBean(entry.getKey(), entry.getValue()));
		}

		// Print the leading entries, stopping once the limit is reached.
		int printed = 0;
		for (WordBean bean : ranked) {
			if (printed++ >= frequency) {
				break;
			}
			System.out.println(bean.getKey() + ":" + bean.getCount());
		}
	}

}
/**
 * A token together with its occurrence count.
 *
 * <p>The natural ordering sorts by count in descending order and breaks ties
 * by key in ascending order. Two beans compare as equal only when both count
 * and key match, which agrees with {@link #equals(Object)} for non-null fields.
 *
 * <p>The class is mutable; changing the key or count of an instance that is
 * already stored in a sorted or hashed collection invalidates its position
 * in that collection.
 */
class WordBean implements Comparable<WordBean>{
	String key;
	Integer count;

	/**
	 * @param key   the token
	 * @param count how many times the token occurred
	 */
	public WordBean(String key, Integer count) {
		super();
		this.key = key;
		this.count = count;
	}

	/** @return the token */
	public String getKey() {
		return key;
	}

	/** @param key the token */
	public void setKey(String key) {
		this.key = key;
	}

	/** @return the occurrence count */
	public Integer getCount() {
		return count;
	}

	/** @param count the occurrence count */
	public void setCount(Integer count) {
		this.count = count;
	}

	/**
	 * Orders beans by count, highest first, then by key in natural string order.
	 *
	 * @param o the bean to compare with
	 * @return a negative value if this bean sorts before {@code o}, a positive
	 *         value if it sorts after, and zero if count and key are both equal
	 * @throws NullPointerException if {@code o}, either count or this key is null
	 */
	@Override
	public int compareTo(WordBean o) {
		// Compare in reverse (o against this) to get descending counts.
		// Integer.compareTo is used instead of subtraction, which can
		// overflow and flip the sign for operands far apart.
		int byCount = o.count.compareTo(this.count);
		return byCount != 0 ? byCount : this.key.compareTo(o.key);
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + ((count == null) ? 0 : count.hashCode());
		result = prime * result + ((key == null) ? 0 : key.hashCode());
		return result;
	}

	/**
	 * Two beans are equal when they are of the same class and both their
	 * count and key are equal (or both null).
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null)
			return false;
		if (getClass() != obj.getClass())
			return false;
		WordBean other = (WordBean) obj;
		if (count == null) {
			if (other.count != null)
				return false;
		} else if (!count.equals(other.count))
			return false;
		if (key == null) {
			if (other.key != null)
				return false;
		} else if (!key.equals(other.key))
			return false;
		return true;
	}

}

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值