Java 实现找出文档中的十个高频词

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;


/**
 * Prints the ten most frequent words of a piece of text.
 *
 * <p>A word is a whitespace-separated token of the text after the punctuation
 * marks {@code , . ! ? ;} have been removed. Matching is case-sensitive, so
 * {@code "The"} and {@code "the"} are counted as different words.
 */
public class li1 {

    /** Maximum number of ranked words printed by {@link #main(String[])}. */
    private static final int TOP_N = 10;

    /** Built-in sample text analysed when no input file is supplied. */
    private static final String SAMPLE_TEXT = "The thermometer had dropped to 18 degrees below zero, but still chose to sleep in the porch as usual. In the evening, the most familiar sight to me would be stars in the sky. Though they were a mere sprinkle of twinkling dots, yet I had become so accustomed to them that their occasional absence would bring me loneliness and ennui.";

    /**
     * Distinct words of the analysed text. Filled in order of first appearance
     * by the counting step and reordered in place by
     * {@link #insertionsort(Map)}. Only the leading entries (one per distinct
     * word) are populated; any remaining slots are {@code null}.
     */
    static String[] a;

    /**
     * Counts the words of the input text and prints the most frequent ones.
     *
     * <p>The cleaned text is printed first, followed by up to ten lines of the
     * form {@code word---------count}, highest count first. Words with equal
     * counts keep their order of first appearance.
     *
     * @param args optional; when a first argument is present it is taken as the
     *             path of a text file to analyse, otherwise the built-in sample
     *             text is used
     * @throws IOException if the given file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String msg;
        if (args != null && args.length > 0) {
            msg = getFromTxt(args[0]);
        } else {
            msg = SAMPLE_TEXT;
        }

        // Strip punctuation so that "zero," and "zero" count as the same word.
        msg = delectChar(msg);
        System.out.println(msg);

        Map<String, Integer> map = countWords(msg);

        // Order the distinct words in `a` by descending frequency.
        insertionsort(map);

        // Bound by the number of distinct words, not by a.length: slots of `a`
        // beyond map.size() are null and must not be printed.
        int limit = Math.min(TOP_N, map.size());
        for (int k = 0; k < limit; k++) {
            System.out.println(a[k] + "---------" + map.get(a[k]));
        }
    }

    /**
     * Splits the text on runs of whitespace, counts each word, and records the
     * distinct words in {@link #a} in order of first appearance.
     */
    private static Map<String, Integer> countWords(String msg) {
        String[] tokens = msg.trim().split("\\s+");
        a = new String[tokens.length];
        int distinct = 0;

        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String token : tokens) {
            if (token.isEmpty()) {
                // Splitting an empty text yields a single "" token; skip it.
                continue;
            }
            Integer seen = counts.get(token);
            if (seen == null) {
                a[distinct++] = token;
                counts.put(token, 1);
            } else {
                counts.put(token, seen + 1);
            }
        }
        return counts;
    }

    /**
     * Sorts the first {@code map.size()} entries of {@link #a} in place by
     * descending count, using a straight insertion sort. The sort is stable:
     * words with the same count keep their relative order.
     *
     * @param map word counts; every one of the first {@code map.size()} entries
     *            of {@link #a} must be a key of this map
     * @return the same {@code map} instance, unmodified
     */
    public static Map<String, Integer> insertionsort(Map<String, Integer> map) {
        for (int i = 1; i < map.size(); i++) {
            String word = a[i];
            int count = map.get(word);

            // Shift every strictly less frequent word one slot to the right.
            int j = i - 1;
            while (j >= 0 && map.get(a[j]) < count) {
                a[j + 1] = a[j];
                j--;
            }
            a[j + 1] = word;
        }
        return map;
    }

    /**
     * Removes the punctuation marks {@code , . ! ? ;} from the text. The
     * characters are deleted, not replaced by spaces.
     *
     * @param msg text to clean
     * @return {@code msg} without those punctuation characters
     */
    public static String delectChar(String msg) {
        return msg.replaceAll("[,.!?;]", "");
    }

    /**
     * Reads the whole text file into a string.
     *
     * <p>Lines are joined with a single {@code '\n'} so that words on
     * neighbouring lines stay separated; no trailing newline is added. The file
     * is decoded with the platform default charset.
     *
     * @param filePath path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFromTxt(String filePath) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(new File(filePath)));
        try {
            StringBuilder result = new StringBuilder();
            boolean firstLine = true;
            String data;
            while ((data = br.readLine()) != null) {
                if (!firstLine) {
                    result.append('\n');
                }
                result.append(data);
                firstLine = false;
            }
            return result.toString();
        } finally {
            // Always release the file handle, even when reading fails.
            br.close();
        }
    }
}

 

测试结果:

The thermometer had dropped to 18 degrees below zero but still chose to sleep in the porch as usual In the evening the most familiar sight to me would be stars in the sky Though they were a mere sprinkle of twinkling dots yet I had become so accustomed to them that their occasional absence would bring me loneliness and ennui
to---------4
the---------4
had---------2
in---------2
me---------2
would---------2
The---------1
thermometer---------1
dropped---------1
18---------1

 

 


 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值