查找英文文章中的功能词,并按出现次数对其进行排序的实现

public class FuncWordToMap {
 public static void toMap(File[] filelist, Map map){
 for(int i=0;i<filelist.length;i++){
    String source = ReadFileByBuffer.ReadFileByBufferdeReader(filelist[i].getAbsolutePath());
    String[] s = source.split(" ");
    for(int j=0;j<s.length;j++){
      String str=s[j];
      String[] ss=str.split("_");
      String word=ss[0];
      String type=ss[1];
     if(type.equals("IN")||type.equals("WP")||type.equals("PP")||type.equals("DT")||type.equals("CC")||type.equals("MD")||type.equals("RP")||type.equals("TO")){
       if(map.containsKey(word)){
        Integer time=(Integer)map.get(word);
        int t=time.intValue();
        time=new Integer(t+1);
        map.put(word, time);
       }else{
        map.put(word, new Integer(1));
       }
      }
       }
    }
 }

}

public class FuncWordMapSort {

 /**
  * Returns the entries of a word-count map ordered by count, highest first.
  *
  * <p>The sort is stable, so entries with equal counts keep the order in which
  * the map's entry set yields them. The input map itself is not modified.
  *
  * @param map1 map whose keys are words and whose values are {@link Integer} counts
  * @param out  not used by the code visible here; kept so existing callers still compile
  * @return a new list of the map's entries sorted by descending count
  * @throws IOException declared for compatibility; the code visible here never throws it
  */
 @SuppressWarnings({"unchecked", "rawtypes"})
 public static List<Map.Entry<String, Integer>> wordSort(HashMap map1,OutputStream out) throws IOException{
   List<Map.Entry<String, Integer>> list = new LinkedList<Map.Entry<String, Integer>>();
   list.addAll(map1.entrySet());

   Collections.sort(list, new Comparator<Map.Entry<String, Integer>>()
   {
      @Override
      public int compare(Map.Entry<String, Integer> obj1, Map.Entry<String, Integer> obj2)
      {
          // Sort from high to low: compare the second value against the first.
          return obj2.getValue().compareTo(obj1.getValue());
      }
   });

   return list;
 }

}

打印到arff文件中如下:

@relation amazonReview
@attribute "the" numeric
@attribute "a" numeric
@attribute "and" numeric
@attribute "to" numeric
@attribute "of" numeric
@attribute "is" numeric
@attribute "I" numeric
@attribute "in" numeric
@attribute "it" numeric
@attribute "that" numeric
@attribute "for" numeric
@attribute "you" numeric
@attribute "this" numeric
@attribute "with" numeric
@attribute "are" numeric
@attribute "on" numeric............

@data
20,13,7,7,8,6,14,2,9,5,6,8,5,5,0,6,5,1,2,1,3,2,1,1,2,2,1,0,1,7,0,3,0,0,1,1,0,1,0,1,2,2,0,1,0,1,0,0,0,0,0,0,1,3,1,0,0,5,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Chandler
15,8,8,5,5,1,13,14,1,5,5,4,0,2,5,1,0,1,4,0,2,0,5,2,1,3,0,1,0,1,0,1,0,0,1,0,0,0,3,5,4,2,0,0,1,2,1,0,0,0,1,2,0,0,2,1,3,3,0,2,0,0,1,0,0,2,0,0,0,2,0,1,0,0,0,0,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Chandler
9,4,8,3,4,2,10,2,2,3,5,1,3,0,0,2,1,2,3,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,2,0,1,0,2,0,2,0,1,0,0,0,0,0,0,0,0,0,1,2,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Chandler
7,11,11,6,1

……(其余数据省略)以上每一行对应一篇文章,各列的数值依次为对应功能词在该文章中出现的次数。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值