// Term frequency (词频)

/*
*michzel new java files
*
*Created on 2010-10-2
*
*Copyright 2010 Anchora info company. all rights reserved
*/

package TFIDF;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.wltea.analyzer.IKSegmentation;
import org.wltea.analyzer.Lexeme;

/**
 * Command-line tool that segments a text file with IK Analyzer and reports how
 * often each distinct token occurs.
 *
 * <p>The input is read from {@code 南宋生活顾问1.txt} in the working directory
 * ({@code user.dir}). One {@code token:count} line per distinct token is written
 * to {@code resulttest.txt} in the same directory, in order of first appearance.
 * Each count and its term frequency (count divided by the total number of
 * tokens) are also printed to standard output.
 */
public class IKtf
{
    /** Name of the input file, resolved against the working directory. */
    private static final String INPUT_FILE_NAME = "南宋生活顾问1.txt";

    /** Name of the output file, resolved against the working directory. */
    private static final String OUTPUT_FILE_NAME = "resulttest.txt";

    /**
     * Runs the read / segment / count / write pipeline.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // Use the platform separator so the files are resolved inside user.dir on
        // every OS; a hard-coded backslash only works as a separator on Windows.
        String workingDir = System.getProperty("user.dir");
        String filepathfrom = workingDir + File.separator + INPUT_FILE_NAME;
        String filepathto = workingDir + File.separator + OUTPUT_FILE_NAME;

        String text = TextManager.Read(filepathfrom);
        if (text == null)
        {
            // NOTE(review): TextManager is defined elsewhere; this guard assumes a
            // failed read may surface as null. StringReader rejects null input.
            System.err.println("Could not read input file: " + filepathfrom);
            return;
        }
        System.out.println(text);

        List<String> wordsList = segment(text);
        System.out.println("***************");

        Map<String, Integer> counts = countWords(wordsList);

        // Build the report with a StringBuilder instead of repeated String concatenation.
        StringBuilder resultString = new StringBuilder();
        for (Map.Entry<String, Integer> entry : counts.entrySet())
        {
            int count = entry.getValue().intValue();
            String line = entry.getKey() + ":" + count;
            resultString.append(line).append("\r\n");
            System.out.println(line);

            // wordsList is non-empty whenever this loop body runs, so no division by zero.
            double tf = (double) count / wordsList.size();
            System.out.println(tf);
        }
        TextManager.Write(filepathto, resultString.toString());
    }

    /**
     * Splits {@code text} into tokens using IK Analyzer, echoing every lexeme and
     * the running token total to standard output.
     *
     * <p>If the segmenter throws an {@link IOException}, the error is reported on
     * standard error and the tokens collected so far are returned.
     *
     * @param text the text to segment; must not be null
     * @return the token texts in the order the segmenter produced them
     */
    private static List<String> segment(String text)
    {
        List<String> tokens = new ArrayList<String>();
        // The 'false' flag is passed exactly as before; its meaning is defined by IK Analyzer.
        IKSegmentation ikSeg = new IKSegmentation(new StringReader(text), false);
        try
        {
            Lexeme lexeme;
            while ((lexeme = ikSeg.next()) != null)
            {
                System.out.println(lexeme);
                tokens.add(lexeme.getLexemeText());
                System.out.println(tokens.size());
            }
        }
        catch (IOException e)
        {
            // Best effort: keep whatever was segmented before the failure.
            System.err.println("Segmentation failed after " + tokens.size()
                    + " tokens; continuing with partial results");
            e.printStackTrace();
        }
        return tokens;
    }

    /**
     * Counts the occurrences of each distinct word in a single pass.
     *
     * @param words the tokens to count
     * @return a map from word to occurrence count, iterating in order of first appearance
     */
    private static Map<String, Integer> countWords(List<String> words)
    {
        // LinkedHashMap keeps first-seen order, matching the order of the output lines.
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        for (String word : words)
        {
            Integer previous = counts.get(word);
            counts.put(word, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
        }
        return counts;
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值