正 文:
C#中文分词算法:IKAnalyzerNet的使用。
using IKAnalyzerNet;
using IKAnalyzerNet.dict;
using Lucene.Net.Analysis;
using Wintellect.PowerCollections;
使用方法举例:
/// <summary>
/// Tokenizes the text currently in <c>richTextBox2</c> with <c>IKAnalyzer</c> and appends the
/// result to <c>richTextBox1</c>: one line per token (running index, start offset, end offset,
/// term text) followed by a summary line (input length, elapsed milliseconds, token count,
/// tokens per second).
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    String testString = richTextBox2.Text;
    String slen = testString.Length.ToString();

    // Buffer the output and touch the control once at the end: appending to
    // richTextBox1.Text per token is quadratic and would be measured as "tokenizing" time.
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
    int m = 0;

    IKAnalyzer ika = new IKAnalyzer();
    using (System.IO.TextReader r = new System.IO.StringReader(testString))
    {
        TokenStream ts = ika.TokenStream("TestField", r);
        try
        {
            // Stopwatch is monotonic and higher resolution than DateTime.Now.Ticks.
            watch.Start();
            for (Token t = ts.Next(); t != null; t = ts.Next())
            {
                m++;
                output.Append(m).Append(")")
                      .Append(t.StartOffset()).Append(",").Append(t.EndOffset())
                      .Append(" = ").Append(t.TermText())
                      .Append("\r\n");
            }
            watch.Stop();
        }
        finally
        {
            // Release the token stream even if tokenization throws.
            ts.Close();
        }
    }

    long elapsedMs = watch.ElapsedMilliseconds;
    double elapsedSeconds = watch.Elapsed.TotalSeconds;

    // Short inputs can finish in under one timer tick; report 0 instead of dividing by zero
    // (the previous float division produced Infinity/NaN and an undefined int cast).
    long tokensPerSecond = elapsedSeconds > 0 ? (long)(m / elapsedSeconds) : 0;

    output.Append("长度:" + slen + " 耗时: " + elapsedMs + "ms" + " 分词个数:" + m + " 效率(词/秒):" + tokensPerSecond + "\r\n");
    richTextBox1.Text += output.ToString();
}
下载地址:
中文分词算法:IKAnalyzerNet.rar
C#中文分词算法:IKAnalyzerNet的使用。
使用前需要先在项目中引用 IKAnalyzerNet.dll、Lucene.Net.dll 和 PowerCollections.dll 这三个 DLL 库文件(已打包,下载地址见本文下方)。以下代码在 VS2008 下调试通过,首先添加命名空间引用:
using IKAnalyzerNet;
using IKAnalyzerNet.dict;
using Lucene.Net.Analysis;
using Wintellect.PowerCollections;
使用方法举例:
/// <summary>
/// Tokenizes the text currently in <c>richTextBox2</c> with <c>IKAnalyzer</c> and appends the
/// result to <c>richTextBox1</c>: one line per token (running index, start offset, end offset,
/// term text) followed by a summary line (input length, elapsed milliseconds, token count,
/// tokens per second).
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    String testString = richTextBox2.Text;
    String slen = testString.Length.ToString();

    // Buffer the output and touch the control once at the end: appending to
    // richTextBox1.Text per token is quadratic and would be measured as "tokenizing" time.
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
    int m = 0;

    IKAnalyzer ika = new IKAnalyzer();
    using (System.IO.TextReader r = new System.IO.StringReader(testString))
    {
        TokenStream ts = ika.TokenStream("TestField", r);
        try
        {
            // Stopwatch is monotonic and higher resolution than DateTime.Now.Ticks.
            watch.Start();
            for (Token t = ts.Next(); t != null; t = ts.Next())
            {
                m++;
                output.Append(m).Append(")")
                      .Append(t.StartOffset()).Append(",").Append(t.EndOffset())
                      .Append(" = ").Append(t.TermText())
                      .Append("\r\n");
            }
            watch.Stop();
        }
        finally
        {
            // Release the token stream even if tokenization throws.
            ts.Close();
        }
    }

    long elapsedMs = watch.ElapsedMilliseconds;
    double elapsedSeconds = watch.Elapsed.TotalSeconds;

    // Short inputs can finish in under one timer tick; report 0 instead of dividing by zero
    // (the previous float division produced Infinity/NaN and an undefined int cast).
    long tokensPerSecond = elapsedSeconds > 0 ? (long)(m / elapsedSeconds) : 0;

    output.Append("长度:" + slen + " 耗时: " + elapsedMs + "ms" + " 分词个数:" + m + " 效率(词/秒):" + tokensPerSecond + "\r\n");
    richTextBox1.Text += output.ToString();
}
截图如下:
下载地址:
中文分词算法:IKAnalyzerNet.rar