汉语分词算法

// Setup: in a WinForms project, place one Button (button1) and two RichTextBox controls
// (richTextBox1 for the input text, richTextBox2 for the result) on the form.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace cutWord
{
    /// <summary>
    /// Demo form for a dictionary-free Chinese word segmentation heuristic:
    /// every 2- to 4-character run of Chinese characters is counted, and the
    /// runs that repeat are reported as candidate words.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Maximum number of candidate words written to the output box.</summary>
        private const int MaxDisplayed = 16;

        /// <summary>Shortest candidate word length, in characters.</summary>
        private const int MinWordLength = 2;

        /// <summary>Longest candidate word length, in characters.</summary>
        private const int MaxWordLength = 4;

        /// <summary>
        /// Matches any single character outside the U+4E00..U+9FA5 range
        /// (the range the original pattern used to recognise Chinese characters).
        /// </summary>
        private static readonly Regex NonChinese =
            new Regex(@"[^\u4e00-\u9fa5]", RegexOptions.Compiled);

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Segments the text in richTextBox1 and writes the most frequent
        /// candidates to richTextBox2, one "word[count]" entry per line.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            List<KeyValuePair<string, int>> ranked = cutWord(this.richTextBox1.Text);

            // Build the output once instead of repeatedly appending to the control's Text.
            StringBuilder output = new StringBuilder();
            int shown = 0;

            // The list is sorted ascending, so walk it backwards to emit the
            // most frequent entries first. Index 0 is included.
            for (int i = ranked.Count - 1; i >= 0 && shown < MaxDisplayed; i--, shown++)
            {
                output.Append(ranked[i].Key)
                      .Append('[')
                      .Append(ranked[i].Value)
                      .Append("]\n");
            }

            this.richTextBox2.Text = output.ToString();
        }

        /// <summary>
        /// Counts every substring of <paramref name="article"/> that is 2 to 4
        /// characters long and consists solely of Chinese characters.
        /// </summary>
        /// <param name="article">Text to analyse; null or empty yields an empty list.</param>
        /// <returns>
        /// The substrings occurring more than once, paired with their counts and
        /// sorted ascending by count, then ascending by length.
        /// </returns>
        private List<KeyValuePair<string, int>> cutWord(string article)
        {
            List<KeyValuePair<string, int>> result = new List<KeyValuePair<string, int>>();
            if (string.IsNullOrEmpty(article))
            {
                return result;
            }

            Dictionary<string, int> counts = new Dictionary<string, int>();
            for (int length = MinWordLength; length <= MaxWordLength; length++)
            {
                // "<=" so the final n-gram ending at the last character is counted too.
                for (int start = 0; start <= article.Length - length; start++)
                {
                    string candidate = article.Substring(start, length);

                    // Skip n-grams containing anything outside the Chinese range.
                    if (NonChinese.IsMatch(candidate))
                    {
                        continue;
                    }

                    int seen;
                    counts.TryGetValue(candidate, out seen);
                    counts[candidate] = seen + 1;
                }
            }

            foreach (KeyValuePair<string, int> entry in counts)
            {
                // A substring seen only once gives no evidence of being a word.
                if (entry.Value > 1)
                {
                    result.Add(entry);
                }
            }

            result.Sort(CompareByCountThenLength);
            return result;
        }

        /// <summary>
        /// Orders entries ascending by occurrence count; ties are broken by
        /// ascending key length.
        /// </summary>
        private static int CompareByCountThenLength(
            KeyValuePair<string, int> a, KeyValuePair<string, int> b)
        {
            int byCount = a.Value.CompareTo(b.Value);
            if (byCount != 0)
            {
                return byCount;
            }

            return a.Key.Length.CompareTo(b.Key.Length);
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值