前言:
首先,这是我参考詹卫东老师的 PPT 讲座后,自己写出来的程序,只能说是把老人家的思想用程序语言表达出来而已。需要注意的是,这个程序只能做中文分词:因为词库是中文的,算法需要改进一下才能进行英文分词。所以,只能说是“准原创”。下面就给大家看一下源码,供参考:
正文开始:
首先,让我们看下程序运行的效果:
我们以//为结尾标识。
全部代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Collections;
namespace WordSegTest
{
/// <summary>
/// Main window of the word-segmentation demo.
/// The dictionary is loaded into a hash table when the form loads; clicking
/// button1 segments the text of textBox1 by greedy longest-prefix lookup in
/// that dictionary and shows the result, each segment followed by "/", in
/// richTextBox1.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    // Dictionary: first tab-separated column of each file line (the word) -> second column.
    Hashtable ht = new Hashtable();
    // Segmentation result built so far; every emitted segment is followed by "/".
    string s2 = "";
    // The part of the input that has not been segmented yet.
    string s1 = "";

    /// <summary>
    /// Segments the text currently in textBox1.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        s1 = textBox1.Text; // text to segment
        s2 = "";            // start from a clean result so repeated clicks do not pile up output
        Go();
    }

    /// <summary>
    /// Main segmentation loop: repeatedly takes a candidate prefix of s1, emits
    /// it if it is a dictionary word, otherwise lets Nofind shorten it, until
    /// at most one character is left; then prints the result.
    /// </summary>
    private void Go()
    {
        if (IsEmpty(s1))
        {
            // Nothing to segment; the output box is left untouched.
            return;
        }

        // Plain iteration. The earlier version re-entered Go() from inside this
        // loop, which printed a trailing single character twice once the inner
        // call returned with s1 still holding that character.
        while (!IsEmpty(s1) && !IsSingle(s1))
        {
            string w;
            if (!CheckLong(s1))
            {
                w = s1.Substring(0, 4); // long remainder: try its first four characters
            }
            else
            {
                w = s1;                 // short remainder: try all of it
            }

            if (Findit(w))
            {
                s2 = s2 + w + "/";
                // Drop only the matched prefix. String.Replace would also delete
                // every later occurrence of the same word from the input.
                s1 = s1.Substring(w.Length);
            }
            else
            {
                Nofind(w); // no direct hit: shorten the candidate until something matches
            }
        }

        // s1 is now empty or a single character; PrintR appends it plus "/" and shows the result.
        PrintR();
    }

    /// <summary>
    /// Decides how much of the remaining text is tried as one candidate.
    /// </summary>
    /// <param name="w">The remaining text.</param>
    /// <returns>true when <paramref name="w"/> has at most five characters.</returns>
    private bool CheckLong(string w)
    {
        // NOTE(review): the threshold here is 5 while Go() cuts 4 characters from
        // longer text, so a five-character remainder is tried whole. Kept as-is;
        // confirm whether both numbers were meant to be the same.
        return w.Length <= 5;
    }

    /// <summary>
    /// Appends whatever is left in s1 plus a closing "/" to the result and
    /// displays it in richTextBox1.
    /// </summary>
    private void PrintR()
    {
        s2 = s2 + s1 + "/";
        richTextBox1.Text = s2;
    }

    /// <summary>
    /// Fallback when a candidate is not in the dictionary: drops characters
    /// from the end of the candidate until it is a dictionary word or a single
    /// character, then emits it and removes it from the front of s1.
    /// </summary>
    /// <param name="w">Candidate taken from the start of s1 by Go().</param>
    private void Nofind(string w)
    {
        while (w.Length > 1 && !Findit(w))
        {
            w = w.Substring(0, w.Length - 1);
        }

        s2 = s2 + w + "/";
        // w is a prefix of s1, so cutting w.Length characters removes exactly
        // the emitted segment and nothing else.
        s1 = s1.Substring(w.Length);
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> is null or has no characters.
    /// </summary>
    private bool IsEmpty(string t)
    {
        // IsNullOrEmpty tests for null before touching Length; the reverse
        // order would throw on a null argument.
        return string.IsNullOrEmpty(t);
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> consists of exactly one character.
    /// </summary>
    private bool IsSingle(string t)
    {
        return t != null && t.Length == 1;
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> is a key of the dictionary table.
    /// </summary>
    private bool Findit(string t)
    {
        if (string.IsNullOrEmpty(t))
        {
            return false; // Hashtable rejects null keys; an empty string is never a word
        }
        return ht.ContainsKey(t);
    }

    /// <summary>
    /// Loads the dictionary file into the hash table when the form loads.
    /// Each line is split on tab; the first column is the key and the second
    /// column the stored value.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        try
        {
            // Fill a fresh table and swap it in only after the whole file was
            // read, so a failure cannot leave a half-loaded dictionary behind.
            Hashtable loaded = new Hashtable();

            // The dictionary lives here; adjust the path as needed.
            // The using blocks close the file even if reading fails.
            using (FileStream file = new FileStream(@"e:\CD.txt", FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(file))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] oneRow = line.Split('\t');
                    string word = oneRow[0];
                    if (word.Length == 0)
                    {
                        continue; // blank line or line starting with a tab: no word to store
                    }

                    // A line without a tab has no second column; store an empty value.
                    // The indexer lets a repeated word overwrite the earlier entry
                    // instead of aborting the load the way Hashtable.Add would.
                    loaded[word] = oneRow.Length > 1 ? oneRow[1] : "";
                }
            }

            ht = loaded;
            MessageBox.Show("词典初始化成功");
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.ToString());
        }
    }

    /// <summary>
    /// Clears the displayed result and the segmentation state.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        richTextBox1.Text = "";
        s1 = "";
        s2 = "";
    }
}
}
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Collections;
namespace WordSegTest
{
/// <summary>
/// Main window of the word-segmentation demo.
/// The dictionary is loaded into a hash table when the form loads; clicking
/// button1 segments the text of textBox1 by greedy longest-prefix lookup in
/// that dictionary and shows the result, each segment followed by "/", in
/// richTextBox1.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    // Dictionary: first tab-separated column of each file line (the word) -> second column.
    Hashtable ht = new Hashtable();
    // Segmentation result built so far; every emitted segment is followed by "/".
    string s2 = "";
    // The part of the input that has not been segmented yet.
    string s1 = "";

    /// <summary>
    /// Segments the text currently in textBox1.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        s1 = textBox1.Text; // text to segment
        s2 = "";            // start from a clean result so repeated clicks do not pile up output
        Go();
    }

    /// <summary>
    /// Main segmentation loop: repeatedly takes a candidate prefix of s1, emits
    /// it if it is a dictionary word, otherwise lets Nofind shorten it, until
    /// at most one character is left; then prints the result.
    /// </summary>
    private void Go()
    {
        if (IsEmpty(s1))
        {
            // Nothing to segment; the output box is left untouched.
            return;
        }

        // Plain iteration. The earlier version re-entered Go() from inside this
        // loop, which printed a trailing single character twice once the inner
        // call returned with s1 still holding that character.
        while (!IsEmpty(s1) && !IsSingle(s1))
        {
            string w;
            if (!CheckLong(s1))
            {
                w = s1.Substring(0, 4); // long remainder: try its first four characters
            }
            else
            {
                w = s1;                 // short remainder: try all of it
            }

            if (Findit(w))
            {
                s2 = s2 + w + "/";
                // Drop only the matched prefix. String.Replace would also delete
                // every later occurrence of the same word from the input.
                s1 = s1.Substring(w.Length);
            }
            else
            {
                Nofind(w); // no direct hit: shorten the candidate until something matches
            }
        }

        // s1 is now empty or a single character; PrintR appends it plus "/" and shows the result.
        PrintR();
    }

    /// <summary>
    /// Decides how much of the remaining text is tried as one candidate.
    /// </summary>
    /// <param name="w">The remaining text.</param>
    /// <returns>true when <paramref name="w"/> has at most five characters.</returns>
    private bool CheckLong(string w)
    {
        // NOTE(review): the threshold here is 5 while Go() cuts 4 characters from
        // longer text, so a five-character remainder is tried whole. Kept as-is;
        // confirm whether both numbers were meant to be the same.
        return w.Length <= 5;
    }

    /// <summary>
    /// Appends whatever is left in s1 plus a closing "/" to the result and
    /// displays it in richTextBox1.
    /// </summary>
    private void PrintR()
    {
        s2 = s2 + s1 + "/";
        richTextBox1.Text = s2;
    }

    /// <summary>
    /// Fallback when a candidate is not in the dictionary: drops characters
    /// from the end of the candidate until it is a dictionary word or a single
    /// character, then emits it and removes it from the front of s1.
    /// </summary>
    /// <param name="w">Candidate taken from the start of s1 by Go().</param>
    private void Nofind(string w)
    {
        while (w.Length > 1 && !Findit(w))
        {
            w = w.Substring(0, w.Length - 1);
        }

        s2 = s2 + w + "/";
        // w is a prefix of s1, so cutting w.Length characters removes exactly
        // the emitted segment and nothing else.
        s1 = s1.Substring(w.Length);
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> is null or has no characters.
    /// </summary>
    private bool IsEmpty(string t)
    {
        // IsNullOrEmpty tests for null before touching Length; the reverse
        // order would throw on a null argument.
        return string.IsNullOrEmpty(t);
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> consists of exactly one character.
    /// </summary>
    private bool IsSingle(string t)
    {
        return t != null && t.Length == 1;
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> is a key of the dictionary table.
    /// </summary>
    private bool Findit(string t)
    {
        if (string.IsNullOrEmpty(t))
        {
            return false; // Hashtable rejects null keys; an empty string is never a word
        }
        return ht.ContainsKey(t);
    }

    /// <summary>
    /// Loads the dictionary file into the hash table when the form loads.
    /// Each line is split on tab; the first column is the key and the second
    /// column the stored value.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        try
        {
            // Fill a fresh table and swap it in only after the whole file was
            // read, so a failure cannot leave a half-loaded dictionary behind.
            Hashtable loaded = new Hashtable();

            // The dictionary lives here; adjust the path as needed.
            // The using blocks close the file even if reading fails.
            using (FileStream file = new FileStream(@"e:\CD.txt", FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(file))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] oneRow = line.Split('\t');
                    string word = oneRow[0];
                    if (word.Length == 0)
                    {
                        continue; // blank line or line starting with a tab: no word to store
                    }

                    // A line without a tab has no second column; store an empty value.
                    // The indexer lets a repeated word overwrite the earlier entry
                    // instead of aborting the load the way Hashtable.Add would.
                    loaded[word] = oneRow.Length > 1 ? oneRow[1] : "";
                }
            }

            ht = loaded;
            MessageBox.Show("词典初始化成功");
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.ToString());
        }
    }

    /// <summary>
    /// Clears the displayed result and the segmentation state.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        richTextBox1.Text = "";
        s1 = "";
        s2 = "";
    }
}
}
看完代码、运行之后,如果感觉还不是特别理解,最好在关键部分设置断点进行调试,这样就能看出程序是如何一个一个把词分开的了。下面给出那个 PPT 的下载,以及源程序代码的下载。