在日常应用中,数据里常常存在重复值,而把重复值过滤成一条的需求越来越多。如何在程序中设计过滤算法,同时兼顾算法效率,就成了一个值得关注的问题。
下面对两种算法进行比较
一.测试数据:共100000条,过滤后结果只有9810条
二.算法说明
方法一:用hashtable来保存数据列表,用hashtable的ContainsKey来判断是否重复来过滤.
方法二:用string的contains来判断是否重复来过滤.
三.算法代码
/// <summary>
/// Method 1: filters duplicate values with a <see cref="Hashtable"/>.
/// Runs the query, keeps each distinct value of the first column of the first
/// result table, binds the distinct values to <c>GridView1</c>, and writes the
/// row count and elapsed seconds to <c>lblmsg</c>.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    // Stopwatch is the timer intended for measuring elapsed time; subtracting
    // DateTime.Now values depends on the wall clock.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    Hashtable seen = new Hashtable();
    ds = SqlHelper.ExecuteDataset(DBstr, CommandType.Text, sql);

    // Filtering: a value is stored only the first time it is encountered.
    DataRowCollection rows = ds.Tables[0].Rows;
    int len = rows.Count;
    for (int i = 0; i < len; i++)
    {
        object value = rows[i][0];
        if (!seen.ContainsKey(value))
        {
            seen.Add(value, value);
        }
    }

    DataTable dt = new DataTable();
    DataColumn dcValue = new DataColumn("value", typeof(string));
    dt.Columns.Add(dcValue);

    // Copy the distinct keys from the hashtable into the DataTable.
    // NOTE(review): Hashtable enumeration order is not the insertion order, so
    // the grid rows may not follow the order returned by the query.
    foreach (object key in seen.Keys)
    {
        dt.Rows.Add(new string[] { Convert.ToString(key) });
    }

    // Bind the data.
    GridView1.DataSource = dt;
    GridView1.DataBind();

    timer.Stop();
    lblmsg.Text = string.Format("共:{0}条,耗时:{1}", dt.Rows.Count, timer.Elapsed.TotalSeconds.ToString());
}
/// <summary>
/// Method 2: filters duplicate values with <c>string.Contains</c>.
/// Runs the query and, for the first column of each row of the first result
/// table, checks whether the value already occurs in a growing '|'-delimited
/// string; unseen values are appended to that string and added to the table
/// bound to <c>GridView1</c>. The row count and elapsed seconds are written to
/// <c>lblmsg</c>.
/// </summary>
/// <remarks>
/// The accumulated string is rebuilt and scanned for every row; this is the
/// string-based technique this handler exists to measure, so it is kept as is.
/// NOTE(review): a value that itself contains '|' can make a different value
/// look like a duplicate (after "a|b" is stored, "b" matches "|b|").
/// </remarks>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button2_Click(object sender, EventArgs e)
{
    // Stopwatch is the timer intended for measuring elapsed time; subtracting
    // DateTime.Now values depends on the wall clock.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    StringBuilder strfilter = new StringBuilder();
    ds = SqlHelper.ExecuteDataset(DBstr, CommandType.Text, sql);

    DataRowCollection rows = ds.Tables[0].Rows;
    int len = rows.Count;

    DataTable dt = new DataTable();
    DataColumn dcValue = new DataColumn("value", typeof(string));
    dt.Columns.Add(dcValue);

    // Filtering: every stored value is wrapped as "|value|" so that a match
    // must cover a whole entry rather than part of one.
    for (int i = 0; i < len; i++)
    {
        object cell = rows[i][0];
        string entry = cell.ToString() + "|";
        if (!("|" + strfilter.ToString()).Contains("|" + entry))
        {
            dt.Rows.Add(cell);
            strfilter.Append(entry);
        }
    }

    // Bind the data.
    GridView1.DataSource = dt;
    GridView1.DataBind();

    timer.Stop();
    lblmsg.Text = string.Format("共:{0}条,耗时:{1}", dt.Rows.Count, timer.Elapsed.TotalSeconds.ToString());
}
四.测试结果
方法一:hashtable
方法二:string字符过滤
Label 共:9810条,耗时:67.921875
比较可见,两种方法的耗时相差约67倍.