今天从网上找了一些关于Lucene.NET的资料,自己做个demo测试了一下。
Lucene.NET 是开源项目,从网上很容易找到源代码。新建一个 Web 测试页面和一个控制台程序,其中控制台程序用来创建 Lucene 的索引。
控制台代码如下:
using
System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.IO;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.CJK;
using System.Data.SqlClient;
using System.Data;
namespace Test
{
/// <summary>
/// Console program that reads every row of the Article table and writes it
/// into a Lucene.NET index on disk.
/// </summary>
class TokenizerTest
{
    // NOTE(review): the padding spaces inside the string literals of this class look like an
    // artifact of the formatter used for the listing. They are kept byte-for-byte here;
    // confirm the real column names, field names and index path before running.
    public static string connstr = " server=.;database=test;uid=sa;pwd= ";
    public static SqlConnection mycon;

    /// <summary>
    /// Opens the Article table, builds the index and reports the outcome on the console.
    /// </summary>
    static void Main(string[] args)
    {
        try
        {
            SqlDataReader myred = OpenTable();
            CreateIndex(myred);
            // Only reached when indexing finished without an exception.
            Console.WriteLine(" 索引创建完成 ");
        }
        catch (Exception e)
        {
            // Report the failure instead of claiming that the index was created.
            Console.Error.WriteLine(e);
        }
        Console.Read();
    }

    /// <summary>
    /// Opens a connection (stored in <see cref="mycon"/>) and returns a forward-only
    /// reader over all rows of the Article table.
    /// </summary>
    /// <returns>
    /// An open reader. Closing the reader also closes the underlying connection.
    /// </returns>
    public static SqlDataReader OpenTable()
    {
        mycon = new SqlConnection(connstr);
        try
        {
            mycon.Open();
            SqlCommand mycom = new SqlCommand(" select * from Article ", mycon);
            // CloseConnection ties the connection lifetime to the reader, so the caller
            // releases both by closing the reader.
            return mycom.ExecuteReader(CommandBehavior.CloseConnection);
        }
        catch
        {
            // No reader was handed out, so nobody else can close the connection.
            mycon.Close();
            throw;
        }
    }

    /// <summary>
    /// Builds the index over the database fields: one document per row, with the
    /// id stored untokenized and the title and summary stored and tokenized.
    /// </summary>
    /// <param name="myred">Open reader over the rows to index; it is closed before returning.</param>
    /// <returns>The writer that produced the index; it has already been closed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="myred"/> is null.</exception>
    public static IndexWriter CreateIndex(SqlDataReader myred)
    {
        if (myred == null)
        {
            throw new ArgumentNullException("myred");
        }

        IndexWriter writer = null;
        try
        {
            // Index storage location; 'true' creates a new index at that location.
            writer = new IndexWriter(" D:/index/ ", new ChineseAnalyzer(), true);
            while (myred.Read())
            {
                Document doc = new Document();
                doc.Add(new Field(" id ", myred[" id "].ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field(" title ", myred[" title "].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field(" summary ", myred[" summary "].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                writer.AddDocument(doc);
            }
            writer.Optimize(); // optimize the index
        }
        finally
        {
            // Release the writer and the reader on success and on failure alike.
            // Any exception propagates to the caller instead of being swallowed.
            try
            {
                if (writer != null)
                {
                    writer.Close();
                }
            }
            finally
            {
                myred.Close();
            }
        }
        return writer;
    }
}
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.IO;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.CJK;
using System.Data.SqlClient;
using System.Data;
namespace Test
{
/// <summary>
/// Console program that reads every row of the Article table and writes it
/// into a Lucene.NET index on disk.
/// </summary>
class TokenizerTest
{
    // NOTE(review): the padding spaces inside the string literals of this class look like an
    // artifact of the formatter used for the listing. They are kept byte-for-byte here;
    // confirm the real column names, field names and index path before running.
    public static string connstr = " server=.;database=test;uid=sa;pwd= ";
    public static SqlConnection mycon;

    /// <summary>
    /// Opens the Article table, builds the index and reports the outcome on the console.
    /// </summary>
    static void Main(string[] args)
    {
        try
        {
            SqlDataReader myred = OpenTable();
            CreateIndex(myred);
            // Only reached when indexing finished without an exception.
            Console.WriteLine(" 索引创建完成 ");
        }
        catch (Exception e)
        {
            // Report the failure instead of claiming that the index was created.
            Console.Error.WriteLine(e);
        }
        Console.Read();
    }

    /// <summary>
    /// Opens a connection (stored in <see cref="mycon"/>) and returns a forward-only
    /// reader over all rows of the Article table.
    /// </summary>
    /// <returns>
    /// An open reader. Closing the reader also closes the underlying connection.
    /// </returns>
    public static SqlDataReader OpenTable()
    {
        mycon = new SqlConnection(connstr);
        try
        {
            mycon.Open();
            SqlCommand mycom = new SqlCommand(" select * from Article ", mycon);
            // CloseConnection ties the connection lifetime to the reader, so the caller
            // releases both by closing the reader.
            return mycom.ExecuteReader(CommandBehavior.CloseConnection);
        }
        catch
        {
            // No reader was handed out, so nobody else can close the connection.
            mycon.Close();
            throw;
        }
    }

    /// <summary>
    /// Builds the index over the database fields: one document per row, with the
    /// id stored untokenized and the title and summary stored and tokenized.
    /// </summary>
    /// <param name="myred">Open reader over the rows to index; it is closed before returning.</param>
    /// <returns>The writer that produced the index; it has already been closed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="myred"/> is null.</exception>
    public static IndexWriter CreateIndex(SqlDataReader myred)
    {
        if (myred == null)
        {
            throw new ArgumentNullException("myred");
        }

        IndexWriter writer = null;
        try
        {
            // Index storage location; 'true' creates a new index at that location.
            writer = new IndexWriter(" D:/index/ ", new ChineseAnalyzer(), true);
            while (myred.Read())
            {
                Document doc = new Document();
                doc.Add(new Field(" id ", myred[" id "].ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field(" title ", myred[" title "].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field(" summary ", myred[" summary "].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                writer.AddDocument(doc);
            }
            writer.Optimize(); // optimize the index
        }
        finally
        {
            // Release the writer and the reader on success and on failure alike.
            // Any exception propagates to the caller instead of being swallowed.
            try
            {
                if (writer != null)
                {
                    writer.Close();
                }
            }
            finally
            {
                myred.Close();
            }
        }
        return writer;
    }
}
}
创建好索引之后,就可以使用了。测试页面代码如下:
/// <summary>
/// Click handler that runs the same keyword search twice, once against the Lucene
/// index and once against SQL Server with LIKE, and shows each result set together
/// with its row count and elapsed milliseconds.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Search_Click(object sender, EventArgs e)
{
    // NOTE(review): the padding spaces inside the string literals look like an artifact of the
    // formatter used for the listing. Field and column names are kept byte-for-byte so they
    // keep matching whatever names the index was built with; confirm against the indexer.
    string keyword = this.tj.Text.Trim();

    // ---- Lucene search ----
    DataTable luctb = new DataTable();
    luctb.Columns.Add(" id ");
    luctb.Columns.Add(" title ");
    luctb.Columns.Add(" summary ");

    Stopwatch sw = new Stopwatch();
    sw.Start();
    // 'seacher' (sic) is declared outside this listing; it returns the Lucene hits for the keyword.
    Hits myhit = seacher(keyword);
    for (int i = 0; i < myhit.Length(); i++)
    {
        Document doc = myhit.Doc(i);
        DataRow dr = luctb.NewRow();
        // Document.Get already returns a string and yields null for a missing field,
        // so fall back to an empty string instead of calling ToString() on null.
        dr[0] = doc.Get(" id ") ?? string.Empty;
        dr[1] = doc.Get(" title ") ?? string.Empty;
        dr[2] = doc.Get(" summary ") ?? string.Empty;
        luctb.Rows.Add(dr);
        dr.AcceptChanges();
    }
    sw.Stop();

    this.Label1.Text = " 共 " + luctb.Rows.Count + " 行 " + sw.ElapsedMilliseconds.ToString();
    this.gview.DataSource = luctb;
    this.gview.DataBind();

    // ---- SQL LIKE search ----
    DataTable tb = new DataTable();
    Stopwatch swsql = new Stopwatch();
    mycon = new SqlConnection(connstr);
    try
    {
        // The connection is opened before the stopwatch starts, as in the original
        // measurement, so only the query itself is timed.
        mycon.Open();
        swsql.Start();
        // The keyword is sent as a parameter, never formatted into the SQL text,
        // so quotes typed by the user cannot alter the statement.
        using (SqlCommand mycom = new SqlCommand(" select Id,title,summary from Article where summary like @keyword or title like @keyword ", mycon))
        {
            mycom.Parameters.AddWithValue("@keyword", "%" + keyword + "%");
            using (SqlDataAdapter adp = new SqlDataAdapter(mycom))
            {
                adp.Fill(tb);
            }
        }
        swsql.Stop();
    }
    finally
    {
        // Release the connection even when the query fails.
        mycon.Close();
    }

    this.Label2.Text = " 共 " + tb.Rows.Count + " 行 " + swsql.ElapsedMilliseconds.ToString();
    this.gviewsql.DataSource = tb;
    this.gviewsql.DataBind();
}
代码中分别利用 Lucene 和 SQL 语句,从 Article 表中查询 title 和 summary 两个字段中包含查询关键字的记录。表中共 20 万+ 条数据。
测试结果:Lucene 耗时:共 35 行,16 毫秒;SQL 耗时:共 35 行,2331 毫秒。