成功将99收藏夹实现基于Lucene的站内全文搜索
99收藏夹原来的站内收藏搜索是基于数据库的 like '%关键字%' 语句,发现速度有点慢,于是想到用Lucene来实现按用户名字和共享收藏的链接标题搜索站内收藏的功能,昨天下午做好了,但是结果只能说差强人意。
首先我用Lucene把数据从数据库导出建立索引,代码如下,注:里面的ChinaTokene方法是调用www.sj110.com的中文分词组件
/// <summary>
/// Builds a Lucene index from rows of the shared-favorites table.
/// Each row becomes one document with the fields this_url, this_name,
/// this_title (word-segmented, searchable) and all_title (stored only).
/// </summary>
public class IntranetIndexer
{
    // File that records the id of the last row that was indexed, so the next
    // run can continue from there.
    private const string EndIdFilePath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";

    private IndexWriter writer;

    /// <summary>
    /// Opens an index writer on the given directory.
    /// </summary>
    /// <param name="dictory">Path of the index directory.</param>
    public IntranetIndexer(string dictory)
    {
        // A missing directory is treated like an empty one: a new index is
        // requested instead of failing inside Directory.GetFiles.
        bool createNew = !Directory.Exists(dictory) || Directory.GetFiles(dictory).Length == 0;
        writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), createNew);
        writer.SetUseCompoundFile(true);
    }

    /// <summary>
    /// Adds every row of the reader to the index and then writes the this_id of
    /// the last row read to the end-id file. Nothing is written when the reader
    /// has no rows.
    /// </summary>
    /// <param name="dr">
    /// Reader exposing the columns this_id, this_title, this_url and this_name.
    /// NOTE(review): the stored id is the last row read, so rows should arrive
    /// ordered by this_id ascending - confirm against the caller's query.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="dr"/> is null.</exception>
    public void AddDataReaderToIndex(SqlDataReader dr)
    {
        if (dr == null)
        {
            throw new ArgumentNullException("dr");
        }
        if (!dr.HasRows)
        {
            return;
        }

        string endId = "0";
        while (dr.Read())
        {
            string title = dr["this_title"].ToString();

            Document doc = new Document();
            doc.Add(Field.Keyword("this_url", dr["this_url"].ToString()));
            doc.Add(Field.Keyword("this_name", dr["this_name"].ToString()));
            // Searchable title: pre-segmented so WhitespaceAnalyzer can split it.
            doc.Add(Field.Text("this_title", ChinaTokene(title)));
            // Original, unsegmented title kept for display only.
            doc.Add(Field.UnIndexed("all_title", title));
            writer.AddDocument(doc);

            endId = dr["this_id"].ToString();
        }

        using (StreamWriter sw = new StreamWriter(EndIdFilePath))
        {
            sw.WriteLine(endId);
        }
    }

    /// <summary>
    /// Segments the text with the Sj110 tokenizer and joins the resulting tokens
    /// with single spaces.
    /// </summary>
    /// <param name="s">Text to segment.</param>
    /// <returns>
    /// The space-separated tokens, or an empty string when the tokenizer
    /// yields no tokens.
    /// </returns>
    private string ChinaTokene(string s)
    {
        List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
        if (results == null || results.Count == 0)
        {
            // The previous StringBuilder.Remove(Length - 2, 1) threw here.
            return string.Empty;
        }

        // Join keeps every token intact; trimming by position used to cut the
        // last character off the final token.
        return string.Join(" ", results.ToArray());
    }

    /// <summary>
    /// Optimizes the index and closes the writer.
    /// </summary>
    public void Close()
    {
        try
        {
            writer.Optimize();
        }
        finally
        {
            // Close even when Optimize fails so the writer is not left open.
            writer.Close();
        }
    }
}
当然,建完索引之后要把数据库中最后一条已索引记录的ID写入文本文件,
因为前面获取DataReader的时候,我们是先从这个文本文件里读出最后一条记录的ID,再查询ID比它大的记录的。
代码如下
/// <summary>
/// Click handler that indexes the favorites added since the previous run.
/// Reads the last indexed id from endid.txt, selects the rows of userFav with
/// a larger this_id and passes them to <c>IntranetIndexer</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void buttonAddIndex_Click(object sender, EventArgs e)
{
    const string endIdPath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";
    const string indexPath = @"C:\Inetpub\wwwroot\pwqdream\index";
    // Ordered by id because the indexer records the id of the last row it reads.
    const string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id order by this_id";

    // A missing, empty, malformed or negative marker means "start from the beginning".
    int endId = 0;
    if (File.Exists(endIdPath))
    {
        using (StreamReader sr = new StreamReader(endIdPath))
        {
            if (!int.TryParse(sr.ReadLine(), out endId) || endId < 0)
            {
                endId = 0;
            }
        }
    }

    string connStr = ConfigurationManager.AppSettings[0];

    using (SqlConnection conn = new SqlConnection(connStr))
    using (SqlCommand comm = new SqlCommand(selStr, conn))
    {
        comm.Parameters.AddWithValue("@this_id", endId);
        conn.Open();

        using (SqlDataReader dr = comm.ExecuteReader())
        {
            IntranetIndexer writer = new IntranetIndexer(indexPath);
            try
            {
                writer.AddDataReaderToIndex(dr);
            }
            finally
            {
                // Always close the index writer, even when indexing fails.
                writer.Close();
            }
        }
    }
}
查询的时候用的是ajax调用后台方法,多字段搜索,且都先分词了,结果很牵强:如果输入的是名字,那么取不到url和标题;如果输入的像是标题,则取不到url和名字。还有界面很不友好,下面是代码:
/**
 * Starts a search of the shared favorites.
 * Shows the search term in the title cell, displays the loading indicator and
 * calls the server method; the response is handled by onSelectUserShellFavCom.
 * Does nothing when the search term is empty.
 *
 * @param obj        the search term typed by the user
 * @param pageNumber the result page to request
 */
function showUserShellFav(obj, pageNumber)
{
    var selValue = document.getElementById("searchSel").value;
    if (obj == "")
    {
        return;
    }

    var t = document.getElementById("titleTd");
    t.innerHTML = "搜索关于--<font color='red'></font>--的共享收藏";
    // The term comes straight from the user: insert it as a text node so it can
    // never be interpreted as markup.
    t.getElementsByTagName("font")[0].appendChild(document.createTextNode(obj));

    showLoad();
    PwqzcDream.test.SelectUserShellFav(obj, pageNumber, selValue, onSelectUserShellFavCom);
}
/**
 * Callback for SelectUserShellFav: renders the pager and the result list.
 *
 * rel.value.Tables[0] holds the result rows (this_url, this_title, this_name)
 * and rel.value.Tables[1] holds rows whose html property is pager markup.
 * Logged-in users get draggable links opened through openLink; anonymous
 * users get plain links that open in a new window.
 *
 * @param rel the response object passed by the ajax framework
 */
function onSelectUserShellFavCom(rel)
{
    var NOT_FOUND = "对不起,没有找到相关的收藏!";

    // Escapes a value for use inside markup or a quoted attribute.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    hideLoad();
    var pageIndexTd = document.getElementById("pageIndexTd");
    var c = document.getElementById("contentTd");

    // A failed call carries no value; report it like an empty result.
    if (!rel || !rel.value)
    {
        pageIndexTd.innerHTML = "";
        c.innerHTML = NOT_FOUND;
        return;
    }

    var dtContent = rel.value.Tables[0];
    var dtCount = rel.value.Tables[1];

    // NOTE(review): html is inserted unescaped, as before - presumably pager
    // markup built by the server; confirm it cannot contain user input.
    var pagerHtml = "";
    for (var r = 0; r < dtCount.Rows.length; r++)
    {
        pagerHtml += dtCount.Rows[r].html;
    }
    pageIndexTd.innerHTML = pagerHtml;

    if (dtContent.Rows.length == 0)
    {
        c.innerHTML = NOT_FOUND;
        return;
    }

    var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
    var items = "";
    for (var row = 0; row < dtContent.Rows.length; row++)
    {
        var cur = dtContent.Rows[row];
        // Titles, names and urls are user-submitted, so they are escaped.
        // NOTE(review): escaping does not validate the url scheme.
        var url = escapeHtml(cur.this_url);
        var anchor = loggedIn
            ? "<a type='" + url + "' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>"
            : "<a href='" + url + "' style='color:#0088e4;' target='_blank'>";
        items += "<li>" + anchor + escapeHtml(cur.this_title) + "</a>  <font color='red'>" + escapeHtml(cur.this_name) + "</font>收藏</li>";
    }
    // Assign once instead of re-parsing the list on every appended row.
    c.innerHTML = items;
}
下面是按照用户名字搜索结果的抓图,当然,速度提升是很明显的
下面是按照标题搜索的结果的抓图:
同时也希望大家能够喜欢99收藏夹,谢谢大家!
首先我用Lucene把数据从数据库导出建立索引,代码如下,注:里面的ChinaTokene方法是调用www.sj110.com的中文分词组件
/// <summary>
/// Builds a Lucene index from rows of the shared-favorites table.
/// Each row becomes one document with the fields this_url, this_name,
/// this_title (word-segmented, searchable) and all_title (stored only).
/// </summary>
public class IntranetIndexer
{
    // File that records the id of the last row that was indexed, so the next
    // run can continue from there.
    private const string EndIdFilePath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";

    private IndexWriter writer;

    /// <summary>
    /// Opens an index writer on the given directory.
    /// </summary>
    /// <param name="dictory">Path of the index directory.</param>
    public IntranetIndexer(string dictory)
    {
        // A missing directory is treated like an empty one: a new index is
        // requested instead of failing inside Directory.GetFiles.
        bool createNew = !Directory.Exists(dictory) || Directory.GetFiles(dictory).Length == 0;
        writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), createNew);
        writer.SetUseCompoundFile(true);
    }

    /// <summary>
    /// Adds every row of the reader to the index and then writes the this_id of
    /// the last row read to the end-id file. Nothing is written when the reader
    /// has no rows.
    /// </summary>
    /// <param name="dr">
    /// Reader exposing the columns this_id, this_title, this_url and this_name.
    /// NOTE(review): the stored id is the last row read, so rows should arrive
    /// ordered by this_id ascending - confirm against the caller's query.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="dr"/> is null.</exception>
    public void AddDataReaderToIndex(SqlDataReader dr)
    {
        if (dr == null)
        {
            throw new ArgumentNullException("dr");
        }
        if (!dr.HasRows)
        {
            return;
        }

        string endId = "0";
        while (dr.Read())
        {
            string title = dr["this_title"].ToString();

            Document doc = new Document();
            doc.Add(Field.Keyword("this_url", dr["this_url"].ToString()));
            doc.Add(Field.Keyword("this_name", dr["this_name"].ToString()));
            // Searchable title: pre-segmented so WhitespaceAnalyzer can split it.
            doc.Add(Field.Text("this_title", ChinaTokene(title)));
            // Original, unsegmented title kept for display only.
            doc.Add(Field.UnIndexed("all_title", title));
            writer.AddDocument(doc);

            endId = dr["this_id"].ToString();
        }

        using (StreamWriter sw = new StreamWriter(EndIdFilePath))
        {
            sw.WriteLine(endId);
        }
    }

    /// <summary>
    /// Segments the text with the Sj110 tokenizer and joins the resulting tokens
    /// with single spaces.
    /// </summary>
    /// <param name="s">Text to segment.</param>
    /// <returns>
    /// The space-separated tokens, or an empty string when the tokenizer
    /// yields no tokens.
    /// </returns>
    private string ChinaTokene(string s)
    {
        List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
        if (results == null || results.Count == 0)
        {
            // The previous StringBuilder.Remove(Length - 2, 1) threw here.
            return string.Empty;
        }

        // Join keeps every token intact; trimming by position used to cut the
        // last character off the final token.
        return string.Join(" ", results.ToArray());
    }

    /// <summary>
    /// Optimizes the index and closes the writer.
    /// </summary>
    public void Close()
    {
        try
        {
            writer.Optimize();
        }
        finally
        {
            // Close even when Optimize fails so the writer is not left open.
            writer.Close();
        }
    }
}
当然,建完索引之后要把数据库中最后一条已索引记录的ID写入文本文件,
因为前面获取DataReader的时候,我们是先从这个文本文件里读出最后一条记录的ID,再查询ID比它大的记录的。
代码如下
/// <summary>
/// Click handler that indexes the favorites added since the previous run.
/// Reads the last indexed id from endid.txt, selects the rows of userFav with
/// a larger this_id and passes them to <c>IntranetIndexer</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void buttonAddIndex_Click(object sender, EventArgs e)
{
    const string endIdPath = @"C:\Inetpub\wwwroot\pwqdream\endid.txt";
    const string indexPath = @"C:\Inetpub\wwwroot\pwqdream\index";
    // Ordered by id because the indexer records the id of the last row it reads.
    const string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id order by this_id";

    // A missing, empty, malformed or negative marker means "start from the beginning".
    int endId = 0;
    if (File.Exists(endIdPath))
    {
        using (StreamReader sr = new StreamReader(endIdPath))
        {
            if (!int.TryParse(sr.ReadLine(), out endId) || endId < 0)
            {
                endId = 0;
            }
        }
    }

    string connStr = ConfigurationManager.AppSettings[0];

    using (SqlConnection conn = new SqlConnection(connStr))
    using (SqlCommand comm = new SqlCommand(selStr, conn))
    {
        comm.Parameters.AddWithValue("@this_id", endId);
        conn.Open();

        using (SqlDataReader dr = comm.ExecuteReader())
        {
            IntranetIndexer writer = new IntranetIndexer(indexPath);
            try
            {
                writer.AddDataReaderToIndex(dr);
            }
            finally
            {
                // Always close the index writer, even when indexing fails.
                writer.Close();
            }
        }
    }
}
查询的时候用的是ajax调用后台方法,多字段搜索,且都先分词了,结果很牵强:如果输入的是名字,那么取不到url和标题;如果输入的像是标题,则取不到url和名字。还有界面很不友好,下面是代码:
/**
 * Starts a search of the shared favorites.
 * Shows the search term in the title cell, displays the loading indicator and
 * calls the server method; the response is handled by onSelectUserShellFavCom.
 * Does nothing when the search term is empty.
 *
 * @param obj        the search term typed by the user
 * @param pageNumber the result page to request
 */
function showUserShellFav(obj, pageNumber)
{
    var selValue = document.getElementById("searchSel").value;
    if (obj == "")
    {
        return;
    }

    var t = document.getElementById("titleTd");
    t.innerHTML = "搜索关于--<font color='red'></font>--的共享收藏";
    // The term comes straight from the user: insert it as a text node so it can
    // never be interpreted as markup.
    t.getElementsByTagName("font")[0].appendChild(document.createTextNode(obj));

    showLoad();
    PwqzcDream.test.SelectUserShellFav(obj, pageNumber, selValue, onSelectUserShellFavCom);
}
/**
 * Callback for SelectUserShellFav: renders the pager and the result list.
 *
 * rel.value.Tables[0] holds the result rows (this_url, this_title, this_name)
 * and rel.value.Tables[1] holds rows whose html property is pager markup.
 * Logged-in users get draggable links opened through openLink; anonymous
 * users get plain links that open in a new window.
 *
 * @param rel the response object passed by the ajax framework
 */
function onSelectUserShellFavCom(rel)
{
    var NOT_FOUND = "对不起,没有找到相关的收藏!";

    // Escapes a value for use inside markup or a quoted attribute.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    hideLoad();
    var pageIndexTd = document.getElementById("pageIndexTd");
    var c = document.getElementById("contentTd");

    // A failed call carries no value; report it like an empty result.
    if (!rel || !rel.value)
    {
        pageIndexTd.innerHTML = "";
        c.innerHTML = NOT_FOUND;
        return;
    }

    var dtContent = rel.value.Tables[0];
    var dtCount = rel.value.Tables[1];

    // NOTE(review): html is inserted unescaped, as before - presumably pager
    // markup built by the server; confirm it cannot contain user input.
    var pagerHtml = "";
    for (var r = 0; r < dtCount.Rows.length; r++)
    {
        pagerHtml += dtCount.Rows[r].html;
    }
    pageIndexTd.innerHTML = pagerHtml;

    if (dtContent.Rows.length == 0)
    {
        c.innerHTML = NOT_FOUND;
        return;
    }

    var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
    var items = "";
    for (var row = 0; row < dtContent.Rows.length; row++)
    {
        var cur = dtContent.Rows[row];
        // Titles, names and urls are user-submitted, so they are escaped.
        // NOTE(review): escaping does not validate the url scheme.
        var url = escapeHtml(cur.this_url);
        var anchor = loggedIn
            ? "<a type='" + url + "' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>"
            : "<a href='" + url + "' style='color:#0088e4;' target='_blank'>";
        items += "<li>" + anchor + escapeHtml(cur.this_title) + "</a>  <font color='red'>" + escapeHtml(cur.this_name) + "</font>收藏</li>";
    }
    // Assign once instead of re-parsing the list on every appended row.
    c.innerHTML = items;
}
下面是按照用户名字搜索结果的抓图,当然,速度提升是很明显的
下面是按照标题搜索的结果的抓图:
同时也希望大家能够喜欢99收藏夹,谢谢大家!