成功为99收藏夹实现基于Lucene的站内全文搜索

99收藏夹原来的站内收藏搜索是基于数据库的 LIKE '%关键字%' 语句实现的,速度有点慢,于是想到用Lucene来实现按收藏者名字和共享收藏的链接标题进行搜索的功能。昨天下午做好了,但是结果有点不尽如人意。
首先我用Lucene把数据从数据库导出并建立索引,代码如下。注:里面的ChinaTokene方法调用的是 www.sj110.com 的中文分词组件。
ContractedBlock.gif ExpandedBlockStart.gif
 1None.gifpublic class IntranetIndexer
 2ExpandedBlockStart.gifContractedBlock.gifdot.gif{
 3InBlock.gif    private IndexWriter writer;
 4InBlock.gif    public IntranetIndexer(string dictory)
 5ExpandedSubBlockStart.gifContractedSubBlock.gif    dot.gif{
 6InBlock.gif        if (Directory.GetFiles(dictory).Length == 0)
 7ExpandedSubBlockStart.gifContractedSubBlock.gif        dot.gif{
 8InBlock.gif            writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), true);
 9ExpandedSubBlockEnd.gif        }

10InBlock.gif        else
11ExpandedSubBlockStart.gifContractedSubBlock.gif        dot.gif{
12InBlock.gif            writer = new IndexWriter(dictory,new WhitespaceAnalyzer(),false);
13ExpandedSubBlockEnd.gif        }

14InBlock.gif        writer.SetUseCompoundFile(true);
15ExpandedSubBlockEnd.gif    }

16InBlock.gif    public void AddDataReaderToIndex(SqlDataReader dr)
17ExpandedSubBlockStart.gifContractedSubBlock.gif    dot.gif{
18InBlock.gif        if (dr.HasRows)
19ExpandedSubBlockStart.gifContractedSubBlock.gif        dot.gif{
20InBlock.gif            string EndId="0";
21InBlock.gif            while (dr.Read())
22ExpandedSubBlockStart.gifContractedSubBlock.gif            dot.gif{
23ExpandedSubBlockStart.gifContractedSubBlock.gif                /**//*int fieldCount = dr.FieldCount;
24InBlock.gif                for (int i = 0; i < fieldCount; i++)
25InBlock.gif                {
26InBlock.gif                    Document doc = new Document();
27InBlock.gif                    doc.Add(Field.Text(dr.GetName(i).ToString(),ChinaTokene(dr[i].ToString())));
28InBlock.gif                    writer.AddDocument(doc);
29InBlock.gif                    EndId = dr[0].ToString();
30ExpandedSubBlockEnd.gif                }*/

31InBlock.gif                Document doc = new Document();
32InBlock.gif                doc.Add(Field.Keyword("this_url",dr["this_url"].ToString()));
33InBlock.gif                doc.Add(Field.Keyword("this_name",dr["this_name"].ToString()));
34InBlock.gif                doc.Add(Field.Text("this_title", ChinaTokene(dr["this_title"].ToString())));
35InBlock.gif                doc.Add(Field.UnIndexed("all_title",dr["this_title"].ToString()));
36InBlock.gif                writer.AddDocument(doc);
37InBlock.gif                EndId = dr["this_id"].ToString();
38ExpandedSubBlockEnd.gif            }

39InBlock.gif            using (StreamWriter sw = new StreamWriter(@"C:\Inetpub\wwwroot\pwqdream\endid.txt"))
40ExpandedSubBlockStart.gifContractedSubBlock.gif            dot.gif{
41InBlock.gif                sw.WriteLine(EndId);
42ExpandedSubBlockEnd.gif            }

43ExpandedSubBlockEnd.gif        }

44ExpandedSubBlockEnd.gif    }

45InBlock.gif
46InBlock.gif    private string ChinaTokene(string s)
47ExpandedSubBlockStart.gifContractedSubBlock.gif    dot.gif{
48InBlock.gif        System.Text.StringBuilder sb = new System.Text.StringBuilder();
49InBlock.gif        List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
50InBlock.gif        foreach (string str in results)
51ExpandedSubBlockStart.gifContractedSubBlock.gif        dot.gif{
52InBlock.gif            sb.AppendFormat("{0} ",str);
53ExpandedSubBlockEnd.gif        }

54InBlock.gif        sb = sb.Remove(sb.Length-2,1);
55InBlock.gif        return sb.ToString();
56ExpandedSubBlockEnd.gif    }

57InBlock.gif    public void Close()
58ExpandedSubBlockStart.gifContractedSubBlock.gif    dot.gif{
59InBlock.gif        writer.Optimize();
60InBlock.gif        writer.Close();
61ExpandedSubBlockEnd.gif    }

当然,最后要把数据库中最后一条记录的ID写入文本文件,
因为在前面获取DataReader的时候,我们首先是根据这个文本文件来获得最后一条记录的ID,然后再查询的。
代码如下:
ContractedBlock.gif ExpandedBlockStart.gif
 1None.gifprotected void buttonAddIndex_Click(object sender, EventArgs e)
 2ExpandedBlockStart.gifContractedBlock.gif    dot.gif{
 3InBlock.gif        int endId;
 4InBlock.gif        using (StreamReader sr = new StreamReader(@"C:\Inetpub\wwwroot\pwqdream\endid.txt"))
 5ExpandedSubBlockStart.gifContractedSubBlock.gif        dot.gif{
 6InBlock.gif            try
 7ExpandedSubBlockStart.gifContractedSubBlock.gif            dot.gif{
 8InBlock.gif                endId = Convert.ToInt32(sr.ReadLine());
 9InBlock.gif                if (endId < 0)
10InBlock.gif                    endId = 0;
11ExpandedSubBlockEnd.gif            }

12InBlock.gif            catch
13ExpandedSubBlockStart.gifContractedSubBlock.gif            dot.gif{
14InBlock.gif                endId = 0;
15ExpandedSubBlockEnd.gif            }

16ExpandedSubBlockEnd.gif        }

17InBlock.gif        string connStr = ConfigurationManager.AppSettings[0].ToString();
18InBlock.gif        SqlConnection conn = new SqlConnection(connStr);
19InBlock.gif        string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id";
20InBlock.gif        SqlCommand comm = new SqlCommand(selStr,conn);
21InBlock.gif        comm.Parameters.AddWithValue("@this_id",endId);
22InBlock.gif        conn.Open();
23InBlock.gif        SqlDataReader dr = comm.ExecuteReader();
24InBlock.gif        IntranetIndexer writer = new IntranetIndexer(@"C:\Inetpub\wwwroot\pwqdream\index");
25InBlock.gif        writer.AddDataReaderToIndex(dr);
26InBlock.gif        writer.Close();
27InBlock.gif        dr.Close();
28InBlock.gif        dr.Dispose();
29InBlock.gif        conn.Close();
30ExpandedBlockEnd.gif    }

查询的时候用的是ajax调用后台方法,做多字段搜索,且关键字都先分词了。结果不太理想:如果输入的是名字,那么取不到url和标题;如果输入的像是标题,则取不到url和名字。另外界面也很不友好。下面是代码:
ContractedBlock.gif ExpandedBlockStart.gif
 1None.giffunction showUserShellFav(obj,pageNumber)
 2ExpandedBlockStart.gifContractedBlock.gifdot.gif{
 3InBlock.gif   var selValue = document.getElementById("searchSel").value;
 4InBlock.gif   if(obj!="")
 5ExpandedSubBlockStart.gifContractedSubBlock.gif   dot.gif{
 6InBlock.gif      var t = document.getElementById("titleTd");
 7InBlock.gif      //var c = document.getElementById("contentTd");
 8InBlock.gif      t.innerHTML="搜索关于--<font color='red'>"+obj+"</font>--的共享收藏";
 9InBlock.gif      //c.innerHTML="";
10InBlock.gif      showLoad();
11InBlock.gif      PwqzcDream.test.SelectUserShellFav(obj,pageNumber,selValue,onSelectUserShellFavCom);
12ExpandedSubBlockEnd.gif   }

13ExpandedBlockEnd.gif}

/**
 * Callback for SelectUserShellFav: renders the pager and the result list.
 *
 * @param rel Ajax response; rel.value.Tables[0] holds the result rows
 *            (this_url, this_title, this_name) and rel.value.Tables[1]
 *            holds the pager rows (html).
 */
function onSelectUserShellFavCom(rel)
{
   // Titles, urls and names are entered by users, so they are escaped
   // before being placed into markup.
   // NOTE(review): if the server ever returns pre-highlighted HTML in
   // this_title, this escaping would show the tags literally - confirm.
   function escapeHtml(text)
   {
      return String(text)
         .replace(/&/g,"&amp;")
         .replace(/</g,"&lt;")
         .replace(/>/g,"&gt;")
         .replace(/"/g,"&quot;")
         .replace(/'/g,"&#39;");
   }

   hideLoad();
   var pageIndexTd = document.getElementById("pageIndexTd");
   var c = document.getElementById("contentTd");
   var dtContent = rel.value.Tables[0];
   var dtCount = rel.value.Tables[1];

   // Build each fragment once instead of re-parsing innerHTML per row.
   var pagerHtml = [];
   for(var r=0;r<dtCount.Rows.length;r++)
   {
      // Pager markup is generated by the server and inserted as is.
      pagerHtml.push(dtCount.Rows[r].html);
   }
   pageIndexTd.innerHTML = pagerHtml.join("");

   if(dtContent.Rows.length==0)
   {
      c.innerHTML = "对不起,没有找到相关的收藏!";
      return;
   }

   // Logged-in users get draggable links; anonymous users get plain links.
   var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
   var rowsHtml = [];
   for(var row=0;row<dtContent.Rows.length;row++)
   {
      var cur = dtContent.Rows[row];
      var url = escapeHtml(cur.this_url);
      var title = escapeHtml(cur.this_title);
      var owner = escapeHtml(cur.this_name);
      if(loggedIn)
      {
         // The url is carried in the type attribute and read by openLink(this.type).
         rowsHtml.push("<li><a type='"+url+"' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>"+title+"</a>&nbsp;&nbsp;<font color='red'>"+owner+"</font>收藏</li>");
      }
      else
      {
         rowsHtml.push("<li><a href='"+url+"' style='color:#0088e4;' target='_blank'>"+title+"</a>&nbsp;&nbsp;<font color='red'>"+owner+"</font>收藏</li>");
      }
   }
   c.innerHTML = rowsHtml.join("");
}

下面是按照用户名字搜索结果的抓图,当然,速度提升是很明显的
good1.gif
下面是按照标题搜索的结果的抓图:
good2.gif
 同时也希望大家能够喜欢99收藏夹,谢谢大家!

转载于:https://www.cnblogs.com/pwqzc/archive/2006/03/23/356447.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值