Lucene和PanGu(盘古分词)

先用盘古分词生成索引,然后对查询语句进行分词,再用分词结果进行查询。比较简单。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.China;
using Lucene.Net.Analysis.PanGu;
using System.Data;
using System.Data.SqlClient;
using System.IO;
using Lucene.Net.Analysis.Standard;
using PanGu;
using System.Text;
using System.Diagnostics;

// NOTE: the members below use Server, Response and txtContext, so they are
// expected to sit inside a Web Forms page class whose declaration is not part
// of this excerpt.

/// <summary>
/// Loads every row of the Fourm table into a <see cref="DataTable"/>.
/// </summary>
/// <returns>A table holding the rows returned by the query.</returns>
public DataTable GetTable()
{
    DataTable dt = new DataTable();
    using (SqlConnection con = new SqlConnection("Data Source=HQ-M-YSQ;Initial Catalog=SpiderDB;Integrated Security=True"))
    using (SqlDataAdapter da = new SqlDataAdapter("select * from Fourm", con))
    {
        // The adapter is disposed even when Fill throws.
        da.Fill(dt);
    }
    return dt;
}

/// <summary>
/// Returns the physical directory of the Lucene index ("DataIndex" under the
/// directory that Server.MapPath("./") resolves to).
/// </summary>
private string GetIndexPath()
{
    // Path.Combine only inserts a separator when the mapped path lacks one.
    return System.IO.Path.Combine(Server.MapPath("./"), "DataIndex");
}

/// <summary>
/// Rebuilds the index from scratch: one document per row of <see cref="GetTable"/>,
/// with the id, title and context columns stored and tokenized by PanGu.
/// </summary>
public void CreateIndex()
{
    // Read the data first: the writer below is opened with create = true,
    // so a failing database call must not wipe an existing index.
    DataTable dt = GetTable();

    PanGuAnalyzer analyzer = new PanGuAnalyzer();
    IndexWriter indexWriter = new IndexWriter(GetIndexPath(), analyzer, true);
    try
    {
        foreach (DataRow item in dt.Rows)
        {
            Document doc = new Document();
            doc.Add(new Field("PID", item["id"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("PTitle", item["title"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("PContext", item["context"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            indexWriter.AddDocument(doc);
        }
    }
    finally
    {
        // Always close the writer so a failed run does not leave it open.
        indexWriter.Close();
    }
}

/// <summary>Click handler that rebuilds the index.</summary>
protected void Button1_Click1(object sender, EventArgs e)
{
    CreateIndex();
}

/// <summary>Click handler that searches for the trimmed text of txtContext.</summary>
protected void Button2_Click(object sender, EventArgs e)
{
    Search(txtContext.Text.Trim());
}

/// <summary>
/// Segments <paramref name="keyWord"/> with PanGu, searches the PTitle and
/// PContext fields with rank-boosted terms, and writes the highlighted hits
/// plus the elapsed search time to the response.
/// </summary>
/// <param name="keyWord">Raw user query; a null or blank value writes nothing.</param>
public void Search(String keyWord)
{
    // A blank query has nothing to segment; the original code crashed here.
    if (keyWord == null || keyWord.Trim().Length == 0)
    {
        return;
    }

    StringBuilder queryText = new StringBuilder();
    List<string> words = new List<string>();

    PanGuTokenizer tokenizer = new PanGuTokenizer();
    ICollection<WordInfo> list = tokenizer.SegmentToWordInfos(keyWord);
    foreach (WordInfo word in list)
    {
        if (word == null)
        {
            continue;
        }

        words.Add(word.ToString());

        // Trailing space keeps "term^boost" clauses apart (otherwise a term
        // starting with a digit is read as part of the previous boost);
        // Escape stops query-syntax characters in user input from being parsed.
        queryText.AppendFormat("{0}^{1} ", QueryParser.Escape(word.Word), (int)Math.Pow(3, word.Rank));
    }

    if (words.Count == 0)
    {
        // Segmentation produced no searchable term.
        return;
    }

    // Text handed to the highlighter: the original query followed by the
    // comma-separated segments (same string the previous code built).
    string highlightKeywords = keyWord + string.Join(",", words.ToArray());

    PanGuAnalyzer analyzer = new PanGuAnalyzer(true);
    MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "PTitle", "PContext" }, analyzer);
    Query query = parser.Parse(queryText.ToString());

    // Build the highlighter once; it does not depend on the individual hit.
    PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color='red'>", "</font>");
    PanGu.HighLight.Highlighter highter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());

    Stopwatch sw = new Stopwatch();
    StringBuilder sb = new StringBuilder();

    IndexSearcher search = new IndexSearcher(GetIndexPath());
    try
    {
        // Only the search call itself is timed.
        sw.Start();
        Hits hits = search.Search(query);
        sw.Stop();

        for (var i = 0; i < hits.Length(); i++)
        {
            Document doc = hits.Doc(i);
            String title = highter.GetBestFragment(highlightKeywords, doc.Get("PTitle"));
            String context = highter.GetBestFragment(highlightKeywords, doc.Get("PContext"));

            // Presumably the highlighter yields an empty string when nothing in
            // the field matches; show the stored title instead of a blank one.
            if (string.IsNullOrEmpty(title))
            {
                title = doc.Get("PTitle");
            }

            // NOTE(review): the stored fields are written to the page unencoded;
            // confirm the indexed content is trusted or HTML-encode it.
            sb.Append("标题:" + title + "<br/>" + context + doc.Get("PContext") + "<br/>");
        }
    }
    finally
    {
        // Release the index reader even when searching or highlighting fails.
        search.Close();
    }

    Response.Write(sb + "用时:" + (sw.Elapsed.TotalMilliseconds).ToString());
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值