lucene.net2.9搜索Demo

最新推荐文章于 2023-05-29 02:03:01 发布

寒色

最新推荐文章于 2023-05-29 02:03:01 发布

阅读量4.4k

点赞数

分类专栏：【寒色】Lucene笔记文章标签： query lucene string null filter border

本文链接：https://blog.csdn.net/xuezhongsong/article/details/6087343

版权

【寒色】Lucene笔记专栏收录该内容

6 篇文章 0 订阅

订阅专栏

上次用到 Lucene.Net 时，其版本还是 2.4；现在升级到 2.9 后，以前的很多方法已被标记为过时（Obsolete），到 3.0 版本将被彻底删除。所以这里把已过时的类、方法和属性全部替换为新的 API，先做一个查询 demo。

开发环境:vs2010(.net4)+lucene.net2.9+盘古分词2.3.1和高亮显示（也可以用lucene自带的分词和高亮显示）

具体代码如下：

/// <summary>
/// Runs a paged, multi-field search against the index whose path is read from
/// <c>tbIndexPath</c>, using the keywords from <c>tbKeyWords</c> and the page number
/// from <c>tbPageIndex</c>, and writes the elapsed time plus an HTML result table
/// into <c>lbInformation</c>.
/// </summary>
protected void GetSearchPageDemo()
{
    Stopwatch watch = new Stopwatch();
    watch.Start(); // start timing

    string strPath = tbIndexPath.Text;    // directory that holds the index files
    string strKeyWords = tbKeyWords.Text; // raw keywords typed by the user
    const int pageSize = 10;              // records per page
    int intToTalCount = 0;                // total number of hits

    // Fall back to the first page when the box is empty or does not hold a valid integer
    // (Convert.ToInt32 would throw a FormatException on such input).
    int pageIndex;
    if (!int.TryParse(tbPageIndex.Text, out pageIndex))
    {
        pageIndex = 1;
    }
    pageIndex = Math.Max(1, pageIndex);

    // Segment the keywords and attach a boost to every word.
    strKeyWords = GetKeyWordsSplitBySpace(strKeyWords, new PanGuTokenizer());

    StringBuilder sb = new StringBuilder();
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(strPath)), true);
    try
    {
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new PanGuAnalyzer(true); // PanGu word segmentation

        // Multi-field search: the same query text is applied to each field.
        string[] queries = { strKeyWords, strKeyWords, strKeyWords };
        string[] fields = { "Title", "Content", "keyName" };
        Query query = MultiFieldQueryParser.Parse(Version.LUCENE_29, queries, fields, analyzer);

        // Single-field alternative:
        //QueryParser queryParser = new QueryParser(Version.LUCENE_29, "Title", analyzer);
        //Query quey = queryParser.Parse(strKeyWords);

        List<ScoreDoc> hits = GetScoreDocDemo1(query, searcher, pageIndex, pageSize, out intToTalCount);
        int totalPages = CalcPageCount(intToTalCount, pageSize);
        pageIndex = Math.Min(totalPages, pageIndex);

        sb.AppendFormat("<p>{2}/{3} 总数：{0} 显示前{1}条记录</p>", intToTalCount, hits.Count, pageIndex.ToString(), totalPages.ToString());
        sb.Append("<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" width=\"100%\">");

        // Highlighting is done with the PanGu highlighter.
        // NOTE(review): stored field values are written into the HTML without encoding;
        // confirm the index content is trusted or encode the non-highlighted fields.
        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        for (int i = 0; i < hits.Count; i++)
        {
            Document doc = searcher.Doc(hits[i].doc);
            PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 120; // fragment size used for the excerpt

            // Show the plain title when the highlighter produced no fragment for it.
            string title = highlighter.GetBestFragment(strKeyWords, doc.Get("Title"));
            if (string.IsNullOrEmpty(title))
            {
                title = doc.Get("Title");
            }

            sb.Append("<tr>");
            sb.AppendFormat("<td><font size='5'>{0}</font></td>", title);
            sb.AppendFormat("<td>{0}</td>", doc.Get("visits"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("keyName"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("Idate"));
            sb.AppendFormat("<td>{0}</td>", hits[i].score);
            sb.Append("</tr>");

            sb.Append("<tr>");
            sb.AppendFormat("<td colspan='5'><div style=\"padding:10px\">{0}</div></td>", highlighter.GetBestFragment(strKeyWords, doc.Get("Content")));
            sb.Append("</tr>");
        }
        sb.Append("</table>");
    }
    finally
    {
        // Release the index files even when parsing, searching or rendering fails.
        reader.Close();
    }

    watch.Stop();
    TimeSpan elapsed = watch.Elapsed;
    lbInformation.Text = "用时：" + elapsed.Hours + ":" + elapsed.Minutes + ":" + elapsed.Seconds + ":" + elapsed.Milliseconds + sb.ToString();
}

/// <summary>
/// Number of pages needed to show <paramref name="totalCount"/> records,
/// <paramref name="pageSize"/> records per page (integer ceiling division).
/// </summary>
private static int CalcPageCount(int totalCount, int pageSize)
{
    return totalCount % pageSize == 0 ? totalCount / pageSize : totalCount / pageSize + 1;
}

/// <summary>
/// Extracts the requested page from a <see cref="TopDocs"/> that holds the best
/// <c>pageIndex * pageSize</c> hits (or all hits when fewer exist).
/// </summary>
/// <param name="topDocs">Search result to slice</param>
/// <param name="pageIndex">1-based page; values past the last page are clamped to the last page</param>
/// <param name="pageSize">Page size</param>
/// <param name="intToTalCount">Receives the total hit count</param>
/// <returns>The hits of the requested page; empty when there are no hits</returns>
private static List<ScoreDoc> SliceCurrentPage(TopDocs topDocs, int pageIndex, int pageSize, out int intToTalCount)
{
    intToTalCount = topDocs.totalHits;
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    int pageCount = CalcPageCount(intToTalCount, pageSize);
    pageIndex = Math.Min(pageCount, pageIndex);

    // Everything before the requested page is skipped; Take caps the page at pageSize
    // and naturally yields the shorter remainder on the last page.
    return topDocs.scoreDocs.Skip((pageIndex - 1) * pageSize).Take(pageSize).ToList();
}

/// <summary>
/// Gets one page of Lucene results using TopScoreDocCollector + TopDocs.
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="pageIndex">1-based current page; clamped to the range 1..page count</param>
/// <param name="pageSize">Page size; must be positive</param>
/// <param name="intToTalCount">Receives the total hit count</param>
/// <returns>The hits of the requested page; empty when there are no hits</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 1</exception>
protected List<ScoreDoc> GetScoreDocDemo1(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException("pageSize", "pageSize must be positive.");
    }

    TopScoreDocCollector collector = TopScoreDocCollector.create(searcher.MaxDoc(), true);
    searcher.Search(query, collector);
    intToTalCount = collector.GetTotalHits();
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    int pageCount = CalcPageCount(intToTalCount, pageSize);
    // Clamp on both sides so the start offset passed to TopDocs is never negative.
    pageIndex = Math.Max(1, Math.Min(pageCount, pageIndex));

    TopDocs pageDocs = collector.TopDocs((pageIndex - 1) * pageSize, pageSize); // records of the requested page
    return pageDocs.scoreDocs.ToList();
}

/// <summary>
/// Gets one page of Lucene results using TopDocs (relevance order).
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="pageIndex">1-based current page; clamped to the range 1..page count</param>
/// <param name="pageSize">Page size; must be positive</param>
/// <param name="intToTalCount">Receives the total hit count</param>
/// <returns>The hits of the requested page; empty when there are no hits</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 1</exception>
protected List<ScoreDoc> GetScoreDocDemo2(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException("pageSize", "pageSize must be positive.");
    }

    // A non-positive page would request zero or a negative number of hits.
    pageIndex = Math.Max(1, pageIndex);

    TopDocs topDocs = searcher.Search(query, (Filter)null, pageIndex * pageSize);
    return SliceCurrentPage(topDocs, pageIndex, pageSize, out intToTalCount);
}

/// <summary>
/// Gets one page of Lucene results using TopDocs, ordered by <paramref name="sort"/>.
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="sort">Sort order passed to the search</param>
/// <param name="pageIndex">1-based current page; clamped to the range 1..page count</param>
/// <param name="pageSize">Page size; must be positive</param>
/// <param name="intToTalCount">Receives the total hit count</param>
/// <returns>The hits of the requested page; empty when there are no hits</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 1</exception>
protected List<ScoreDoc> GetScoreDocDemo3(Query query, Searcher searcher, Sort sort, int pageIndex, int pageSize, out int intToTalCount)
{
    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException("pageSize", "pageSize must be positive.");
    }

    // A non-positive page would request zero or a negative number of hits.
    pageIndex = Math.Max(1, pageIndex);

    TopDocs topDocs = searcher.Search(query, (Filter)null, pageIndex * pageSize, sort);
    return SliceCurrentPage(topDocs, pageIndex, pageSize, out intToTalCount);
}

/// <summary>
/// Segments the keywords and joins the resulting words with spaces, giving each
/// word a boost of 3 raised to the word's rank (formatted as <c>word^N.0</c>).
/// </summary>
/// <param name="keywords">Raw keyword text; null or empty yields an empty string</param>
/// <param name="ktTokenizer">Tokenizer used to segment the keywords</param>
/// <returns>The space-separated, boosted query text</returns>
public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    if (string.IsNullOrEmpty(keywords))
    {
        return string.Empty;
    }

    StringBuilder result = new StringBuilder();
    ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
    foreach (WordInfo word in words)
    {
        if (word == null)
        {
            continue;
        }

        // NOTE(review): words are not escaped for the query parser; confirm the
        // segmenter never emits query-syntax characters.
        result.AppendFormat("{0}^{1}.0 ", word.Word, (int)Math.Pow(3, word.Rank));
    }

    return result.ToString().Trim();
}

应用的网站上的具体效果如下图，也可以进入网站试试具体效果：lucene.net模糊查询

寒色

关注

0
点赞
踩
5

收藏

觉得还不错? 一键收藏
17
评论
lucene.net2.9搜索Demo

上次用到lucene.net时，其版本还是2.4，现在升级到2.9后，以前的很多方法被抛弃不用，到3.0后，就被删除不用。所以现在把以前弃置不用的类、方法和属性全部用新的替换之，先做个查询demo。开发环境:vs2010(.net4)+lucene.net2.9+盘古分词2.3.1和高亮显示（也可以用lucene自带的分词和高亮显示）具体代码如下：protected void btSearch_Click(object sender, EventArgs e) {
复制链接

扫一扫