初试Lucene.net搜索及高亮分页

Code
using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So
{
    public class BaseSearch : System.Web.UI.Page
    {
        #region 变量声明
        /// <summary>
        /// Search result rows for the current page.
        /// </summary>
        public DataTable Results = new DataTable();

        /// <summary>
        /// Zero-based index of the first result on the current page.
        /// </summary>
        public int startAt;

        /// <summary>
        /// First item on page (user format).
        /// </summary>
        public int fromItem;

        /// <summary>
        /// Last item on page (user format).
        /// </summary>
        public int toItem;

        /// <summary>
        /// Total number of search results.
        /// </summary>
        public int total;

        /// <summary>
        /// Time the search took.
        /// </summary>
        public TimeSpan duration;

        /// <summary>
        /// Number of result items shown per page.
        /// </summary>
        public int maxResults = 10;

        /// <summary>
        /// Whether the page-cache feature is enabled.
        /// </summary>
        public bool EnableCache;

        /// <summary>
        /// Cache URL.
        /// </summary>
        public string CacheURL;

        /// <summary>
        /// Path where the index files are stored.
        /// (The name is misspelled; it is public, so it is kept as is.)
        /// </summary>
        public string IndexDiectory;

        // Backing field of the Query property.
        private string m_Query;

        // Lucene directory the searcher reads from; assigned by GetIndexDir.
        public Lucene.Net.Store.Directory dir;

        #endregion 变量声明


        取得查询目标索引的缓存#region 取得查询目标索引的缓存
        public void GetIndexDir(string IndexKey, string IndexDiectory)
        {
            //object obj = Cache[IndexKey];
            
//if (obj != null)
            
//{
            
//    dir = (Lucene.Net.Store.Directory)obj;
            
//}
            
//else
            
//{
            
//    dir = new Lucene.Net.Store.RAMDirectory(IndexDiectory);
            
//    Cache.Insert(IndexKey, dir, null, DateTime.Now.AddMinutes(2), TimeSpan.Zero);
            
//}
            dir = new Lucene.Net.Store.RAMDirectory(IndexDiectory);
        }
        #endregion





        得到定长的字符串#region 得到定长的字符串
        /**//// <summary>
        
/// 得到定长的字符串
        
/// </summary>
        
/// <param name="p_Text">原字符串</param>
        
/// <param name="p_Length">长度</param>
        
/// <param name="p_ExtraText">多余部分显示字符</param>
        
/// <returns></returns>
        public string GetLengthText(string p_Text, int p_Length, string p_ExtraText)
        {
            return (p_Text.Length > p_Length) ? (p_Text.Substring(045) + p_ExtraText) : p_Text;
        }
        #endregion

        取得两个参数中的最小值#region 取得两个参数中的最小值
        /**//// <summary>
        
/// 取得两个参数中的最小值
        
/// </summary>
        
/// <param name="first">参数一</param>
        
/// <param name="second">参数二</param>
        
/// <returns>最小值</returns>
        public int smallerOf(int first, int second)
        {
            return first < second ? first : second;
        }
        #endregion

        检测开始位置#region 检测开始位置
        /**//// <summary>
        
/// Initializes startAt value. Checks for bad values.
        
/// </summary>
        
/// <returns></returns>
        public int initStartAt()
        {
            try
            {
                int sa = Convert.ToInt32(this.Request.Params["start"]);

                // too small starting item, return first page
                if (sa < 0)
                    return 0;

                // too big starting item, return last page
                if (sa >= total - 1)
                {
                    return lastPageStartsAt;
                }

                return sa;
            }
            catch
            {
                return 0;
            }
        }
        #endregion

        最后一页的第一项#region 最后一页的第一项

        /**//// <summary>
        
/// First item of the last page
        
/// </summary>
        public int lastPageStartsAt
        {
            get
            {
                return pageCount * maxResults;
            }
        }
        public int pageCount
        {
            get
            {
                return (total - 1) / maxResults; // floor
            }
        }
        #endregion

        取得高亮连接#region 取得高亮连接

        /**//// <summary>
        
/// 取得高亮连接
        
/// </summary>
        
/// <param name="p_Body">处理内容</param>
        
/// <param name="p_KeyWords">关键词</param>
        
/// <returns></returns>
        public string SimpleHighLighter(string p_Body, string p_KeyWords, string p_Before,
            string p_After, int p_MaxLength)
        {
            string[] KeyWords = p_KeyWords.Trim().Split(' ');

            //if (p_Body.Length > p_MaxLength)
            
//{
            
//    if (p_Body.IndexOf(KeyWords[0]) > 10)
            
//    {
            
//        try
            
//        {
            
//            if ((p_Body.Length - 10) > p_MaxLength)
            
//                p_Body = p_Body.Substring(p_Body.IndexOf(KeyWords[0]) - 10, p_MaxLength) + "";
            
//            else
            
//                p_Body = p_Body.Substring(p_Body.IndexOf(KeyWords[0]) - 10) + "";
            
//        }
            
//        catch
            
//        { }
            
//    }
            
//    else
            
//        p_Body = p_Body.Substring(0, p_MaxLength) + "";

            
//}


            for (int i = 0; i < KeyWords.Length; i++)
            {
                p_Body = p_Body.Replace(KeyWords[i], p_Before + KeyWords[i] + p_After);

            }


            return p_Body;

        }
        #endregion

        属性#region 属性
        /**//// <summary>
        
/// 查询关键词
        
/// </summary>
        public string Query
        {
            get
            {
                return m_Query;
            }
            set
            {
                m_Query = value;
            }
        }
        #endregion
    }
}

using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So.News
{
    public class NewsSearch : BaseSearch
    {
        public NewsSearch()
        {
            this.IndexDiectory = ConfigurationManager.AppSettings["NewsIndexPath"];
        }

        处理搜索并将信息转换为可显示结果数据源#region 处理搜索并将信息转换为可显示结果数据源

        /**//// <summary>
        
/// Does the search and stores the information about the results.
        
/// </summary>
        public void search()
        {


            // 索引目录
            
//string indexDirectory = Server.MapPath(ConfigurationSettings.AppSettings["EnableCache"] );  
            
//创建一个Searcher用于搜索

            
//记录查询开始的时间
            DateTime start = DateTime.Now;
            this.GetIndexDir("HDC.News", IndexDiectory);
            IndexSearcher searcher = new IndexSearcher(dir);
            //从"body"字段搜索
            
//Console.WriteLine(this.Query);

            Lucene.Net.Analysis.Analyzer OneAnalyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser("newsContent", OneAnalyzer);
            Query query = parser.Parse(this.Query);



            //创建结果记录集
            
//定义字段
            this.Results.Columns.Add("ArticleID"typeof(int));
            this.Results.Columns.Add("ArticleClassID"typeof(int));
            this.Results.Columns.Add("className"typeof(string));
            this.Results.Columns.Add("titleImg"typeof(string));
            this.Results.Columns.Add("updateTime"typeof(DateTime));
            this.Results.Columns.Add("source"typeof(string));
            this.Results.Columns.Add("title"typeof(string));
            this.Results.Columns.Add("summary"typeof(string));




            Sort sort = new Sort(new SortField("ArticleID", SortField.DOC, true));
            //Hits是搜索结果记录集,不过是Lucene自己的格式,需要格式化成标准输出
            Hits hits = searcher.Search(query, sort);

            //结果个数
            this.total = hits.Length();

            /**/////创建高亮显示
            //Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color="#C60A00">", "</font>"), new QueryScorer(query));
            
//highlighter.TextFragmenter = new SimpleFragmenter(160);
            
//highlighter.MaxDocBytesToAnalyze = 256;

            
// initialize startAt
            this.startAt = initStartAt();

            // how many items we should show - less than defined at the end of the results
            int resultsCount = smallerOf(total, this.maxResults + this.startAt);

            for (int i = startAt; i < resultsCount; i++)
            {
                Document doc = hits.Doc(i);
                DataRow row = this.Results.NewRow();

                row["ArticleID"] = Convert.ToInt32(doc.Get("ArticleID"));
                row["ArticleClassID"] = Convert.ToInt32(doc.Get("ArticleClassID"));
                string summary = doc.Get("summary");

                row["summary"] = this.SimpleHighLighter(summary, this.Query,"<font color="#C60A00">""</font>",226);
                row["className"] = doc.Get("className");
                row["titleImg"] = doc.Get("titleImg");
                row["updateTime"] = Convert.ToDateTime(doc.Get("updateTime"));
                row["source"] = doc.Get("source");
                row["title"] = doc.Get("title");
                this.Results.Rows.Add(row);
            }
            searcher.Close();

            // result information

            this.fromItem = startAt + 1;
            this.toItem = smallerOf(startAt + maxResults, total);
            //记录查询使用的时间
            this.duration = DateTime.Now - start;
        }
        #endregion

        页面底航连接#region 页面底航连接
        /**//// <summary>
        
/// 页面底航连接
        
/// </summary>
        public DataTable Paging
        {
            get
            {
                int pageNumber = (startAt + maxResults - 1) / maxResults;

                DataTable dt = new DataTable();
                dt.Columns.Add("html"typeof(string));

                //增加第一页链接
                DataRow tar = dt.NewRow();
                if (startAt >= maxResults)
                    tar["html"] = "<EM><a href="/News/?q=" + Server.UrlEncode(this.Query) + "&start=" + (startAt - maxResults) + ""><IMG src="images/b_pre.gif"></a></EM>";
                else
                    tar["html"] = "<EM><IMG src="images/b_pre.gif"></EM>";
                dt.Rows.Add(tar);


                int previousPagesCount = 7;

                DataRow ar = dt.NewRow();
                ar["html"] = pagingItemHtml(startAt, pageNumber + 1false);
                dt.Rows.Add(ar);

                for (int i = pageNumber - 1; i >= 0 && i >= pageNumber - previousPagesCount; i--)
                {
                    int step = i - pageNumber;
                    DataRow r = dt.NewRow();
                    r["html"] = pagingItemHtml(startAt + (maxResults * step), i + 1true);

                    dt.Rows.InsertAt(r, 1);
                }

                int nextPagesCount = 8;
                for (int i = pageNumber + 1; i <= pageCount && i <= pageNumber + nextPagesCount; i++)
                {
                    int step = i - pageNumber;
                    DataRow r = dt.NewRow();
                    r["html"] = pagingItemHtml(startAt + (maxResults * step), i + 1true);

                    dt.Rows.Add(r);
                }

                //增加第一页链接
                DataRow far = dt.NewRow();
                if (pageNumber < pageCount)
                    far["html"] = "<EM><a class="blue1" href="/News/?q=" + Server.UrlEncode(this.Query) + "&start=" + (startAt + maxResults) + ""><IMG src="images/b_nextpage.gif"></a></EM>";
                else
                    far["html"] = "<EM><IMG src="images/b_nextpage.gif"></EM>";
                dt.Rows.Add(far);
                return dt;
            }
        }


        页面连接列表#region 页面连接列表

        /**//// <summary>
        
/// 页面连接列表
        
/// </summary>
        
/// <param name="start">开始</param>
        
/// <param name="number">显示数量</param>
        
/// <param name="active">活动</param>
        
/// <returns></returns>
        public string pagingItemHtml(int start, int number, bool active)
        {

            if (active)
                return "<VAR><a href="/News/?q=" + Server.UrlEncode(this.Query) + "&start=" + start + "">" + number + "</a></VAR>";
            else
                return "<VAR class=on>" + number + "</VAR>";
        }
        #endregion


        #endregion



    }
}







分类: .NET
标签: Lucene.net, 搜索

本文转自孤独侠客的博客园博客,原文链接: http://www.cnblogs.com/lonely7345/archive/2008/06/27/1231354.html 。如需转载,请自行联系原作者。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值