在CSDN、博客园找了一番Lucene.Net相关资料后,发现还是没有自己想要的,毕竟Lucene.Net的版本一直在变。这里我用的是Lucene.Net 3.0版本,demo是在 http://www.dotlucene.net/ 网站上找到的,方法很完善,API也比较全面,demo地址:http://www.dotlucene.net/30648/lucene-net-api-search-demo 。
不过demo里面讲解的是检索文件内容,而我想要的是检索数据(数据库记录)内容,于是就在它的基础上改了一番。
过程:
1.建立索引,更新索引,删除索引
// Scratch reference to the sample document currently being built by the index
// button handlers; they reset it to null after adding each item to their list.
protected luseneTxt m=null;
// Page load handler; currently performs no work.
protected void Page_Load(object sender, EventArgs e)
{
}
/// <summary>
/// Generates a batch of sample documents, builds the index from them through
/// <c>LuceneSearch.CreatIndexByData</c>, and shows the elapsed time in <c>Label1</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void creatIndexBt_Click(object sender, EventArgs e)
{
    const int sampleCount = 99999;

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock adjustments.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    List<luseneTxt> samples = new List<luseneTxt>(sampleCount);
    for (int i = 0; i < sampleCount; i++)
    {
        luseneTxt sample = new luseneTxt();
        sample.text = "test";
        // The literal ends in "?i=", so the running index is appended to give
        // every sample document a distinct path.
        sample.path = "http://www.baidu.com/?i=" + i;
        sample.title = "mofijeck ";
        sample.des = "12";
        sample.keyword = "34";
        samples.Add(sample);
    }

    LuceneSearch ls = new LuceneSearch();
    ls.CreatIndexByData(samples);

    timer.Stop();
    Label1.Text = "建立索引耗时" + timer.Elapsed.TotalSeconds + "秒";
}
/// <summary>
/// Clears the index through <c>LuceneSearch.DeleteIndex</c> and shows the
/// elapsed time in <c>Label1</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void deleteIndexBt_Click(object sender, EventArgs e)
{
    DateTime startedAt = DateTime.Now;

    LuceneSearch searchService = new LuceneSearch();
    searchService.DeleteIndex();

    TimeSpan elapsed = DateTime.Now - startedAt;
    Label1.Text = "删除索引耗时" + elapsed.TotalSeconds + "秒";
}
/// <summary>
/// Generates a batch of sample documents, adds them to the existing index through
/// <c>LuceneSearch.UpdateIndexByData</c>, and shows the elapsed time in <c>Label1</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void updateIndexBt_Click(object sender, EventArgs e)
{
    const int sampleCount = 999999;

    // Stopwatch is monotonic; DateTime.Now differences can jump with clock adjustments.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    List<luseneTxt> samples = new List<luseneTxt>(sampleCount);
    for (int i = 0; i < sampleCount; i++)
    {
        luseneTxt sample = new luseneTxt();
        sample.text = "test";
        // The literal ends in "?i=", so the running index is appended to give
        // every sample document a distinct path.
        sample.path = "http://www.baidu.com/?i=" + i;
        sample.title = "mofijeck ";
        sample.des = "12";
        sample.keyword = "34";
        samples.Add(sample);
    }

    LuceneSearch ls = new LuceneSearch();
    ls.UpdateIndexByData(samples);

    timer.Stop();
    // The message previously said "建立索引" (create index), copied from the create handler.
    Label1.Text = "更新索引耗时" + timer.Elapsed.TotalSeconds + "秒";
}
2.搜索
其实这里说白了就是增删改查,没有比较特别的东西,但是关于全文检索原理上的东西我也不是特别懂,网上很多大牛有相关博客解释的。
/// <summary>
/// Runs a search for <paramref name="q"/>, binds one page of hits to
/// <c>Repeater1</c>, and stores the timing message and pager HTML for the page to render.
/// </summary>
/// <param name="q">Query text handed to the Lucene query parser; null or empty shows a validation message instead.</param>
protected void search(string q) {
    if (string.IsNullOrEmpty(q)) {
        Label1.Text = "不能为空";
        return;
    }

    DateTime dt = DateTime.Now;
    LuceneSearch ls = new LuceneSearch();

    // SqlPager emits 1-based links (?Page=1 is the first page). A missing,
    // malformed or non-positive value falls back to the first page instead of
    // throwing from int.Parse.
    int pageIndex;
    if (!int.TryParse(Request["Page"], out pageIndex) || pageIndex < 1) {
        pageIndex = 1;
    }
    int pageSize = 10;

    string colName = tbcol.Text.Trim();
    if (colName == "") {
        colName = "text";
    }
    tbcol.Text = colName;

    // LuceneSearch.Search computes its page end as (page + 1) * pageSize, i.e.
    // it takes a zero-based page, so convert from the pager's 1-based number.
    Repeater1.DataSource = ls.Search(q, colName, pageSize, pageIndex - 1);
    Repeater1.DataBind();

    TimeSpan duration = DateTime.Now - dt;
    dateTimeMsg = "耗时" + duration.TotalSeconds + "秒";

    // The query is user input that ends up inside generated links, so it is
    // URL-encoded before being handed to the pager.
    SqlPager sqlpager = new SqlPager("", pageSize, ls.getSearchCount(q, colName), "Search.aspx", "q=" + Server.UrlEncode(q), pageIndex);
    pagerStr = sqlpager.GetShowPageStr("2", 1, 10);
}
前台显示:
<%-- Search results: one entry per bound item showing the linked title, the
     highlighted excerpt (lightText), the description, the keywords and the URL.
     NOTE(review): all values are written without HTML encoding; lightText carries
     highlight markup on purpose, the other fields should probably be encoded - confirm. --%>
<div>
<asp:repeater id="Repeater1" runat="server" >
<ItemTemplate>
<p><a href='<%# Eval("url")%>' class="link"><%# DataBinder.Eval(Container.DataItem, "title") %></a><br/>
<span class="sample">
<%# DataBinder.Eval(Container.DataItem, "lightText")%>
</span>
<br>
<span class="des">
<%# DataBinder.Eval(Container.DataItem, "des")%>
</span>
<br>
<span class="keyword">
<%# DataBinder.Eval(Container.DataItem, "keyword")%>
</span>
<br>
<span class="path">
<%# DataBinder.Eval(Container.DataItem, "url") %>
</span>
</p>
</ItemTemplate>
</asp:repeater>
</div>
<%-- Pager HTML taken from the page's pagerStr member. --%>
<div class="paging">
<%=this.pagerStr%>
</div>
源码如下:
数据索引类(DataIndexer):
/// <summary>
/// Writes <see cref="luseneTxt"/> records into a Lucene.Net index stored in a
/// file-system directory. Call <see cref="Close"/> when finished so the writer
/// is released.
/// </summary>
public class DataIndexer
{
    private readonly IndexWriter writer;

    /// <summary>
    /// Opens a writer on <paramref name="directory"/> with the create flag set to true.
    /// </summary>
    /// <param name="directory">File-system path of the index directory.</param>
    public DataIndexer(string directory)
        : this(directory, true)
    {
    }

    /// <summary>
    /// Opens a writer on <paramref name="directory"/>.
    /// </summary>
    /// <param name="directory">File-system path of the index directory.</param>
    /// <param name="create">Value passed as the IndexWriter create flag (true for a new index, false to open the existing one).</param>
    public DataIndexer(string directory, bool create)
    {
        writer = new IndexWriter(FSDirectory.Open(directory), new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), create, IndexWriter.MaxFieldLength.LIMITED);
        writer.UseCompoundFile = true;
    }

    /// <summary>
    /// Adds every record in <paramref name="list"/> to the index.
    /// </summary>
    /// <param name="list">Records to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    public void AddHtmlData(List<luseneTxt> list)
    {
        if (list == null)
        {
            throw new ArgumentNullException("list");
        }

        foreach (luseneTxt t in list)
        {
            AddHtmlDocument(t);
        }
    }

    /// <summary>
    /// Adds one record to the index as a document with the stored fields
    /// "text", "path", "title", "des" and "keyword".
    /// </summary>
    /// <param name="lt">Record to index; null property values are stored as empty strings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="lt"/> is null.</exception>
    public void AddHtmlDocument(luseneTxt lt)
    {
        if (lt == null)
        {
            throw new ArgumentNullException("lt");
        }

        Document doc = new Document();
        // Each field carries the property of the same name; "text" and "title"
        // were previously crossed, so searches on "text" matched titles.
        doc.Add(new Field("text", lt.text ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
        // The path is kept as a single un-analyzed term.
        doc.Add(new Field("path", lt.path ?? string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("title", lt.title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("des", lt.des ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("keyword", lt.keyword ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    /// <summary>
    /// Optimizes the index and disposes the writer.
    /// </summary>
    public void Close()
    {
        try
        {
            writer.Optimize();
        }
        finally
        {
            // Dispose even when Optimize fails, otherwise the writer stays open.
            writer.Dispose();
        }
    }

    /// <summary>
    /// Removes all documents from the index via IndexWriter.DeleteAll.
    /// Callers in this file follow it with <see cref="Close"/>.
    /// </summary>
    public void Delete()
    {
        writer.DeleteAll();
    }
}
Lucene.Net搜索检索类(LuceneSearch):
/// <summary>
/// Facade over the Lucene.Net index used by the site: builds, extends and clears
/// the index, and runs paged searches with highlighted excerpts.
/// </summary>
public class LuceneSearch
{
    // Default index location.
    private string indexDirectory = System.Web.HttpContext.Current.Server.MapPath("~/App_Data/index");

    /// <summary>
    /// Uses the default index directory (~/App_Data/index).
    /// </summary>
    public LuceneSearch() {
    }

    /// <summary>
    /// Uses a custom index directory.
    /// </summary>
    /// <param name="filePath">Virtual path of the index directory, resolved with Server.MapPath.</param>
    public LuceneSearch(string filePath) {
        indexDirectory = System.Web.HttpContext.Current.Server.MapPath(filePath);
    }

    /// <summary>
    /// Indexes files found under a directory.
    /// </summary>
    /// <param name="url">Virtual path of the directory holding the files.</param>
    /// <param name="pattenRegex">File pattern passed to the indexer, e.g. *.htm*.</param>
    public void CreatIndex(string url,string pattenRegex) {
        string dataDirectory = System.Web.HttpContext.Current.Server.MapPath(url);
        IntranetIndexer indexer = new IntranetIndexer(indexDirectory);
        try
        {
            indexer.AddDirectory(new DirectoryInfo(dataDirectory), pattenRegex);
        }
        finally
        {
            indexer.Close();
        }
    }

    /// <summary>
    /// Builds the index from in-memory records, opening the writer in create mode.
    /// </summary>
    /// <param name="list">Records to index.</param>
    public void CreatIndexByData(List<luseneTxt> list) {
        DataIndexer indexer = new DataIndexer(indexDirectory);
        try
        {
            indexer.AddHtmlData(list);
        }
        finally
        {
            // Always release the writer so a failed run does not leave it open.
            indexer.Close();
        }
    }

    /// <summary>
    /// Adds records to the existing index (the writer is opened with create = false).
    /// </summary>
    /// <param name="list">Records to add.</param>
    public void UpdateIndexByData(List<luseneTxt> list)
    {
        DataIndexer indexer = new DataIndexer(indexDirectory, false);
        try
        {
            indexer.AddHtmlData(list);
        }
        finally
        {
            indexer.Close();
        }
    }

    /// <summary>
    /// Removes every document from the index.
    /// </summary>
    public void DeleteIndex() {
        DataIndexer indexer = new DataIndexer(indexDirectory);
        try
        {
            indexer.Delete();
        }
        finally
        {
            indexer.Close();
        }
    }

    #region Search
    /// <summary>
    /// Runs a query and returns one page of hits with highlighted excerpts of the "text" field.
    /// </summary>
    /// <param name="q">Query text in the query parser's syntax.</param>
    /// <param name="colname">Default field the parser searches.</param>
    /// <param name="pageSize">Number of hits per page; non-positive values yield an empty list.</param>
    /// <param name="page">Zero-based page number; negative values are treated as 0.</param>
    /// <returns>The hits of the requested page; empty when the page lies past the last hit.</returns>
    public List<SearchLucene> Search(string q, string colname, int pageSize, int page)
    {
        List<SearchLucene> list = new List<SearchLucene>();
        if (pageSize <= 0)
        {
            return list;
        }
        if (page < 0)
        {
            page = 0;
        }

        // Work in long so an absurd page number cannot overflow into a negative offset.
        long firstLong = (long)page * pageSize;
        if (firstLong >= int.MaxValue)
        {
            return list;
        }
        int first = (int)firstLong;
        long wantedLong = firstLong + pageSize;
        int wanted = wantedLong > int.MaxValue ? int.MaxValue : (int)wantedLong;

        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
        try
        {
            Query query = BuildQuery(q, colname, analyzer);

            // Fetch just enough top hits to cover the requested page. The old
            // fixed limit of 200 combined with a TotalHits-based loop bound read
            // past the end of ScoreDocs on larger result sets.
            TopDocs hits = searcher.Search(query, wanted);

            // Highlighter: bold matches, fragments of about 80 characters.
            IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
            SimpleFragmenter fragmenter = new SimpleFragmenter(80);
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.TextFragmenter = fragmenter;

            // The page starts at page * pageSize (it used to start at `page`) and
            // never runs past the hits actually returned.
            int last = Math.Min(wanted, hits.ScoreDocs.Length);
            for (int i = first; i < last; i++)
            {
                Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);

                // A document without a stored "text" field highlights as empty
                // instead of throwing from StringReader.
                string storedText = doc.Get("text") ?? string.Empty;
                TokenStream stream = analyzer.TokenStream("", new StringReader(storedText));
                String highText = highlighter.GetBestFragments(stream, storedText, 2, "...");

                SearchLucene sm = new SearchLucene();
                sm.title = doc.Get("title");
                sm.des = doc.Get("des");
                sm.url = doc.Get("path");
                sm.text = doc.Get("text");
                sm.keyword = doc.Get("keyword");
                sm.lightText = highText;
                list.Add(sm);
            }
        }
        finally
        {
            // Release the index reader even when parsing or searching fails.
            searcher.Dispose();
        }
        return list;
    }

    /// <summary>
    /// Returns the total number of documents matching the query.
    /// </summary>
    /// <param name="q">Query text in the query parser's syntax.</param>
    /// <param name="colname">Default field the parser searches.</param>
    /// <returns>Total hit count reported by the searcher.</returns>
    public int getSearchCount(string q, string colname)
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
        try
        {
            Query query = BuildQuery(q, colname, analyzer);
            // TotalHits does not depend on how many top documents are collected.
            return searcher.Search(query, 1).TotalHits;
        }
        finally
        {
            // The searcher was previously never disposed here.
            searcher.Dispose();
        }
    }

    // Parses q with colname as the default field.
    private static Query BuildQuery(string q, string colname, StandardAnalyzer analyzer)
    {
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, colname, analyzer);
        return parser.Parse(q);
    }
    #endregion
}
/// <summary>
/// Plain data holder for one record handed to the indexer.
/// </summary>
public class luseneTxt
{
    /// <summary>Title of the record.</summary>
    public string title { get; set; }

    /// <summary>Path (URL) of the record.</summary>
    public string path { get; set; }

    /// <summary>Body text of the record.</summary>
    public string text { get; set; }

    /// <summary>Description of the record.</summary>
    public string des { get; set; }

    /// <summary>Keywords of the record.</summary>
    public string keyword { get; set; }
}
/// <summary>
/// Plain data holder for one search hit shown in the result list.
/// </summary>
public class SearchLucene
{
    /// <summary>Title of the hit.</summary>
    public string title { get; set; }

    /// <summary>Description of the hit.</summary>
    public string des { get; set; }

    /// <summary>Keywords of the hit.</summary>
    public string keyword { get; set; }

    /// <summary>URL of the hit.</summary>
    public string url { get; set; }

    /// <summary>Stored body text of the hit.</summary>
    public string text { get; set; }

    /// <summary>Excerpt of the text with highlight markup.</summary>
    public string lightText { get; set; }
}
分页类库:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace LuceneSolution.Code
{
/// <summary>
/// Builds the HTML for a page navigator (first/previous/next/last links, a run
/// of numbered page links and a "jump to page" box) from a text template.
/// The class only formats markup; fetching the page of data is the caller's job.
/// Typical use: construct with the six-argument constructor, then call
/// <c>GetShowPageStr("2", 3, 10)</c>.
/// </summary>
public class SqlPager
{
    #region Construction
    /// <summary>
    /// Creates a pager with the defaults for the given language.
    /// </summary>
    /// <param name="Language">"EN" for English labels; any other value selects Chinese.</param>
    public SqlPager(string Language)
    {
        this.unitLa(Language);
    }

    /// <summary>
    /// Creates a fully configured pager.
    /// </summary>
    /// <param name="Language">"EN" for English labels; any other value selects Chinese.</param>
    /// <param name="MyPageSize">Records per page.</param>
    /// <param name="Counts">Total number of records.</param>
    /// <param name="TurnUrlStr">URL of the page the links point to.</param>
    /// <param name="MyParameters">Extra query-string parameters, without a leading '?' or '&amp;'.</param>
    /// <param name="PageIndex">Page to display, starting at 1.</param>
    public SqlPager(string Language, int MyPageSize, int Counts, string TurnUrlStr, string MyParameters, int PageIndex)
    {
        this.unitLa(Language);
        this.PageSize = MyPageSize;
        this.TotalRecord = Counts;
        this.TurnUrlStr = TurnUrlStr;
        this.Parameters = MyParameters;
        this.PageIndex = PageIndex;
        // PageString keeps the template unitLa selected. It used to be reset to
        // the Chinese template here, which discarded the English one for "EN".
    }

    /// <summary>
    /// Resets every setting to the defaults of the given language.
    /// </summary>
    /// <param name="Language">"EN" for English labels; any other value selects Chinese.</param>
    protected void unitLa(string Language)
    {
        bool english = Language == "EN";

        PageString = english
            ? "Page [PageIndex]/[TotalPage] Info [PageSize]/[TotalRecord] [FirstStr] [PrevStr] [PageNumber] [NextStr] [LastStr] [TurnControl]"
            : "第[PageIndex]页/共[TotalPage]页 第每[PageSize]条/共[TotalRecord]条 [FirstStr] [PrevStr] [PageNumber] [NextStr] [LastStr] [TurnControl]";
        FirstStr = english ? "<<" : "首页";
        PrevStr = english ? "<" : "上一页";
        NextStr = english ? ">" : "下一页";
        LastStr = english ? ">>" : "尾页";
        NoRecord = english ? "Total 0" : "共有 0 条信息";
        ButtonStr = english ? " GO " : "跳转";

        // Language-independent defaults.
        TurnUrlStr = "";
        PageSize = 20;
        PageIndex = 1;
        TotalRecord = 0;
        TotalPage = 0;
        TurnControl = "";
        PageNumber = "";
        ShowPageStr = "";
    }
    #endregion

    #region Public fields
    /// <summary>
    /// Output template; the bracketed tokens ([PageIndex], [TotalPage], [PageSize],
    /// [TotalRecord], [FirstStr], [PrevStr], [PageNumber], [NextStr], [LastStr],
    /// [TurnControl]) are replaced when rendering.
    /// </summary>
    public string PageString;
    /// <summary>
    /// URL the generated links point to.
    /// </summary>
    public string TurnUrlStr;
    /// <summary>
    /// Extra query-string parameters appended to every link; do not start with '?' or '&amp;'.
    /// </summary>
    public string Parameters;
    /// <summary>
    /// Records per page.
    /// </summary>
    public int PageSize;
    /// <summary>
    /// Page to display, starting at 1. Rendering clamps it to the valid range.
    /// </summary>
    public int PageIndex;
    /// <summary>
    /// Total number of records.
    /// </summary>
    public int TotalRecord;
    /// <summary>
    /// Label of the "first page" link.
    /// </summary>
    public string FirstStr;
    /// <summary>
    /// Label of the "previous page" link.
    /// </summary>
    public string PrevStr;
    /// <summary>
    /// Label of the "next page" link.
    /// </summary>
    public string NextStr;
    /// <summary>
    /// Label of the "last page" link.
    /// </summary>
    public string LastStr;
    /// <summary>
    /// Message meant for the "no records" case.
    /// NOTE(review): rendering always returns the filled template, so this value
    /// does not currently appear in the output - confirm the intended behaviour.
    /// </summary>
    public string NoRecord;
    /// <summary>
    /// Caption of the jump button.
    /// </summary>
    public string ButtonStr;
    #endregion

    #region Private fields
    /// <summary>
    /// Total number of pages, computed when rendering.
    /// </summary>
    private int TotalPage;
    /// <summary>
    /// HTML of the "jump to page" text box and button.
    /// </summary>
    private string TurnControl;
    /// <summary>
    /// HTML of the numbered page links (e.g. 1 2 3).
    /// </summary>
    private string PageNumber;
    /// <summary>
    /// The rendered pager.
    /// </summary>
    private string ShowPageStr;
    #endregion

    #region Rendering
    /// <summary>
    /// Renders the pager HTML.
    /// </summary>
    /// <param name="Style">"1" shows only first/previous/next/last; "2" also shows a run of page numbers.</param>
    /// <param name="PlaceIn">Slot (1-based) in which the current page is kept within the run of numbers (style 2); values outside 1..ShowNum fall back to the third slot.</param>
    /// <param name="ShowNum">How many page numbers to show at once (style 2).</param>
    /// <returns>The template with every token replaced.</returns>
    public string GetShowPageStr(string Style, int PlaceIn, int ShowNum)
    {
        // A non-positive page size yields zero pages instead of a DivideByZeroException.
        TotalPage = PageSize > 0 ? (TotalRecord + PageSize - 1) / PageSize : 0;

        // Clamp the requested page into range.
        if (PageIndex < 1)
        {
            PageIndex = 1;
        }
        if (PageIndex > TotalPage)
        {
            PageIndex = TotalPage;
        }

        // Jump box and button. The attribute names must be plain ASCII "onkeydown"
        // and "onclick"; the previous text contained Greek omicron look-alikes,
        // so browsers never wired up the handlers.
        string jumpScript = "window.location.href='" + TurnUrlStr + "?Page=' + document.getElementById('txtPageGo').value + '&" + Parameters + "'";
        TurnControl = "<input value='" + PageIndex.ToString() + "' id='txtPageGo' name='txtPageGo' type='text' style='width:35px;' onkeydown=\"if(event.keyCode==13)" + jumpScript + "\">"
            + "<input name='btnGo' type='button' id='btnGo' value='" + ButtonStr + "' onclick=\"javascript:" + jumpScript + "\">";

        // Render the navigation labels into locals so the public label fields keep
        // their plain text and repeated calls do not wrap anchors inside anchors.
        string firstHtml = FirstStr;
        string prevHtml = PrevStr;
        string nextHtml = NextStr;
        string lastHtml = LastStr;

        if (TotalRecord > 0)
        {
            if (TotalPage > 1)
            {
                // No first/previous links on the first page.
                if (PageIndex != 1)
                {
                    firstHtml = BuildLink(1, FirstStr);
                    prevHtml = BuildLink(PageIndex - 1, PrevStr);
                }
                // No next/last links on the last page.
                if (PageIndex != TotalPage)
                {
                    nextHtml = BuildLink(PageIndex + 1, NextStr);
                    lastHtml = BuildLink(TotalPage, LastStr);
                }
            }

            if (Style == "1")
            {
                PageNumber = "";
            }
            if (Style == "2")
            {
                PageNumber = BuildPageNumbers(PlaceIn, ShowNum);
            }
        }

        // Fill the template; the replacement order is kept as it always was.
        ShowPageStr = PageString;
        ShowPageStr = ShowPageStr.Replace("[PageIndex]", PageIndex.ToString());
        ShowPageStr = ShowPageStr.Replace("[TotalPage]", TotalPage.ToString());
        ShowPageStr = ShowPageStr.Replace("[PageSize]", PageSize.ToString());
        ShowPageStr = ShowPageStr.Replace("[TotalRecord]", TotalRecord.ToString());
        ShowPageStr = ShowPageStr.Replace("[FirstStr]", firstHtml);
        ShowPageStr = ShowPageStr.Replace("[PrevStr]", prevHtml);
        ShowPageStr = ShowPageStr.Replace("[NextStr]", nextHtml);
        ShowPageStr = ShowPageStr.Replace("[LastStr]", lastHtml);
        ShowPageStr = ShowPageStr.Replace("[TurnControl]", TurnControl);
        ShowPageStr = ShowPageStr.Replace("[PageNumber]", PageNumber);
        return ShowPageStr;
    }

    // Builds the URL of the given page: TurnUrlStr?Page=N&Parameters.
    private string BuildPageUrl(int page)
    {
        return TurnUrlStr + "?Page=" + page.ToString() + "&" + Parameters;
    }

    // Wraps a navigation label in a link to the given page.
    private string BuildLink(int page, string label)
    {
        return "<a href=\"" + BuildPageUrl(page) + "\">" + label + "</a>";
    }

    // Renders the run of numbered page links; the current page is bold and not linked.
    private string BuildPageNumbers(int PlaceIn, int ShowNum)
    {
        // Number of pages kept in front of the current one.
        int before = (PlaceIn < 1 || PlaceIn > ShowNum) ? 2 : PlaceIn - 1;

        int first;
        int last;
        if (TotalPage <= ShowNum)
        {
            // Everything fits: show all pages.
            first = 1;
            last = TotalPage;
        }
        else if (PageIndex <= before)
        {
            // Near the start: show the first window.
            first = 1;
            last = ShowNum;
        }
        else if (PageIndex >= TotalPage - ShowNum + before + 1)
        {
            // Near the end: show the last window.
            first = TotalPage - ShowNum + 1;
            last = TotalPage;
        }
        else
        {
            // In the middle: slide the window so the current page sits in its slot.
            first = PageIndex - before;
            last = first + ShowNum - 1;
        }

        StringBuilder html = new StringBuilder();
        for (int page = first; page <= last; page++)
        {
            string number = page.ToString();
            if (page == PageIndex)
            {
                html.Append("<b>").Append(number).Append("</b> ");
            }
            else
            {
                html.Append("<a href=\"").Append(BuildPageUrl(page)).Append("\">[").Append(number).Append("]</a> ");
            }
        }
        return html.ToString();
    }
    #endregion

    #region Notes
    /*
    General-purpose .NET pager (stored-procedure paging edition; the display can be
    fully customised, including Chinese/English labels).
    The idea: paging itself is done in the database by a stored procedure, so this
    class does no data handling and only formats the page navigation.
    Combined with ROW_NUMBER() in SQL Server 2005 this works even better.
    The display is fully customisable; images work too, as long as the labels are
    set to the corresponding HTML.
    Screenshot: http://images.cnblogs.com/cnblogs_com/84ww/128905/r_PageStore.gif
    */
    #endregion

    #region Usage example
    /*
    protected void BindData()
    {
        SqlPager MyPage = new SqlPager("EN");          // create the pager
        MyPage.PageSize = MyPageSize;                  // records per page
        MyPage.TotalRecord = Counts;                   // total number of records
        MyPage.TurnUrlStr = "AskSearchList.aspx";      // target page (the current page)
        MyPage.Parameters = MyParameters;              // extra query-string parameters
        MyPage.PageIndex = PageIndex;                  // page to display
        // output template
        MyPage.PageString = "第[PageIndex]页/共[TotalPage]页 第每[PageSize]条/共[TotalRecord]条 [FirstStr] [PrevStr] [PageNumber] [NextStr] [LastStr] [TurnControl]";
        string strPage = MyPage.GetShowPageStr("2", 3, 10);
        labShowPage.Text = strPage;
    }
    */
    #endregion
}
}
全部的代码都奉献上了,如果大家有更好的写法,更好的见解,欢迎拍砖。