// Build the Lucene index from a GBK-encoded text file, one document per input line.
StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
// IndexWriter(index directory, analyzer, create): when create is true the
// directory's existing index is cleared before the new index is built.
IndexWriter writer = new IndexWriter(@"E:/Winform/LuceneDemo/Index/", standardAnalyzer, true);
try
{
    using (StreamReader sr = new StreamReader(@"E:/Winform/LuceneDemo/Index.txt", Encoding.GetEncoding("GBK")))
    {
        String line;
        while ((line = sr.ReadLine()) != null)
        {
            Document doc = new Document();
            // "title" is analyzed for full-text search; "time" is stored as a single
            // un-tokenized term.
            doc.Add(new Field("title", line, Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("time", DateTools.TimeToString(DateTime.Now.Ticks, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.AddDocument(doc);
            // NOTE(review): presumably this pause exists so that every document gets a
            // clearly distinct "time" value for the sorting demo - confirm before removing.
            Thread.Sleep(1000);
        }
    }
    // Optimize the index once all documents have been added.
    writer.Optimize();
}
finally
{
    // Closing the writer is what commits the index to the directory; doing it in
    // finally also ensures the writer is released when reading or indexing throws.
    writer.Close();
}
MessageBox.Show("索引创建成功!!");
// Search the index for the keyword typed into txtKW and show the matching titles,
// newest first.
if (txtKW.Text.Trim() == "")
{
    MessageBox.Show("关键词不能为空!!");
    return;
}
// Parse the query before opening the searcher so a parse failure cannot leak it.
StandardAnalyzer queryAnalyzer = new StandardAnalyzer();
Query query = new QueryParser("title", queryAnalyzer).Parse(txtKW.Text.Trim());
DataTable dt = new DataTable();
dt.Columns.Add("title");
int total;
IndexSearcher searcher = new IndexSearcher(@"E:/Winform/LuceneDemo/Index/");
try
{
    // Sort on the "time" field as a string, reversed (descending).
    Sort sort = new Sort(new SortField[] { new SortField("time", SortField.STRING, true) });
    Hits hits = searcher.Search(query, sort);
    total = hits.Length();
    for (int i = 0; i < total; i++)
    {
        // Hits loads documents through the searcher, so read them before it is closed.
        Document doc = hits.Doc(i);
        DataRow dr = dt.NewRow();
        dr["title"] = doc.Get("title");
        dt.Rows.Add(dr);
    }
}
finally
{
    // Release the searcher even when the search itself throws.
    searcher.Close();
}
lblTotal.ForeColor = Color.Red;
lblTotal.Text = string.Format("共有记录:{0}条", total);
gvResult.DataSource = dt;
lucene 的搜索相当强大,它提供了很多辅助查询类,每个类都继承自Query类,各自完成一种特殊的查询,你可以像搭积木一样将它们任意组合使用,完成一些复杂操作;
lucene还提供了Sort类对结果进行排序,提供了Filter类对查询条件进行限制。你或许会不自觉地拿它跟SQL语句进行比较: “lucene能执行and、or、order by、where、like ‘%xx%’操作吗?”回答是:“当然没问题!”
IndexSearcher:是lucene中最基本的检索工具.
Query:查询,lucene中支持模糊查询,短语查询,组合查询等等
QueryParser: 是一个解析用户输入的工具,可以通过扫描用户输入的字符串,生成Query对象。
Hits:在搜索完成之后,需要把搜索结果返回并显示给用户。在lucene中,搜索的结果的集合是用Hits类的实例来表示的。Hits对象中主要方法有:
length(): 返回搜索结果的总数,下面的简单用法中用到了Hits的这个方法
doc(int n): 返回第n个文档
iterator(): 返回一个迭代器
各种各样的Query
TermQuery
Term t = new Term("content", " lucene";
Query query = new TermQuery(t);
BooleanQuery
// Combine two term queries; SHOULD clauses are optional, so this behaves like "or".
BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add(termQuery1, BooleanClause.Occur.SHOULD);
boolQuery.add(termQuery2, BooleanClause.Occur.SHOULD);
RangeQuery
RangeQuery query = new RangeQuery(new Term(“time”, “20060101”), new Term(“time”, “20060130”), true);
QueryParser
TermQuery可以用“field:key”方式,例如“content:lucene”。
BooleanQuery中‘与’用‘+’,‘或’用‘ ’,例如“content:java content:perl”。
RangeQuery用‘[]’或‘{}’,前者表示闭区间,后者表示开区间,例如“time:[20060101 TO 20060130]”,注意TO区分大小写。
Filter
RangeFilter:设定只搜索指定范围内的索引。
QueryFilter:在上次查询的结果中搜索
// Example: run a parsed query and restrict the hits with a range filter on "time".
Directory dir = FSDirectory.getDirectory(PATH, false);
IndexSearcher is = new IndexSearcher(dir);
try
{
    QueryParser parser = new QueryParser("content", new StandardAnalyzer());
    Query query = parser.parse("title:lucene content:lucene");
    // Both bounds are inclusive (the two trailing true flags).
    RangeFilter filter = new RangeFilter("time", "20060101", "20060230", true, true);
    Hits hits = is.search(query, filter);
    for (int i = 0; i < hits.length(); i++)
    {
        Document doc = hits.doc(i);
        System.out.println(doc.get("title"));
    }
}
finally
{
    // Always release the searcher, even if parsing or searching throws.
    is.close();
}
sort
结果集排序
Sort sort = new Sort(“time”); //相当于SQL的“order by time”
Sort sort = new Sort(“time”, true); // 相当于SQL的“order by time desc”
// Run a parsed query, restrict the hits with a range filter on "time", and return
// them sorted by the "time" field.
Directory dir = FSDirectory.getDirectory(PATH, false);
IndexSearcher is = new IndexSearcher(dir);
try
{
    QueryParser parser = new QueryParser("content", new StandardAnalyzer());
    Query query = parser.parse("title:lucene content:lucene");
    // Both bounds are inclusive (the two trailing true flags).
    RangeFilter filter = new RangeFilter("time", "20060101", "20060230", true, true);
    Sort sort = new Sort("time");
    Hits hits = is.search(query, filter, sort);
    for (int i = 0; i < hits.length(); i++)
    {
        Document doc = hits.doc(i);
        System.out.println(doc.get("title"));
    }
}
finally
{
    // Always release the searcher, even if parsing or searching throws.
    is.close();
}