0、什么是lucene?
lucene是一组供开发人员调用的API,不像solr一样拿来就能直接运行。lucene现在支持很多语言:Java、C#、Perl等。本篇文章将以C#讲解lucene的使用流程。lucene的使用大致分为两步:建立索引、搜索查询。
1、建立索引
// Build a Lucene index from the tab-separated text files in a source directory.
// Each file becomes one Document; each non-empty line "fieldName<TAB>value"
// (e.g. "name\tlily", "content\ti am a boy.") becomes one Field of that Document.

// The same analyzer is reused later by the search step.
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

// D:\project\index is the directory that stores the index files.
// Second argument: true = delete an existing directory first, false = keep it.
// For small data sets the index can live in memory instead: new RAMDirectory().
// Large data sets will not fit in memory, so use the file system for those.
Directory directory = FSDirectory.GetDirectory(@"D:\project\index", false);

IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
try
{
    // D:\project\test007 is the directory holding the files to be indexed.
    string strdir = "D:\\project\\test007";
    DirectoryInfo dir = new DirectoryInfo(strdir);
    if (dir.Exists)
    {
        char[] separator = { '\t' };
        foreach (FileInfo file in dir.GetFiles())
        {
            if (!file.Exists)
            {
                Console.WriteLine("file is not exit: " + file.FullName);
                continue;
            }

            Console.WriteLine("path : " + file.FullName);

            // One Document corresponds to one database record or, as here, one file.
            Document doc = new Document();

            // "using" closes the reader and the underlying stream even if a line fails to index.
            using (FileStream aFile = file.OpenRead())
            using (StreamReader sr = new StreamReader(aFile))
            {
                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    if (strLine == "")
                    {
                        continue;
                    }

                    // Split on the first tab only, so a value that itself contains tabs stays whole.
                    string[] mywords = strLine.Split(separator, 2);
                    if (mywords.Length < 2)
                    {
                        // A line without a tab has no value part; skip it instead of crashing.
                        Console.WriteLine("skip malformed line (no tab separator): " + strLine);
                        continue;
                    }

                    Console.WriteLine(" filename :" + mywords[0] + " value : " + mywords[1]);
                    if (mywords[0] == "time" || mywords[0] == "fans")
                    {
                        Console.WriteLine("time flied");
                        // "time" and "fans" are stored and indexed as a single un-analyzed term.
                        doc.Add(new Field(mywords[0], mywords[1], Field.Store.YES, Field.Index.NOT_ANALYZED));
                    }
                    else
                    {
                        // Every other field is stored and indexed through the analyzer.
                        doc.Add(new Field(mywords[0], mywords[1], Field.Store.YES, Field.Index.ANALYZED));
                    }
                }
            }

            // Write the finished document into the index.
            iwriter.AddDocument(doc);
        }
        Console.WriteLine(dir + " open sucessfully");
    }
    else
    {
        Console.WriteLine(dir + " the dir is not exits");
    }

    iwriter.Optimize();
}
finally
{
    // Always close the writer so pending changes are flushed and the index lock is released.
    iwriter.Close();
}
2、搜索查询
// Search the index: read a keyword from the console, match it against the
// "name" and "content" fields, and print the hits sorted by "time" then "fans".
// Relies on the `directory` and `analyzer` variables created by the indexing step.
IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
try
{
    Console.Write("inter a serch key word : ");
    string key = Console.ReadLine();

    // ReadLine returns null at end of input; a blank keyword has nothing to search for.
    if (key != null && key.Trim().Length > 0)
    {
        // SHOULD on both fields: a document matches if either field matches.
        string[] searchFields = { "name", "content" };
        BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
        Query query = MultiFieldQueryParser.Parse(key, searchFields, flags, analyzer);

        // Both sort fields pass reverse=true; "fans" is compared as a long.
        SortField[] sortOrder = { new SortField("time", true), new SortField("fans", SortField.LONG, true) };
        Sort sort = new Sort(sortOrder);

        Hits hits = isearcher.Search(query, sort);
        int hitCount = hits.Length();
        Console.WriteLine("有个{0}搜索结果 ", hitCount);
        for (int i = 0; i < hitCount; i++)
        {
            // Use hits.Doc(i), not isearcher.Doc(i): i is a position in the result list,
            // not an index-wide document id.
            Document hitDoc = hits.Doc(i);
            Console.Write(hitDoc.Get("name") + " " + hitDoc.Get("fans") + " " + hitDoc.Get("gender") + " " + hitDoc.Get("time"));
            Console.WriteLine(hitDoc.Get("content"));
        }
    }
}
finally
{
    // Close the searcher even when parsing or searching throws.
    isearcher.Close();
}