项目目标：基于 Lucene.Net + 盘古分词，在约 100 万条文件名中做快速检索。
1.新建索引
/// <summary>
/// Builds the index from scratch: scans <paramref name="sourcePath"/> for *.txt files
/// and writes one document per file into the index at <c>IndexerPath</c>.
/// NOTE: the <c>create: true</c> flag deliberately wipes any existing index — this
/// method is a full rebuild, not an incremental update (use AddIndex for that).
/// </summary>
/// <param name="sourcePath">Folder to scan; defaults to the original hard-coded path.</param>
public void InitIndex(string sourcePath = @"F:\TEST")
{
    DirectoryInfo dirInfo = new DirectoryInfo(sourcePath);
    FileInfo[] files = dirInfo.GetFiles("*.txt");
    using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath)))
    {
        // create: true → rebuild the index, discarding previous contents.
        using (IndexWriter writer = new IndexWriter(dir, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))
        {
            foreach (FileInfo item in files)
            {
                // One document per file. FileName is the only analyzed (searchable-by-
                // word) field; the rest are stored verbatim for display.
                Document doc = new Document();
                doc.Add(new Field("FileName", item.Name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Path", item.DirectoryName, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Size", (item.Length / 1024.00).ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("CreateDateTime", item.CreationTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("LastDateTime", item.LastWriteTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
            }
            // Merge segments once after the bulk load for faster subsequent searches.
            writer.Optimize();
        }
    }
}
2.查找数据
/// <summary>
/// Multi-term search over the "FileName" field. The keyword is first segmented by
/// PanGu (see <see cref="AnalyzerKeyword"/>) into a Lucene query string, then parsed
/// and executed. Returns up to 10000 hits.
/// </summary>
/// <param name="keyword">Raw user keyword (may contain several words).</param>
/// <returns>Matching files; empty list when nothing matches.</returns>
public List<FilesInfo> SearchData(string keyword)
{
    List<FilesInfo> result = new List<FilesInfo>();
    // FIX: both the directory and the searcher are IDisposable and were previously
    // leaked on every call — wrap them in using, matching the Query() method.
    // NoLockFactory: read-only access, no write lock is taken.
    using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath), new NoLockFactory()))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "FileName", new PanGuAnalyzer());
        Query query = parser.Parse(AnalyzerKeyword("FileName", keyword));
        TopDocs docs = searcher.Search(query, null, 10000);
        foreach (ScoreDoc item in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(item.Doc);
            result.Add(new FilesInfo()
            {
                FileName = doc.Get("FileName"),
                Path = doc.Get("Path"),
                Size = doc.Get("Size"),
                LastDateTime = doc.Get("LastDateTime"),
                CreateDateTime = doc.Get("CreateDateTime"),
            });
        }
    }
    return result;
}
/// <summary>
/// Converts a raw keyword into a Lucene query string by segmenting it with PanGu.
/// A single segmented word becomes a prefix query ("column:word*"); multiple words
/// become space-separated exact terms (OR'ed by the QueryParser's default operator).
/// </summary>
/// <param name="column">Field name to query against.</param>
/// <param name="keyword">Raw user input.</param>
/// <returns>A parseable Lucene query string; never empty.</returns>
private string AnalyzerKeyword(string column, string keyword)
{
    PanGu.Segment.Init();
    PanGu.Segment segment = new PanGu.Segment();
    ICollection<PanGu.WordInfo> words = segment.DoSegment(keyword);
    // FIX: segmentation can yield no words (empty/whitespace/symbol-only input).
    // The old code then returned an empty string (→ ParseException in the caller)
    // or hit FirstOrDefault() == null. Fall back to a prefix query on the escaped
    // raw keyword instead.
    if (words == null || words.Count == 0)
    {
        return string.Format("{0}:{1}*", column, QueryParser.Escape(keyword ?? string.Empty));
    }
    StringBuilder queryStringBuilder = new StringBuilder();
    if (words.Count == 1)
    {
        // Single word → prefix match so partial file names still hit.
        queryStringBuilder.AppendFormat("{0}:{1}* ", column, words.First().Word);
    }
    else
    {
        // Multiple words → one exact term per word.
        foreach (PanGu.WordInfo word in words)
        {
            queryStringBuilder.AppendFormat("{0}:{1} ", column, word.Word);
        }
    }
    return queryStringBuilder.ToString().TrimEnd();
}
2.1单个条件查找
//有时候找不到东西：原因是 TermQuery 按字面量匹配词条，拼接在末尾的 * 不会被当作通配符解析
/// <summary>
/// Single-condition starts-with search on the "FileName" field.
/// </summary>
/// <param name="queryStr">Term prefix to match.</param>
/// <returns>Matching files; empty list when nothing matches.</returns>
public List<FilesInfo> Query(string queryStr)
{
    List<FilesInfo> result = new List<FilesInfo>();
    using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath), new NoLockFactory()))
    {
        using (IndexSearcher searcher = new IndexSearcher(dir))
        {
            // BUG FIX: TermQuery matches terms *literally* — appending "*" searched
            // for a term that actually contains an asterisk, which is why results
            // were often missing ("有时候找不到东西"). PrefixQuery performs the
            // intended starts-with match.
            // NOTE(review): FileName is ANALYZED by PanGu, so the prefix is matched
            // against segmented terms, not the whole file name — confirm this is
            // the desired behavior.
            Query query = new PrefixQuery(new Term("FileName", queryStr));
            TopDocs docs = searcher.Search(query, null, 10000);
            foreach (var item in docs.ScoreDocs)
            {
                Document doc = searcher.Doc(item.Doc);
                result.Add(new FilesInfo()
                {
                    FileName = doc.Get("FileName"),
                    Path = doc.Get("Path"),
                    Size = doc.Get("Size"),
                    LastDateTime = doc.Get("LastDateTime"),
                    CreateDateTime = doc.Get("CreateDateTime"),
                });
            }
        }
    }
    return result;
}
}
3.增加索引
/// <summary>
/// Appends one file's document to the existing index at <c>IndexerPath</c>.
/// The index must already exist (created by <c>InitIndex</c>).
/// </summary>
/// <param name="fileInfo">File whose metadata is indexed.</param>
public void AddIndex(FileInfo fileInfo)
{
    using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath)))
    {
        // BUG FIX: the create flag must be FALSE here. The original passed true,
        // which RECREATES the index and silently discards every previously indexed
        // document on each call to AddIndex.
        using (IndexWriter writer = new IndexWriter(dir, new PanGuAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED))
        {
            // Same field layout as InitIndex: analyzed FileName, stored metadata.
            Document doc = new Document();
            doc.Add(new Field("FileName", fileInfo.Name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Path", fileInfo.DirectoryName, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Size", (fileInfo.Length / 1024.00).ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("CreateDateTime", fileInfo.CreationTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("LastDateTime", fileInfo.LastWriteTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
            // Optimize() (a full segment merge) on every single add is very
            // expensive and unnecessary for correctness; Dispose commits the add.
        }
    }
}