Lucene

本项目的需求是:使用 Lucene.Net 在 100 万(100W)条文件名中进行快速查找。

1.新建索引

        /// <summary>
        /// Rebuilds the index from the *.txt files found in F:\TEST and writes it to IndexerPath.
        /// The writer is opened with the create flag set to true, so this is a full rebuild.
        /// </summary>
        public void InitIndex()
        {
            FileInfo[] sourceFiles = new DirectoryInfo(@"F:\TEST").GetFiles("*.txt");
            DirectoryInfo indexLocation = new DirectoryInfo(IndexerPath); // IndexerPath is where the index is written

            using (FSDirectory indexDirectory = FSDirectory.Open(indexLocation))
            using (IndexWriter indexWriter = new IndexWriter(indexDirectory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))
            {
                for (int position = 0; position < sourceFiles.Length; position++)
                {
                    FileInfo current = sourceFiles[position];

                    // One document per file. Field arguments: name, value, store the value, analyze or not.
                    // Only FileName is tokenized; the remaining fields are stored verbatim.
                    Document entry = new Document();
                    entry.Add(new Field("FileName", current.Name, Field.Store.YES, Field.Index.ANALYZED));
                    entry.Add(new Field("Path", current.DirectoryName, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    entry.Add(new Field("Size", (current.Length / 1024.00).ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    entry.Add(new Field("CreateDateTime", current.CreationTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    entry.Add(new Field("LastDateTime", current.LastWriteTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    indexWriter.AddDocument(entry);
                }

                // Merge the index segments once every document has been added.
                indexWriter.Optimize();
            }
        }

2.查找数据

        /// <summary>
        /// Searches the "FileName" field for the words obtained by segmenting <paramref name="keyword"/>.
        /// </summary>
        /// <param name="keyword">Raw search text; it is segmented by AnalyzerKeyword before being parsed.</param>
        /// <returns>
        /// At most 10000 matching entries. An empty list is returned when the keyword is blank
        /// or segmentation produces no words.
        /// </returns>
        public List<FilesInfo> SearchData(string keyword)
        {
            List<FilesInfo> result = new List<FilesInfo>();
            if (string.IsNullOrWhiteSpace(keyword))
            {
                return result;
            }

            // The query parser rejects an empty query string, so stop before opening the index.
            string queryText = AnalyzerKeyword("FileName", keyword);
            if (queryText.Length == 0)
            {
                return result;
            }

            // Dispose the directory and searcher so index file handles are released after each search.
            using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath), new NoLockFactory()))
            using (IndexSearcher searcher = new IndexSearcher(dir))
            {
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "FileName", new PanGuAnalyzer());
                Query query = parser.Parse(queryText);
                TopDocs docs = searcher.Search(query, null, 10000); // cap the result set at 10000 hits

                foreach (ScoreDoc item in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(item.Doc);
                    result.Add(new FilesInfo()
                    {
                        FileName = doc.Get("FileName"),
                        Path = doc.Get("Path"),
                        Size = doc.Get("Size"),
                        LastDateTime = doc.Get("LastDateTime"),
                        CreateDateTime = doc.Get("CreateDateTime"),
                    });
                }
            }
            return result;
        }
        /// <summary>
        /// Segments <paramref name="keyword"/> with PanGu and renders the words as query-parser text.
        /// </summary>
        /// <param name="column">Field name that every clause is scoped to.</param>
        /// <param name="keyword">Raw text to segment.</param>
        /// <returns>
        /// "column:word*" when segmentation yields exactly one word; otherwise space-separated
        /// "column:word" clauses. An empty string is returned when no words are produced.
        /// </returns>
        private string AnalyzerKeyword(string column, string keyword)
        {
            StringBuilder queryStringBuilder = new StringBuilder();
            PanGu.Segment.Init();
            PanGu.Segment segment = new PanGu.Segment();
            ICollection<PanGu.WordInfo> words = segment.DoSegment(keyword);

            // A single word is searched as a prefix; several words are searched as plain terms.
            string suffix = words.Count == 1 ? "*" : string.Empty;
            foreach (PanGu.WordInfo word in words)
            {
                // Escape query-parser metacharacters (+ - ( ) : * ? etc.) so that a word containing
                // them is matched literally instead of breaking or altering the parsed query.
                queryStringBuilder.AppendFormat("{0}:{1}{2} ", column, QueryParser.Escape(word.Word), suffix);
            }

            return queryStringBuilder.ToString().TrimEnd();
        }

2.1单个条件查找

        /// <summary>
        /// Finds entries whose "FileName" field contains a term starting with <paramref name="queryStr"/>.
        /// </summary>
        /// <param name="queryStr">Prefix to look for; it is not run through the analyzer.</param>
        /// <returns>At most 10000 matching entries; an empty list when <paramref name="queryStr"/> is null or empty.</returns>
        public List<FilesInfo> Query(string queryStr)
        {
            List<FilesInfo> result = new List<FilesInfo>();
            if (string.IsNullOrEmpty(queryStr))
            {
                return result;
            }

            using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath), new NoLockFactory()))
            {
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    // TermQuery matches one exact term and treats '*' as an ordinary character,
                    // so appending "*" never produced a wildcard search. PrefixQuery does the
                    // intended "starts with" match.
                    // NOTE(review): the prefix is compared against terms as emitted by PanGuAnalyzer;
                    // if the analyzer normalizes case, callers may need to normalize queryStr too - confirm.
                    PrefixQuery query = new PrefixQuery(new Term("FileName", queryStr));
                    TopDocs docs = searcher.Search(query, null, 10000); // cap the result set at 10000 hits
                    foreach (var item in docs.ScoreDocs)
                    {
                        Document doc = searcher.Doc(item.Doc);
                        FilesInfo info = new FilesInfo()
                        {
                            FileName = doc.Get("FileName"),
                            Path = doc.Get("Path"),
                            Size = doc.Get("Size"),
                            LastDateTime = doc.Get("LastDateTime"),
                            CreateDateTime = doc.Get("CreateDateTime"),
                        };
                        result.Add(info);
                    }
                }
            }
            return result;
        }
    }

3.增加索引

        /// <summary>
        /// Appends one file's metadata to the index stored at IndexerPath.
        /// </summary>
        /// <param name="fileInfo">File whose name, directory, size and timestamps are indexed.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="fileInfo"/> is null.</exception>
        public void AddIndex(FileInfo fileInfo)
        {
            if (fileInfo == null)
            {
                throw new System.ArgumentNullException(nameof(fileInfo));
            }

            using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(IndexerPath)))
            {
                // Use the overload without the create flag: it opens the existing index for
                // appending and only creates a new one when none exists. Passing create=true,
                // as before, replaced the whole index with this single document on every call.
                using (IndexWriter writer = new IndexWriter(dir, new PanGuAnalyzer(), IndexWriter.MaxFieldLength.LIMITED))
                {
                    // Field arguments: name, value, store the value, analyze or not.
                    // Only FileName is tokenized; the remaining fields are stored verbatim.
                    Document doc = new Document();
                    doc.Add(new Field("FileName", fileInfo.Name, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Path", fileInfo.DirectoryName, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("Size", (fileInfo.Length / 1024.00).ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("CreateDateTime", fileInfo.CreationTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("LastDateTime", fileInfo.LastWriteTime.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    writer.AddDocument(doc);

                    // NOTE(review): merging segments after every single add is costly on a large
                    // index; kept to preserve existing behavior - consider batching instead.
                    writer.Optimize();
                }
            }
        }

源码:https://download.csdn.net/download/lw8014/36488166

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值