GroupingSearch源码分析

最新推荐文章于 2024-09-25 09:21:39 发布

Drift2333

最新推荐文章于 2024-09-25 09:21:39 发布

阅读量108

点赞数

分类专栏： lucene 文章标签：数据库 java 开发语言

本文链接：https://blog.csdn.net/Drift2333/article/details/128715675

版权

lucene 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

文章详细解析了Lucene中的GroupingSearch类的源码，主要关注如何根据groupField、groupFunction或groupEndDocs进行单次和双次查询。在第一次查询中，使用FirstPassGroupingCollector收集分组信息，并可能使用缓存优化。第二次查询用于获取每个组内的TopDocs。缓存机制被用来提高性能，避免重复评分。

摘要由CSDN通过智能技术生成

GroupingSearch源码解析

/**
 * Entry point for a grouped search; dispatches to the configured grouping strategy.
 *
 * <p>When {@code this.grouper} is set, grouping is delegated to
 * {@link #groupByFieldOrFunction} (the two-pass path). Otherwise, when
 * {@code this.groupEndDocs} is set, grouping is delegated to {@code groupByDocBlock}
 * (per the original author's note, a single pass using BlockGroupingCollector; that
 * method is not shown here, so confirm against its implementation).
 *
 * @param searcher    the searcher forwarded to the selected strategy
 * @param query       the query forwarded to the selected strategy
 * @param groupOffset group offset forwarded to the selected strategy
 * @param groupLimit  group limit forwarded to the selected strategy
 * @return the grouped results produced by the selected strategy
 * @throws IOException           propagated from the selected strategy
 * @throws IllegalStateException if neither {@code grouper} nor {@code groupEndDocs} is set
 */
public <T> TopGroups<T> search(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
        // Guard: no grouping strategy has been configured at all.
        if (this.grouper == null && this.groupEndDocs == null) {
            throw new IllegalStateException("Either groupField, groupFunction or groupEndDocs must be set.");
        }
        // The grouper takes precedence over groupEndDocs when both are set.
        if (this.grouper != null) {
            return this.groupByFieldOrFunction(searcher, query, groupOffset, groupLimit);
        }
        // Only groupEndDocs is set: group by document block.
        return this.groupByDocBlock(searcher, query, groupOffset, groupLimit);
    }

当设置了grouper（即按groupField或groupFunction分组）时，search()方法会调用下面的groupByFieldOrFunction()方法：

/**
 * Groups the matches of {@code query} with {@code this.grouper} in up to two passes.
 *
 * <p>The first pass runs a {@code FirstPassGroupingCollector} (plus the optional
 * all-groups and all-group-heads collectors) to select the top groups. The second pass
 * runs a {@code TopGroupsCollector} to gather the documents inside those groups. When
 * caching is configured and the first pass was fully cached, the second pass replays the
 * cache instead of executing the query again.
 *
 * <p>Side effects: assigns {@code this.matchingGroups} and {@code this.matchingGroupHeads}.
 *
 * @param searcher    the searcher used to execute the query
 * @param query       the query whose matches are grouped
 * @param groupOffset offset of the first group to return
 * @param groupLimit  number of groups to return; the first pass collects
 *                    {@code groupOffset + groupLimit} groups
 * @return the top groups; an empty {@code TopGroups} with a NaN max score when the first
 *         pass yields no groups
 * @throws IOException propagated from the search and replay calls
 */
protected TopGroups groupByFieldOrFunction(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
        int topN = groupOffset + groupLimit;
        // First pass: collect the top groups that match the query.
        FirstPassGroupingCollector firstPassCollector = new FirstPassGroupingCollector(this.grouper, this.groupSort, topN);
        // Optional: collect every matching group so the total group count can be reported.
        AllGroupsCollector allGroupsCollector = this.allGroups ? new AllGroupsCollector(this.grouper) : null;
        // Optional: collect the head (most relevant) document of every matching group.
        AllGroupHeadsCollector allGroupHeadsCollector = this.allGroupHeads ? AllGroupHeadsCollector.newCollector(this.grouper, this.sortWithinGroup) : null;
        // Disabled optional collectors are passed as null here.
        Collector firstRound = MultiCollector.wrap(new Collector[]{firstPassCollector, allGroupsCollector, allGroupHeadsCollector});
        CachingCollector cachedCollector = null;
        // maxCacheRAMMB: cache size limit in MB; maxDocsToCache: maximum number of documents to cache.
        if (this.maxCacheRAMMB == null && this.maxDocsToCache == null) {
            // No caching configured: run the first pass directly.
            searcher.search(query, firstRound);
        } else {
            // Wrap the first pass in a cache so the second pass can replay it;
            // the RAM limit takes precedence over the document-count limit.
            if (this.maxCacheRAMMB != null) {
                cachedCollector = CachingCollector.create(firstRound, this.cacheScores, this.maxCacheRAMMB);
            } else {
                cachedCollector = CachingCollector.create(firstRound, this.cacheScores, this.maxDocsToCache);
            }
            searcher.search(query, cachedCollector);
        }
        // All matching groups (empty when all-groups collection is disabled).
        this.matchingGroups = (Collection)(this.allGroups ? allGroupsCollector.getGroups() : Collections.emptyList());
        // Group head documents (a match-nothing bit set when disabled).
        this.matchingGroupHeads = (Bits)(this.allGroupHeads ? allGroupHeadsCollector.retrieveGroupHeads(searcher.getIndexReader().maxDoc()) : new MatchNoBits(searcher.getIndexReader().maxDoc()));
        // The first pass only identifies the top groups; the documents inside each group
        // are gathered by the second pass below.
        Collection<SearchGroup> topSearchGroups = firstPassCollector.getTopGroups(groupOffset, this.fillSortFields);
        if (topSearchGroups == null) {
            // No groups: return an empty result. The last constructor argument is a float,
            // so use Float.NaN (the decompiled "0.0F / 0.0" is a double and does not compile).
            return new TopGroups(new SortField[0], new SortField[0], 0, 0, new GroupDocs[0], Float.NaN);
        }
        // Number of documents to collect inside each group.
        int topNInsideGroup = this.groupDocsOffset + this.groupDocsLimit;
        // Second pass: collect the top documents of each selected group.
        TopGroupsCollector secondPassCollector = new TopGroupsCollector(this.grouper, topSearchGroups, this.groupSort, this.sortWithinGroup, topNInsideGroup, this.includeScores, this.includeMaxScore, this.fillSortFields);
        // Replay the cached first pass when available; otherwise search a second time.
        if (cachedCollector != null && cachedCollector.isCached()) {
            cachedCollector.replay(secondPassCollector);
        } else {
            searcher.search(query, secondPassCollector);
        }

        // When all groups were collected, attach the total group count to the result.
        return this.allGroups ? new TopGroups(secondPassCollector.getTopGroups(this.groupDocsOffset), this.matchingGroups.size()) : secondPassCollector.getTopGroups(this.groupDocsOffset);
    }