GroupingSearch源码解析
//当有结束文档定位时使用BlockGroupingCollector单次查询
/**
 * Entry point for a grouped search; picks the grouping strategy from the configured fields.
 *
 * <p>If {@code grouper} is set, the search is delegated to
 * {@link #groupByFieldOrFunction} (two passes over the matches). Otherwise, if
 * {@code groupEndDocs} is set, it is delegated to {@code groupByDocBlock}
 * (per the original note, a single pass).
 *
 * @param searcher    searcher the query is executed against
 * @param query       query whose matches are grouped
 * @param groupOffset offset of the first group to return
 * @param groupLimit  maximum number of groups to return
 * @return the grouped results produced by the selected strategy
 * @throws IOException           propagated from the delegated search
 * @throws IllegalStateException if neither {@code grouper} nor {@code groupEndDocs} is set
 */
public <T> TopGroups<T> search(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
    // Grouping by field or function: two-pass search.
    if (this.grouper != null) {
        return this.groupByFieldOrFunction(searcher, query, groupOffset, groupLimit);
    }

    // Grouping by document block: single-pass search.
    if (this.groupEndDocs != null) {
        return this.groupByDocBlock(searcher, query, groupOffset, groupLimit);
    }

    // Nothing to group by.
    throw new IllegalStateException("Either groupField, groupFunction or groupEndDocs must be set.");
}
search()方法在grouper不为null时,会调用下面的groupByFieldOrFunction()方法
/**
 * Runs a two-pass grouped search driven by {@code this.grouper}.
 *
 * <p>The first pass feeds a {@code FirstPassGroupingCollector} (plus, when enabled, an
 * {@code AllGroupsCollector} and an {@code AllGroupHeadsCollector}). The second pass feeds a
 * {@code TopGroupsCollector} built from the groups chosen by the first pass. When
 * {@code maxCacheRAMMB} or {@code maxDocsToCache} is configured, the first pass is wrapped in a
 * {@code CachingCollector}; if it reports {@code isCached()}, the second pass is replayed from
 * the cache instead of executing the query again.
 *
 * <p>Side effects: assigns {@code this.matchingGroups} and {@code this.matchingGroupHeads}.
 *
 * @param searcher    searcher the query is executed against
 * @param query       query whose matches are grouped
 * @param groupOffset offset of the first group to return
 * @param groupLimit  number of groups wanted after the offset; {@code groupOffset + groupLimit}
 *                    is the number of groups requested from the first pass
 * @return the top groups; an empty {@code TopGroups} with a {@code NaN} max score when the
 *         first pass reports no groups
 * @throws IOException propagated from the searcher or the cache replay
 */
@SuppressWarnings({"unchecked", "rawtypes"})
protected TopGroups groupByFieldOrFunction(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
    int topN = groupOffset + groupLimit;

    // First pass: collect the groups that match the query.
    FirstPassGroupingCollector firstPassCollector = new FirstPassGroupingCollector(this.grouper, this.groupSort, topN);
    // Optional: collect every matching group (used below for the total group count).
    AllGroupsCollector allGroupsCollector = this.allGroups ? new AllGroupsCollector(this.grouper) : null;
    // Optional: collect the most relevant document (group head) of every matching group.
    AllGroupHeadsCollector allGroupHeadsCollector = this.allGroupHeads
            ? AllGroupHeadsCollector.newCollector(this.grouper, this.sortWithinGroup)
            : null;
    // The optional collectors may be null here; MultiCollector.wrap is documented to filter
    // out null collectors.
    Collector firstRound = MultiCollector.wrap(firstPassCollector, allGroupsCollector, allGroupHeadsCollector);

    // Cache the first pass when a limit is configured: maxCacheRAMMB (cache size in MB) takes
    // precedence over maxDocsToCache (maximum number of documents to cache).
    CachingCollector cachedCollector = null;
    if (this.maxCacheRAMMB != null || this.maxDocsToCache != null) {
        if (this.maxCacheRAMMB != null) {
            cachedCollector = CachingCollector.create(firstRound, this.cacheScores, this.maxCacheRAMMB);
        } else {
            cachedCollector = CachingCollector.create(firstRound, this.cacheScores, this.maxDocsToCache);
        }
        searcher.search(query, cachedCollector);
    } else {
        searcher.search(query, firstRound);
    }

    // All matching groups (empty when allGroups is disabled).
    this.matchingGroups = (Collection) (this.allGroups ? allGroupsCollector.getGroups() : Collections.emptyList());
    // Group heads as a bit set over the index (matches nothing when allGroupHeads is disabled).
    this.matchingGroupHeads = (Bits) (this.allGroupHeads
            ? allGroupHeadsCollector.retrieveGroupHeads(searcher.getIndexReader().maxDoc())
            : new MatchNoBits(searcher.getIndexReader().maxDoc()));

    // Per the original note, the first-pass result only identifies the top groups; which
    // documents belong to each group is determined by the second pass.
    Collection<SearchGroup> topSearchGroups = firstPassCollector.getTopGroups(groupOffset, this.fillSortFields);
    if (topSearchGroups == null) {
        // No groups: return an empty result. Float.NaN replaces the decompiler artifact
        // "0.0F / 0.0", which is a double expression and does not fit a float argument.
        return new TopGroups(new SortField[0], new SortField[0], 0, 0, new GroupDocs[0], Float.NaN);
    }

    // Number of documents to collect inside each group.
    int topNInsideGroup = this.groupDocsOffset + this.groupDocsLimit;
    // Second pass: collect the top documents of every selected group.
    TopGroupsCollector secondPassCollector = new TopGroupsCollector(this.grouper, topSearchGroups, this.groupSort,
            this.sortWithinGroup, topNInsideGroup, this.includeScores, this.includeMaxScore, this.fillSortFields);

    // Replay the cached first pass if it is available; otherwise run the query a second time.
    if (cachedCollector != null && cachedCollector.isCached()) {
        cachedCollector.replay(secondPassCollector);
    } else {
        searcher.search(query, secondPassCollector);
    }

    if (this.allGroups) {
        // Attach the total number of matching groups to the result.
        return new TopGroups(secondPassCollector.getTopGroups(this.groupDocsOffset), this.matchingGroups.size());
    }
    return secondPassCollector.getTopGroups(this.groupDocsOffset);
}