使用 from + size 进行分页查询时,如果数据量很大并且需要遍历全部数据(即深度分页),性能会很差。from 指定从第几条结果开始返回,size 指定结果集中返回的文档个数。from + size 的工作原理是:例如 size=10&from=100,Elasticsearch 会从每个分片中各取出排在最前面的 110 条数据,汇集到一起后重新排序,再取出序号为 101~110 的文档。由此可见,from + size 的效率不会很高:分页越深,需要取回并排序的数据越多,效率就越低。因此,需要遍历全部数据时应改用 scroll(游标)方式分批读取,如下面的方法所示。
public static List<Map<String, Object>> searchAllData(String index, String type, String fields, String sortField, String highlightField,QueryBuilder queryBuilder ) {
//指定一个index和type
SearchRequestBuilder searchRequestBuilder = client.prepareSearch(index).setTypes(type)
// 高亮(xxx=111,aaa=222)
if (StringUtils.isNotEmpty(highlightField)) {
HighlightBuilder highlightBuilder = new HighlightBuilder()
highlightBuilder.preTags("<span style='color:red;font-weight:bold'>")
highlightBuilder.postTags("</span>")
// 设置高亮字段
highlightBuilder.field(highlightField)
searchRequestBuilder.highlighter(highlightBuilder)
}
// 需要显示的字段,逗号分隔(缺省为全部字段)
if (StringUtils.isNotEmpty(fields)) {
searchRequestBuilder.setFetchSource(fields.split(","), null)
}
searchRequestBuilder.setFetchSource(true)
if (StringUtils.isNotEmpty(sortField)) {
searchRequestBuilder.addSort(sortField, SortOrder.ASC)
}
//设置每批读取的数据量
searchRequestBuilder.setSize(100)
//查询条件
searchRequestBuilder.setQuery(queryBuilder)
//设置 search context 维护1分钟的有效期
searchRequestBuilder.setScroll(TimeValue.timeValueMinutes(1))
//获得首次的查询结果
SearchResponse scrollResp=searchRequestBuilder.get()
//打印的内容 可以在 Elasticsearch head 和 Kibana 上执行查询
LOGGER.info("\n{}", searchRequestBuilder)
//打印命中数量
LOGGER.info("命中总数量:{}", scrollResp.getHits().getTotalHits())
List<Map<String, Object>> sourceList = new ArrayList<Map<String, Object>>()
StringBuffer stringBuffer = new StringBuffer()
do {
//将scorllId循环传递
scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(TimeValue.timeValueMinutes(1)).execute().actionGet()
for (SearchHit searchHit : scrollResp.getHits().getHits()) {
searchHit.getSourceAsMap().put("id", searchHit.getId())
if (StringUtils.isNotEmpty(highlightField)) {
if (!ObjectUtils.isEmpty(searchHit.getHighlightFields().get(highlightField))) {
Text[] text = searchHit.getHighlightFields().get(highlightField).getFragments()
if (text != null) {
for (Text str : text) {
stringBuffer.append(str.string())
}
//遍历 高亮结果集,覆盖 正常结果集
searchHit.getSourceAsMap().put(highlightField, stringBuffer.toString())
}
}
}
sourceList.add(searchHit.getSourceAsMap())
}
//当searchHits的数组为空的时候结束循环,至此数据全部读取完毕
} while(scrollResp.getHits().getHits().length != 0)
//删除scroll
ClearScrollRequest clearScrollRequest = new ClearScrollRequest()
clearScrollRequest.addScrollId(scrollResp.getScrollId())
client.clearScroll(clearScrollRequest).actionGet()
return sourceList
}