// Count the matching documents first so that oversized exports are rejected
// before any scroll context is opened.
CountRequest countRequest = new CountRequest();
countRequest.types("TYPE");
countRequest.indices("INDEX");
// Build the query conditions (the same source builder is reused for the search below).
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
countRequest.source(searchSourceBuilder);
long count = restHighLevelClient.count(countRequest, RequestOptions.DEFAULT);
if (count > 300000) {
    throw new RuntimeException("下载数量不可超过30W条");
}
// Use a scroll (cursor) query, fetching 10000 hits per page.
searchSourceBuilder.size(10000);
SearchRequest searchRequest = new SearchRequest("INDEX").types("TYPE");
searchRequest.source(searchSourceBuilder);
Scroll scroll = new Scroll(TimeValue.timeValueMinutes(5L));
searchRequest.scroll(scroll);
// Fix: the original passed an undeclared `request`; the request built above is `searchRequest`.
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
String scrollId = searchResponse.getScrollId();
SearchHit[] searchHits = searchResponse.getHits().getHits();
List<SearchHit> searchHitList = new ArrayList<>(Arrays.asList(searchHits));
try {
    // Keep scrolling until a page comes back empty.
    while (searchHits.length > 0) {
        SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
        scrollRequest.scroll(scroll);
        // Fix: the original passed an undeclared `searchScrollRequest`; the local is `scrollRequest`.
        searchResponse = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT);
        scrollId = searchResponse.getScrollId();
        searchHits = searchResponse.getHits().getHits();
        // The export could also be written here page by page: when exporting large volumes,
        // JVM memory is limited, so use SXSSF to avoid exhausting the heap.
        searchHitList.addAll(Arrays.asList(searchHits));
    }
} finally {
    // Clear the scroll ID even when scrolling fails part-way.
    // NOTE(review): esService.clearScroll is defined elsewhere; presumably it releases the
    // server-side scroll context. If it can throw, it may mask the original exception — confirm.
    esService.clearScroll(scrollId);
}
// Convert the data
// Sort the results
// Download
可以使用 SXSSFWorkbook 进行大批量导出:每获取一批数据就导出一批,以减轻 JVM 的内存压力。
文章讲述了在Elasticsearch中使用CountRequest和Scroll进行高效查询,同时提及了如何控制下载数量以避免JVM内存溢出,以及利用SXSSFWorkbook进行分批导出数据以减轻内存压力。
740

被折叠的 条评论
为什么被折叠?



