通过Java代码修改ES的配置:当数据量超过配置的最大返回记录数(index.max_result_window)时,动态调大查询的最大返回记录数。经测试验证,ES内存设置为4G时,300万条数据可以正常查询,并能正常进行翻页操作,查询效率也基本未受影响;
在此之前是通过游标(scroll)的方式进行大数据量查询的,虽然不受最大返回记录数配置的限制,但是数据量超过10万条时,翻页越往后越慢。
理论上可以通过此方式查询出全量数据,除非数据量太大,导致出现OOM(内存溢出)。
/**
 * Updates the maximum number of records a search may return for one index by writing the
 * {@code index.max_result_window} index setting.
 *
 * <p>The method waits for the cluster's answer, so the success message is only logged when
 * the update was acknowledged. An unacknowledged update is logged as a warning, and a
 * runtime failure of the update call is logged as an error; neither is rethrown.
 *
 * @param indexName    name of the index whose setting is changed
 * @param maxResultNum new value for the maximum number of returned records
 */
public static void updateMaxResultSetting(String indexName, long maxResultNum) {
    log.info("[updateSetting] 准备更新es查询最大返回记录数, MaxResultNum:" + maxResultNum);
    Client client = ElasticSearchManager.getClient();
    UpdateSettingsRequest request = new UpdateSettingsRequest(indexName);
    String settingKey = "index.max_result_window";
    Settings settings = Settings.builder().put(settingKey, maxResultNum).build();
    request.settings(settings);
    request.indicesOptions(IndicesOptions.lenientExpandOpen());
    try {
        // Block on the future: without this the outcome of the update is never observed
        // and "success" would be logged even when the cluster rejected the change.
        boolean acknowledged = client.admin().indices().updateSettings(request).actionGet().isAcknowledged();
        if (acknowledged) {
            log.info("[updateSetting] 更新es查询最大返回记录数成功!");
        } else {
            log.warn("[updateSetting] 更新es查询最大返回记录数未被确认, index:" + indexName
                    + ", MaxResultNum:" + maxResultNum);
        }
    } catch (RuntimeException e) {
        // Keep the update best-effort for callers, but never lose the failure or its cause.
        log.error("[updateSetting] 更新es查询最大返回记录数失败, index:" + indexName
                + ", error:" + e.getMessage(), e);
    }
}
/**
 * Raises the maximum number of returned records for every index in the list.
 *
 * <p>An index is only updated when its current limit is lower than {@code maxResult}, so an
 * existing limit is never reduced. A failure for one index is logged and does not prevent
 * the remaining indices from being processed. A {@code null} or empty list is a no-op.
 *
 * @param indexNameList names of the indices to check and, if needed, update
 * @param maxResult     maximum number of returned records each index should allow at least
 */
public static void updateMaxResultSetting(List<String> indexNameList, long maxResult) {
    if (indexNameList == null || indexNameList.isEmpty()) {
        return;
    }
    for (String indexName : indexNameList) {
        try {
            if (getMaxResultSetting(indexName) < maxResult) {
                updateMaxResultSetting(indexName, maxResult);
            }
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is kept, not just the message.
            log.error("updateMaxResultSetting by indexNames eror, indexNam: " + indexName + " error: "
                    + e.getMessage(), e);
        }
    }
}
/**
 * Reads the maximum number of records a search may return for one index, i.e. the
 * {@code index.max_result_window} index setting.
 *
 * <p>When the setting is not present in the response (it was never set explicitly on the
 * index), the Elasticsearch default of 10000 is returned instead of failing on a
 * {@code null} value.
 *
 * @param indexName name of the index to inspect
 * @return the configured maximum number of returned records, or 10000 when none is configured
 */
public static long getMaxResultSetting(String indexName) {
    log.info("[updateSetting] 开始获取es查询最大返回记录数");
    final String settingKey = "index.max_result_window";
    // Documented Elasticsearch default for index.max_result_window.
    final long defaultMaxResultWindow = 10000L;

    Client client = ElasticSearchManager.getClient();
    GetSettingsRequest request = new GetSettingsRequest().indices(indexName);
    request.names(settingKey);
    GetSettingsResponse response = client.admin().indices().getSettings(request).actionGet();

    // NOTE(review): the lookup is keyed by the name passed in; presumably this must be a
    // concrete index name rather than an alias or pattern - confirm against callers.
    String configured = response.getSetting(indexName, settingKey);
    if (configured == null || configured.trim().isEmpty()) {
        // Long.parseLong(null) would throw NumberFormatException; an unset value means "default".
        return defaultMaxResultWindow;
    }
    return Long.parseLong(configured.trim());
}
/**
 * Runs a paged asset query against the {@code cems} index.
 *
 * <p>Each entry of {@code map} becomes one mandatory condition:
 * <ul>
 *   <li>a key containing {@code *} is matched exactly (terms query) on the key with the
 *       {@code *} removed and trimmed;</li>
 *   <li>a key that starts with {@code begin} or {@code end} and ends with {@code Time} is
 *       skipped, because it only carries a bound for a range condition;</li>
 *   <li>a key whose value is {@code "rangeTime"} becomes a range query on that key, bounded
 *       by the values stored under {@code "begin" + key} and {@code "end" + key};</li>
 *   <li>any other key becomes a wildcard query {@code *value*}.</li>
 * </ul>
 *
 * <p>After a successful search, the index's maximum number of returned records is enlarged
 * when the total hit count exceeds it. That adjustment is best-effort: if it fails, the
 * failure is logged and the fetched page is still returned.
 *
 * @param type     mapping type to search
 * @param sort     field to sort by; no sort is added when blank
 * @param order    {@code "desc"} or {@code null} sorts descending, anything else ascending
 * @param map      filter conditions as described above
 * @param pageNo   page number, where page 1 starts at offset 0
 * @param pageSize number of records per page
 * @return a map holding {@code "data"} (a JSONArray of hit sources) and {@code "total"};
 *         both are empty/zero when the index or type does not exist, and the map is empty
 *         when the search itself fails
 */
public static Map<String, Object> assetESQuery(String type, String sort, String order, Map<String, Object> map, int pageNo, int pageSize) {
    final String indexName = "cems";
    Client client = ElasticSearchManager.getClient();
    Map<String, Object> dataMap = new HashMap<String, Object>();

    // Nothing to search: report an empty result instead of querying a missing index/type.
    if (!isExistsIndexAndType(client, indexName, type)) {
        dataMap.put("data", new JSONArray());
        dataMap.put("total", 0);
        return dataMap;
    }

    SearchRequestBuilder searchRequestBuilder = client.prepareSearch(indexName).setTypes(type);
    searchRequestBuilder.setQuery(buildAssetFilterQuery(map));
    if (StringUtils.isNotBlank(sort)) {
        searchRequestBuilder.addSort(sort, order == null || "desc".equals(order) ? SortOrder.DESC : SortOrder.ASC);
    }
    searchRequestBuilder.setFrom((pageNo - 1) * pageSize).setSize(pageSize);

    long total;
    try {
        SearchResponse response = searchRequestBuilder.get();
        SearchHits hits = response.getHits();
        total = hits.getTotalHits();
        JSONArray array = new JSONArray();
        for (SearchHit searchHit : hits.getHits()) {
            array.add(JSONObject.fromObject(searchHit.getSourceAsString()));
        }
        dataMap.put("data", array);
        dataMap.put("total", total);
    } catch (Exception e) {
        log.error("[assetESQuery] es查询出错, error:" + e.getMessage(), e);
        return dataMap;
    }

    // Done after the result is stored so a settings failure cannot discard the fetched page.
    // NOTE(review): the limit is only enlarged after a successful search, so a page that
    // already lies beyond the current limit presumably still fails on this call - confirm.
    expandAssetResultWindowIfNeeded(indexName, total);
    return dataMap;
}

/** Translates the filter map of an asset query into a bool query of mandatory conditions. */
private static BoolQueryBuilder buildAssetFilterQuery(Map<String, Object> map) {
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    for (Map.Entry<String, Object> entry : map.entrySet()) {
        String key = entry.getKey();
        Object value = entry.getValue();
        if (key.indexOf("*") != -1) {
            boolQueryBuilder.must(QueryBuilders.termsQuery(key.replace("*", "").trim(), value.toString()));
            continue;
        }
        // begin*Time / end*Time entries are bounds consumed by the matching range condition.
        if ((key.startsWith("begin") || key.startsWith("end")) && key.endsWith("Time")) {
            continue;
        }
        if ("rangeTime".equals(value)) {
            Long beginTime = (Long) map.get("begin" + key);
            Long endTime = (Long) map.get("end" + key);
            boolQueryBuilder.must(QueryBuilders.rangeQuery(key).from(beginTime).to(endTime));
        } else {
            boolQueryBuilder.must(QueryBuilders.wildcardQuery(key, "*" + value.toString() + "*"));
        }
    }
    return boolQueryBuilder;
}

/** Enlarges the index's maximum number of returned records when {@code total} exceeds it; failures are only logged. */
private static void expandAssetResultWindowIfNeeded(String indexName, long total) {
    try {
        long maxResult = getMaxResultSetting(indexName);
        if (maxResult < total) {
            // Five times the total rounded down to a multiple of 10000; for totals below
            // 10000 that product is 0, which is not a usable limit, so never go below total.
            long enlarged = Math.max(total / 10000 * 5 * 10000, total);
            updateMaxResultSetting(indexName, enlarged);
        }
    } catch (Exception e) {
        log.error("[assetESQuery] 调整es查询最大返回记录数失败, error:" + e.getMessage(), e);
    }
}