现有如下需求:
从3.5亿左右的订单中查询商品各级类目,并且进行去重,由于各级类目编号不重复,可按末级类目编号去重
在约3.5亿条数据、15个分片的索引上,该查询可在秒级返回结果,性能可以接受
Json请求如下:
按item_last_cate_cd聚合去重;size指定最多返回多少个聚合桶(bucket),includes指定每条命中记录需要返回的字段
{
"aggregations":{
"item_aggs":{
"terms":{
"field":"item_last_cate_cd",
"size":10000
},
"aggregations":{
"item_top":{
"top_hits":{
"_source":{
"includes":[
"item_first_cate_cd",
"item_first_cate_name",
"item_second_cate_cd",
"item_second_cate_name",
"item_third_cate_cd",
"item_third_cate_name",
"item_last_cate_cd",
"item_last_cate_name"],
"excludes":[
]
}
}
}
}
}
}
}
返回内容:
{
"aggregations":{
"asMap":{
"item_aggs":{
"buckets":[
{
"aggregations":{
"asMap":{
"item_top":{
"fragment":true,
"hits":{
"fragment":true,
"hits":[
{
"fields":{
},
"fragment":false,
"highlightFields":{
},
"id":"68066473296",
"matchedQueries":[
],
"primaryTerm":0,
"rawSortValues":[
],
"score":1.0,
"seqNo":-2,
"sortValues":[
],
"sourceAsMap":{
"item_second_cate_name":"男装",
"item_second_cate_cd":"1342",
"item_third_cate_cd":"1349",
"item_third_cate_name":"T恤",
"item_first_cate_name":"服饰内衣",
"item_first_cate_cd":"1315",
"item_last_cate_cd":"1349",
"item_last_cate_name":"T恤"
},
"sourceAsString":"{\"item_second_cate_name\":\"男装\",\"item_second_cate_cd\":\"1342\",\"item_third_cate_cd\":\"1349\",\"item_third_cate_name\":\"T恤\",\"item_first_cate_name\":\"服饰内衣\",\"item_first_cate_cd\":\"1315\",\"item_last_cate_cd\":\"1349\",\"item_last_cate_name\":\"T恤\"}",
"sourceRef":{
"fragment":true
},
"type":"table",
"version":-1
}],
"maxScore":1.0,
"totalHits":10717797
},
"name":"item_top",
"type":"top_hits"
}
},
"fragment":true
},
"docCount":10717797,
"docCountError":0,
"fragment":true,
"key":"1349",
"keyAsNumber":1349.0,
"keyAsString":"1349"
}],
"docCountError":2710056,
"fragment":true,
"name":"item_aggs",
"sumOfOtherDocCounts":350512425,
"type":"sterms"
}
},
"fragment":true
}
}
Java关键代码实现如下:
/**
 * Queries the distinct category hierarchies by aggregating on the last-level
 * category code and taking one representative document per bucket.
 *
 * <p>The index name is built as {@code "tp" + <formatted current date>} using
 * {@code DATE_FORMAT}, and a {@code Host} header carrying {@code HOST} is added
 * to the request.
 *
 * <p>If the search fails with an {@link IOException}, the error is logged and an
 * empty list is returned (previously the method went on to dereference a
 * {@code null} response and failed with a {@code NullPointerException}).
 *
 * @param client the Elasticsearch client used to execute the search
 * @return one {@code OriginalCagegory} per aggregation bucket hit; an empty list
 *         when the search fails or the expected aggregation is absent
 */
public List<OriginalCagegory> aggregationSearch(RestHighLevelClient client) {
    // Source filtering: only these fields are returned for each top hit.
    String[] includes = {
        "item_first_cate_cd",
        "item_first_cate_name",
        "item_second_cate_cd",
        "item_second_cate_name",
        "item_third_cate_cd",
        "item_third_cate_name",
        "item_last_cate_cd",
        "item_last_cate_name"
    };

    // De-duplicate by bucketing on the last-level category code and keeping
    // a single document per bucket.
    AggregationBuilder aggregationBuilder = AggregationBuilders
        .terms("item_aggs")
        .field("item_last_cate_cd")
        .size(10000) // return at most 10,000 buckets
        .subAggregation(
            AggregationBuilders.topHits("item_top")
                .fetchSource(includes, Strings.EMPTY_ARRAY)
                .size(1)
        );

    SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource();
    sourceBuilder.aggregation(aggregationBuilder);
    // Only the aggregation result is read below, so skip fetching top-level hits.
    sourceBuilder.size(0);

    String index = String.format("tp%s", DateFormatUtils.format(new Date(), DATE_FORMAT));
    SearchRequest searchRequest = new SearchRequest(index);
    searchRequest.source(sourceBuilder);

    Builder builder = RequestOptions.DEFAULT.toBuilder();
    builder.addHeader("Host", HOST); // domain name of the search server
    RequestOptions requestOptions = builder.build();

    List<OriginalCagegory> originalCagegories = new ArrayList<>();

    SearchResponse response;
    try {
        // Synchronous call; searchAsync is the asynchronous alternative.
        response = client.search(searchRequest, requestOptions);
    } catch (IOException e) {
        LOGGER.error("occurr an exception while search es:{}",e.getMessage(),e);
        // Without a response there is nothing to parse; avoid the NPE below.
        return originalCagegories;
    }

    if (response == null || response.getAggregations() == null) {
        return originalCagegories;
    }
    Terms item = response.getAggregations().get("item_aggs");
    if (item == null) {
        return originalCagegories;
    }

    for (Terms.Bucket entry : item.getBuckets()) {
        TopHits top = entry.getAggregations().get("item_top");
        for (SearchHit hit : top.getHits()) {
            // getSourceAsString() is the JSON of the fields selected via includes.
            OriginalCagegory originalCagegory =
                JSONObject.parseObject(hit.getSourceAsString(), OriginalCagegory.class);
            originalCagegories.add(originalCagegory);
        }
    }
    return originalCagegories;
}
创建连接和关闭连接如下:
/**
 * Creates a new Elasticsearch client connected to {@code HOST}.
 *
 * <p>A fresh {@link RestHighLevelClient} is built on every call; the caller is
 * responsible for closing it.
 *
 * @return a newly created client, never {@code null}
 */
public RestHighLevelClient getConnection() {
    HttpHost httpHost = new HttpHost(HOST);
    RestClientBuilder builder = RestClient.builder(httpHost);
    // The former "if (client == null)" check was always true for a freshly
    // declared local, so the client is simply constructed and returned.
    return new RestHighLevelClient(builder);
}
/**
 * Closes the given Elasticsearch client.
 *
 * <p>A {@code null} client is ignored. An {@link IOException} raised while
 * closing is logged and not propagated.
 *
 * @param client the client to close; may be {@code null}
 */
public void closeConnection(RestHighLevelClient client) {
    if (client == null) {
        return;
    }
    try {
        client.close();
    } catch (IOException e) {
        // Use the logger (as the search code does) rather than printStackTrace(),
        // so the failure reaches the application's log output with its cause.
        LOGGER.error("failed to close the Elasticsearch client", e);
    }
}