Elasticsearch6.x聚合查询指定字段并去重

 现有如下需求:

从3.5亿左右的订单中查询商品各级类目,并且进行去重,由于各级类目编号不重复,可按末级类目编号去重

在约3.5亿条数据、15个分片的索引上,该聚合查询可以在秒级返回结果,性能尚可。


Json请求如下:

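按 item_last_cate_cd 做 terms 聚合实现去重:terms 的 size 指定最多返回多少个桶(即去重后的条数),top_hits 的 includes 指定每个桶返回哪些字段。注意:下面的 JSON 没有设置 top_hits 的 size(默认每个桶返回3条),而后面的 Java 代码将其设置为1,即每个桶只取一条。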

{
    "aggregations":{
        "item_aggs":{
            "terms":{
                "field":"item_last_cate_cd",
                "size":10000
            },
            "aggregations":{
                "item_top":{
                    "top_hits":{
                        "_source":{
                            "includes":[
                                "item_first_cate_cd",
                                "item_first_cate_name",
                                "item_second_cate_cd",
                                "item_second_cate_name",
                                "item_third_cate_cd",
                                "item_third_cate_name",
                                "item_last_cate_cd",
                                "item_last_cate_name"],
                            "excludes":[
                            ]
                        }
                    }
                }
            }
        }
    }
}

返回内容(以下是 Java 客户端 SearchResponse 对象序列化后的 JSON,并非 Elasticsearch 返回的原始 JSON):

{
    "aggregations":{
        "asMap":{
            "item_aggs":{
                "buckets":[
                    {
                        "aggregations":{
                            "asMap":{
                                "item_top":{
                                    "fragment":true,
                                    "hits":{
                                        "fragment":true,
                                        "hits":[
                                            {
                                                "fields":{
                                                },
                                                "fragment":false,
                                                "highlightFields":{
                                                },
                                                "id":"68066473296",
                                                "matchedQueries":[
                                                ],
                                                "primaryTerm":0,
                                                "rawSortValues":[
                                                ],
                                                "score":1.0,
                                                "seqNo":-2,
                                                "sortValues":[
                                                ],
                                                "sourceAsMap":{
                                                    "item_second_cate_name":"男装",
                                                    "item_second_cate_cd":"1342",
                                                    "item_third_cate_cd":"1349",
                                                    "item_third_cate_name":"T恤",
                                                    "item_first_cate_name":"服饰内衣",
                                                    "item_first_cate_cd":"1315",
                                                    "item_last_cate_cd":"1349",
                                                    "item_last_cate_name":"T恤"
                                                },
                                                "sourceAsString":"{\"item_second_cate_name\":\"男装\",\"item_second_cate_cd\":\"1342\",\"item_third_cate_cd\":\"1349\",\"item_third_cate_name\":\"T恤\",\"item_first_cate_name\":\"服饰内衣\",\"item_first_cate_cd\":\"1315\",\"item_last_cate_cd\":\"1349\",\"item_last_cate_name\":\"T恤\"}",
                                                "sourceRef":{
                                                    "fragment":true
                                                },
                                                "type":"table",
                                                "version":-1
                                            }],
                                        "maxScore":1.0,
                                        "totalHits":10717797
                                    },
                                    "name":"item_top",
                                    "type":"top_hits"
                                }
                            },
                            "fragment":true
                        },
                        "docCount":10717797,
                        "docCountError":0,
                        "fragment":true,
                        "key":"1349",
                        "keyAsNumber":1349.0,
                        "keyAsString":"1349"
                    }],
                "docCountError":2710056,
                "fragment":true,
                "name":"item_aggs",
                "sumOfOtherDocCounts":350512425,
                "type":"sterms"
            }
        },
        "fragment":true
    }
}

Java关键代码实现如下:

    public List<OriginalCagegory> aggregationSearch(RestHighLevelClient client) {
        //字段筛选,查询以下指定字段
        String[] includes = {
                "item_first_cate_cd",
                "item_first_cate_name",
                "item_second_cate_cd",
                "item_second_cate_name",
                "item_third_cate_cd", 
                "item_third_cate_name", 
                "item_last_cate_cd", 
                "item_last_cate_name" 
                };

        //按末级类目代码聚合去重
        AggregationBuilder aggregationBuilder = AggregationBuilders
                .terms("item_aggs")
                .field("item_last_cate_cd") //根据item_last_cate_cd去重
                .size(10000) //查询1w条
                .subAggregation(
                         AggregationBuilders.topHits("item_top")
                        .fetchSource(includes, Strings.EMPTY_ARRAY)
                        .size(1)
                );

        SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource();
        sourceBuilder.aggregation(aggregationBuilder);
        
        String index = String.format("tp%s", DateFormatUtils.format(new Date(), DATE_FORMAT));
        SearchRequest searchRequest = new SearchRequest(index);//索引
        searchRequest.source(sourceBuilder);
        
        Builder builder = RequestOptions.DEFAULT.toBuilder();
        builder.addHeader("Host", HOST);//查询服务器的域名
        RequestOptions requestOptions = builder.build();
        SearchResponse response = null;
        try {
            response = client.search(searchRequest, requestOptions);//searchAsync异步
        } catch (IOException e) {
            LOGGER.error("occurr an exception while search es:{}",e.getMessage(),e);
        }
        List<OriginalCagegory>  originalCagegories = new ArrayList<>();
        Terms item = response.getAggregations().get("item_aggs");
        for (Terms.Bucket entry : item.getBuckets()) {
            TopHits top = entry.getAggregations().get("item_top");
            for (SearchHit hit : top.getHits()) {//hit.getSourceAsString()就是查询出来的json格式includes字段
                OriginalCagegory originalCagegory = JSONObject.parseObject(hit.getSourceAsString(), OriginalCagegory.class);
                originalCagegories.add(originalCagegory);
            }
        }
        return originalCagegories;
    }

创建连接和关闭连接如下:

	/**
	 * Creates a new {@link RestHighLevelClient} pointed at {@code HOST}.
	 *
	 * <p>A fresh client is built on every call; nothing is cached or reused.
	 * See {@code closeConnection} for releasing the returned client.
	 *
	 * @return a newly constructed client, never {@code null}
	 */
	public RestHighLevelClient getConnection() {
		// Only the host name is supplied; port and scheme are left to HttpHost's defaults.
		HttpHost httpHost = new HttpHost(HOST);
		RestClientBuilder builder = RestClient.builder(httpHost);
		// The original null-initialised a local and then tested it for null, which was
		// always true; constructing the client directly is equivalent.
		return new RestHighLevelClient(builder);
	}
	
	/**
	 * Closes the given client.
	 *
	 * <p>Does nothing when {@code client} is {@code null}. An {@link IOException}
	 * thrown by {@code close()} is not propagated; its stack trace is printed instead.
	 *
	 * @param client the client to close, may be {@code null}
	 */
	public void closeConnection(RestHighLevelClient client) {
		if (client == null) {
			return;
		}
		try {
			client.close();
		} catch (IOException closeFailure) {
			closeFailure.printStackTrace();
		}
	}

 

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值