一、ElasticSearch的分组聚合
官网文档地址:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_multi_field_terms_aggregation
实现背景:实现类似SQL的group by功能:
select team,age from a_person group by team,age;
二、多字段分组聚合的三种实现方法(性能不高)
在ElasticSearch中,实现多字段分组聚合的方式包括:
1、使用 Script
使用脚本script从多个字段聚合分组:
dsl:
GET /_search
{
"query": {
"match_all": {}
},
"aggs": {
"group_ab": {
"terms": {
"script": {
"source": "doc['a'].value +','+ doc['b'].value",
"lang": "painless"
}
}
}
}
}
}
code:
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
/**
* 基于单bucket实现的多字段group by聚合
*
* @author tang
*
*/
public class AggsOneBucketGroupBy {
private static final String elasticSearchClusterName = "cluster";
private static final String elasticSearchAddress = "127.0.0.1";
private static final int elasticSearchPort = 9300;
private static String ALL_INDEX = "*";
private static String SEPARATOR = "|";
public static TermsAggregationBuilder buildTermsAggregationBuilder(List<String> aggregationFields) {
String content = aggregationFields.stream().map(one -> String.format("doc['%s'].value", one))
.collect(Collectors.joining("+'" + SEPARATOR + "'+"));
TermsAggregationBuilder builder = AggregationBuilders.terms(aggregationFields.get(0));
builder.script(new Script(ScriptType.INLINE, "painless", content,new HashMap<String,Object>()));
builder.size(Integer.MAX_VALUE);
return builder;
}
/**
* 聚合结果解析
*
*/
private static void resolveResult(Terms.Bucket bucket, List<String> fields, Map<String,Object> row, List<Map<String,Object>> resultSet) {
boolean flag = false;
for (String field : fields) {
Terms terms = bucket.getAggregations().get(field);
if (terms != null) {
for (Terms.Bucket bucket1 : terms.getBuckets()) {
row.put(field, bucket1.getKey());
Map<String,Object> newRow=new LinkedHashMap<>(row);
resolveResult(bucket1, fields, newRow,resultSet);
}
flag = true;
break;
}
}
if (!flag) {
// 到这里表示拿到了结果集中的一条
// System.out.println(row);
resultSet.add(row);
}
}
public static void main(String[] args) throws IOException {
Settings settings = Settings.builder()
.put("cluster.name", elasticSearchClusterName)
.put("client.transport.sniff", true).build();
// 与ElasticSearch建立连接
TransportAddress transportAddress = new InetSocketTransportAddress(InetAddress.getByName(elasticSearchAddress),
elasticSearchPort);
TransportClient client = new PreBuiltTransportClient(settings).addTransportAddress(transportAddress);
// 这里为分组的字段列表
List<String> groupFields = new ArrayList<>();
groupFields.add("team");
groupFields.add("age");
// 构造聚合条件
TermsAggregationBuilder aggregationBuilder=buildTermsAggregationBuilder(groupFields);
aggregationBuilder.size(10);
// 构造查询请求
SearchRequestBuilder searchRequestBuilder=client.prepareSearch(ALL_INDEX)
.addAggregation(aggregationBuilder);
//System.out.println(searchRequestBuilder.toString());
// 执行检索请求
SearchResponse response =searchRequestBuilder
.execute()
.actionGet();
// 处理结果数据
List<Map<String,Object>> resultSet=new LinkedList<>();
Terms terms = response.getAggregations().get(groupFields.get(0));
if (null != terms) {
for (Terms.Bucket bucket : terms.getBuckets()) {
Map<String, Object> one = new LinkedHashMap<>();
one.put(groupFields.get(0), bucket.getKey());
resolveResult(bucket, groupFields, one, resultSet);
}
}
// 将结果全部打印
resultSet.stream().forEach(row->System.out.println(row));
System.out.println("total size = "+ resultSet.size());
}
}
2、使用 copy_to 字段(需要提前mapping,暂不考虑)
在创建索引的mapping时创建一个新的专用copy_to字段,后续可以在这个字段上进行汇总查询。
3、multi_terms 聚合
Multi Terms aggregation | Elasticsearch Guide [7.13] | Elastic
multi_terms 聚合将来自多个字段的术语组合成一个复合key。会禁用全局序数,并且比从单个term聚合要慢。
注意7.12版本新特性,旧版本不用考虑
dsl:
GET /products/_search
{
"aggs": {
"genres_and_products": {
"multi_terms": {
"terms": [{
"field": "genre"
}, {
"field": "product"
}]
}
}
}
}
code:
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
/**
* 基于迭代嵌套bucket的聚合计算
*
* @author tang
*
*/
public class AggsIteratorBucketGroupBy {
private static final String elasticSearchClusterName = "cluster";
private static final String elasticSearchAddress = "127.0.0.1";
private static final int elasticSearchPort = 9300;
private static String ALL_INDEX = "*";
/**
* 构造分组聚合Builder
*/
public static AggregationBuilder buildTermsAggregationBuilder(List<String> aggregationFields){
Iterator<String> iterator = aggregationFields.iterator();
TermsAggregationBuilder builderRoot = null;
TermsAggregationBuilder builderCursor = null;
while(iterator.hasNext()){
String field = iterator.next();
TermsAggregationBuilder builder = AggregationBuilders.terms(aggregationFields.stream().collect(Collectors.joining())).field(field);
builder.size(Integer.MAX_VALUE);
if(builderRoot == null){
builderRoot = builder;
builderCursor = builderRoot;
}else {
builderCursor.subAggregation(builder);
builderCursor = builder;
}
}
return builderRoot;
}
/**
* 聚合结果解析
*/
private static void resolveResult(Terms.Bucket bucket, List<String> fields, Map<String,Object> row, List<Map<String,Object>> resultSet) {
boolean flag = false;
for (String field : fields) {
Terms terms = bucket.getAggregations().get(field);
if (terms != null) {
for (Terms.Bucket bucket1 : terms.getBuckets()) {
row.put(field, bucket1.getKey());
Map<String,Object> newRow=new LinkedHashMap<>(row);
resolveResult(bucket1, fields, newRow,resultSet);
}
flag = true;
break;
}
}
if (!flag) {
// 到这里表示拿到了结果集中的一条
// System.out.println(row);
resultSet.add(row);
}
}
public static void main(String[] args) throws IOException {
Settings settings = Settings.builder()
.put("cluster.name", elasticSearchClusterName)
.put("client.transport.sniff", true).build();
// 与ElasticSearch建立连接
TransportAddress transportAddress = new InetSocketTransportAddress(InetAddress.getByName(elasticSearchAddress), elasticSearchPort);
TransportClient client = new PreBuiltTransportClient(settings).addTransportAddress(transportAddress);
// 这里为分组的字段列表
List<String> groupFields = new ArrayList<>();
groupFields.add("team");
groupFields.add("age");
// 构造聚合条件
AggregationBuilder aggregationBuilder=buildTermsAggregationBuilder(groupFields);
// 构造查询请求,注意:ElasticSearch的分组聚合查询无法分页
SearchRequestBuilder searchRequestBuilder=client.prepareSearch(ALL_INDEX)
.addAggregation(aggregationBuilder);
//System.out.println(searchRequestBuilder.toString());
// 执行检索请求
SearchResponse response =searchRequestBuilder
.execute()
.actionGet();
// 处理结果数据
List<Map<String,Object>> resultSet=new LinkedList<>();
Terms terms = response.getAggregations().get(groupFields.stream().collect(Collectors.joining()));
if (null != terms) {
for (Terms.Bucket bucket : terms.getBuckets()) {
Map<String, Object> one = new LinkedHashMap<>();
one.put(groupFields.get(0), bucket.getKey());
resolveResult(bucket, groupFields, one, resultSet);
}
}
// 将结果全部打印
resultSet.stream().forEach(row->System.out.println(row));
System.out.println("total size = "+ resultSet.size());
}
}