ElasticSearch多字段分组聚合VS mysql group by a,b

一、ElasticSearch的分组聚合
官网文档地址:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_multi_field_terms_aggregation

实现背景:实现类似SQL的group by功能:

select team,age from a_person group by team,age;


二、多字段分组聚合的三种实现方法(性能不高)
在ElasticSearch中,实现多字段分组聚合的方式包括:

1、使用 Script
使用脚本script从多个字段聚合分组:

dsl:

GET /_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "group_ab": {
      "terms": {
        "script": {
          "source": "doc['a'].value +','+ doc['b'].value",
          "lang": "painless"
        }
      }
    }
  }
}
}

code:

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

/**
 * 基于单bucket实现的多字段group by聚合
 * 
 * @author tang
 *
 */
public class AggsOneBucketGroupBy {

    private static final String elasticSearchClusterName = "cluster";
    private static final String elasticSearchAddress = "127.0.0.1";
    private static final int elasticSearchPort = 9300;

    private static String ALL_INDEX = "*";
    private static String SEPARATOR = "|";


    public static TermsAggregationBuilder buildTermsAggregationBuilder(List<String> aggregationFields) {
        String content = aggregationFields.stream().map(one -> String.format("doc['%s'].value", one))
                .collect(Collectors.joining("+'" + SEPARATOR + "'+"));
        
        TermsAggregationBuilder builder = AggregationBuilders.terms(aggregationFields.get(0));
        builder.script(new Script(ScriptType.INLINE, "painless", content,new HashMap<String,Object>()));
        builder.size(Integer.MAX_VALUE);
        return builder;
    }

    /**
     * 聚合结果解析
     * 
     */
    private static void resolveResult(Terms.Bucket bucket, List<String> fields, Map<String,Object> row, List<Map<String,Object>> resultSet) {
            boolean flag = false;
            for (String field : fields) {
                Terms terms = bucket.getAggregations().get(field);
                if (terms != null) {
                    for (Terms.Bucket bucket1 : terms.getBuckets()) {
                        row.put(field, bucket1.getKey());
                        
                        Map<String,Object> newRow=new LinkedHashMap<>(row);
                        resolveResult(bucket1, fields, newRow,resultSet);
                    }
                    
                    flag = true;
                    break;
                }
            }
            if (!flag) {
                // 到这里表示拿到了结果集中的一条
                // System.out.println(row);
                resultSet.add(row);
            }
        }
    
    public static void main(String[] args) throws IOException {
        Settings settings = Settings.builder()
                .put("cluster.name", elasticSearchClusterName)
                .put("client.transport.sniff", true).build();

        // 与ElasticSearch建立连接
        TransportAddress transportAddress = new InetSocketTransportAddress(InetAddress.getByName(elasticSearchAddress),
                elasticSearchPort);
        TransportClient client = new PreBuiltTransportClient(settings).addTransportAddress(transportAddress);

        // 这里为分组的字段列表
        List<String> groupFields = new ArrayList<>();
        groupFields.add("team");
        groupFields.add("age");
        
        // 构造聚合条件
        TermsAggregationBuilder aggregationBuilder=buildTermsAggregationBuilder(groupFields);
        aggregationBuilder.size(10);
        
        // 构造查询请求
        SearchRequestBuilder searchRequestBuilder=client.prepareSearch(ALL_INDEX)
                .addAggregation(aggregationBuilder);
        //System.out.println(searchRequestBuilder.toString());
        
        // 执行检索请求
        SearchResponse response =searchRequestBuilder 
                .execute()
                .actionGet();
        
        // 处理结果数据
        List<Map<String,Object>> resultSet=new LinkedList<>();
        Terms terms = response.getAggregations().get(groupFields.get(0));
        if (null != terms) {
            for (Terms.Bucket bucket : terms.getBuckets()) {
                Map<String, Object> one = new LinkedHashMap<>();
                one.put(groupFields.get(0), bucket.getKey());
                resolveResult(bucket, groupFields, one, resultSet);
            }
        }
        
        // 将结果全部打印
        resultSet.stream().forEach(row->System.out.println(row));
        System.out.println("total size = "+ resultSet.size());
    }

}


2、使用 copy_to 字段(需要提前mapping,暂不考虑)

在创建索引的mapping时创建一个新的专用copy_to字段,后续可以在这个字段上进行汇总查询。

3、multi_terms 聚合

Multi Terms aggregation | Elasticsearch Guide [7.13] | Elastic

multi_terms 聚合将来自多个字段的术语组合成一个复合key。会禁用全局序数,并且比从单个term聚合要慢。

注意7.12版本新特性,旧版本不用考虑

dsl:

GET /products/_search
{
  "aggs": {
    "genres_and_products": {
      "multi_terms": {
        "terms": [{
          "field": "genre" 
        }, {
          "field": "product"
        }]
      }
    }
  }
}

code:

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

/**
 * 基于迭代嵌套bucket的聚合计算
 * 
 * @author tang
 *
 */
public class AggsIteratorBucketGroupBy {

    private static final String elasticSearchClusterName = "cluster";
    private static final String elasticSearchAddress = "127.0.0.1";
    private static final int elasticSearchPort = 9300;

    private static String ALL_INDEX = "*";

    /**
     * 构造分组聚合Builder
     */
    public static AggregationBuilder buildTermsAggregationBuilder(List<String> aggregationFields){
        Iterator<String> iterator = aggregationFields.iterator();
        TermsAggregationBuilder builderRoot = null;
        TermsAggregationBuilder builderCursor = null;
        while(iterator.hasNext()){
            String field = iterator.next();
            TermsAggregationBuilder builder = AggregationBuilders.terms(aggregationFields.stream().collect(Collectors.joining())).field(field);
            builder.size(Integer.MAX_VALUE);
            if(builderRoot == null){
                builderRoot = builder;
                builderCursor = builderRoot;
            }else {
                builderCursor.subAggregation(builder);
                builderCursor = builder;
            }
        }
        return builderRoot;
    }

    /**
     * 聚合结果解析
     */
    private static void resolveResult(Terms.Bucket bucket, List<String> fields, Map<String,Object> row, List<Map<String,Object>> resultSet) {
            boolean flag = false;
            for (String field : fields) {
                Terms terms = bucket.getAggregations().get(field);
                if (terms != null) {
                    for (Terms.Bucket bucket1 : terms.getBuckets()) {
                        row.put(field, bucket1.getKey());
                        
                        Map<String,Object> newRow=new LinkedHashMap<>(row);
                        resolveResult(bucket1, fields, newRow,resultSet);
                    }
                    
                    flag = true;
                    break;
                }
            }
            if (!flag) {
                // 到这里表示拿到了结果集中的一条
                // System.out.println(row);
                resultSet.add(row);
            }
        }
    
    public static void main(String[] args) throws IOException {
        Settings settings = Settings.builder()
                .put("cluster.name", elasticSearchClusterName)
                .put("client.transport.sniff", true).build();

        // 与ElasticSearch建立连接
        TransportAddress transportAddress = new InetSocketTransportAddress(InetAddress.getByName(elasticSearchAddress),    elasticSearchPort);
        TransportClient client = new PreBuiltTransportClient(settings).addTransportAddress(transportAddress);

        // 这里为分组的字段列表
        List<String> groupFields = new ArrayList<>();
        groupFields.add("team");
        groupFields.add("age");
        
        // 构造聚合条件
        AggregationBuilder aggregationBuilder=buildTermsAggregationBuilder(groupFields);
        
        // 构造查询请求,注意:ElasticSearch的分组聚合查询无法分页
        SearchRequestBuilder searchRequestBuilder=client.prepareSearch(ALL_INDEX)
                .addAggregation(aggregationBuilder);
        //System.out.println(searchRequestBuilder.toString());
        
        // 执行检索请求
        SearchResponse response =searchRequestBuilder 
                .execute()
                .actionGet();
        
        // 处理结果数据
        List<Map<String,Object>> resultSet=new LinkedList<>();
        Terms terms = response.getAggregations().get(groupFields.stream().collect(Collectors.joining()));
        if (null != terms) {
            for (Terms.Bucket bucket : terms.getBuckets()) {
                Map<String, Object> one = new LinkedHashMap<>();
                one.put(groupFields.get(0), bucket.getKey());
                resolveResult(bucket, groupFields, one, resultSet);
            }
        }
        
        // 将结果全部打印
        resultSet.stream().forEach(row->System.out.println(row));
        System.out.println("total size = "+ resultSet.size());
    }

}

  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值