Elasticsearch 聚合性能优化 -- Multi Search (附Golang 实现逻辑)

1. ES 聚合

一次 Elasticsearch 查询请求中可以同时包含多个聚合,但同一请求内的这些聚合并不是并行执行的。
例如:当前 ES 数据总量为 2.6 亿,使用下面的聚合方式,耗时 10s 左右:

GET /test_index/_search
{
  "size": 0,
  "aggs": {
    "countries": {
      "terms": {
        "field": "country",
        "size": 10
      }
    },
    "ports": {
      "terms": {
        "field": "port",
        "size": 10
      }
    },
    "servers": {
      "terms": {
        "field": "server.keyword",
        "size": 10
      }
    },
    "titles": {
      "terms": {
        "field": "title.keyword",
        "size": 10
      }
    },
    "protocols": {
      "terms": {
        "field": "protocol",
        "size": 10
      }
    }
  }
}

2. Multi Search

使用 Multi Search 时,可以把每个聚合拆成一个独立的查询,放在同一个 _msearch 请求中提交;这些查询会被并行执行,性能上会有显著提升。
注意:

  1. Multi Search 的请求体以两行为一组:第一行是 header(例如 index 名称),第二行是查询语句;每一行都必须是单行 JSON,内容不可换行,并且请求体最后必须以换行符结尾。
  2. Multi Search 返回的数据结构和普通聚合方式不一样:返回的是一个 responses 数组,每个元素对应一组查询的结果。
  3. Multi Search 不仅限于聚合查询。多个index,不同检索条件,都可以使用。相当于是,一次请求,执行多条sql语句。
    例如:同样2.6亿的数据总量,使用下面聚合方式,耗时3s左右
POST _msearch
{"index":"test_index"} // 第一组查询, index名称,内容不可换行
{"size": 0,"aggs": {"countries": {"terms": {"field": "country","size": 10}}}} // 第一组查询,查询条件,内容不可换行
{"index":"test_index"} // 第二组查询, index名称,内容不可换行
{"size": 0,"aggs": {"ports": {"terms": {"field": "port","size": 10}}}} // 第二组查询,查询条件,内容不可换行
{"index":"test_index"}
{"size": 0,"aggs": {"protocols": {"terms": {"field": "protocol","size": 10}}}}
{"index":"test_index"}
{"size": 0,"aggs": {"servers": {"terms": {"field": "server.keyword","size": 10}}}}
{"index":"test_index"}
{"size": 0,"aggs": {"titles": {"terms": {"field": "title.keyword","size": 10}}}}

3. Golang 实现普通聚合

// SearchAggs runs one search against "test_index" with five terms
// aggregations attached (country, port, server.keyword, protocol,
// title.keyword) and maps the returned buckets into a models.Response.
//
// The query is built from request.QueryParamsDecode by GetDslByRequest. An
// error from building the query or from the search call is returned as-is,
// together with an empty response. An aggregation that is missing from the
// result is logged and leaves the corresponding list empty; it does not fail
// the call. Buckets whose key is empty are skipped.
func SearchAggs(request *models.Request) (models.Response, error) {
	response := models.Response{}

	// Build the query DSL from the decoded request parameters.
	queryDsl, err := GetDslByRequest(request.QueryParamsDecode)
	if err != nil {
		return response, err
	}

	// One terms aggregation per facet; all five are attached to a single search.
	countryAggs := elastic.NewTermsAggregation().Field("country")
	portAggs := elastic.NewTermsAggregation().Field("port")
	componentAggs := elastic.NewTermsAggregation().Field("server.keyword")
	protocolAggs := elastic.NewTermsAggregation().Field("protocol")
	titleAggs := elastic.NewTermsAggregation().Field("title.keyword")

	// Size(0): only the aggregation results are needed, not the hits.
	searchResult, err := esClient.Search("test_index").
		Query(queryDsl).
		Size(0).
		Aggregation("countries", countryAggs).
		Aggregation("ports", portAggs).
		Aggregation("components", componentAggs).
		Aggregation("protocols", protocolAggs).
		Aggregation("titles", titleAggs).
		Do(context.Background())
	if err != nil {
		log.Println("ES查询失败, error------>", err)
		return response, err
	}

	// bucketKey returns the bucket key as a string. Terms buckets on numeric
	// fields carry a non-string Key, so a bare Key.(string) assertion would
	// panic; fall back to the numeric representation in that case. An empty
	// result means the key could not be represented and the bucket is skipped.
	bucketKey := func(bucket *elastic.AggregationBucketKeyItem) string {
		if key, ok := bucket.Key.(string); ok {
			return key
		}
		return bucket.KeyNumber.String()
	}

	// The lists are initialised as empty (non-nil) slices so that a missing
	// aggregation still yields an empty list rather than a nil one.
	countries := []models.CountryAggsVo{}
	if countryAggsResult, found := searchResult.Aggregations.Terms("countries"); found {
		for _, bucket := range countryAggsResult.Buckets {
			if key := bucketKey(bucket); len(key) > 0 {
				countries = append(countries, models.CountryAggsVo{CountryCode: key, Count: bucket.DocCount})
			}
		}
	} else {
		// The result pointer is nil here, so it must not be dereferenced.
		log.Println("根据条件查询, 国家聚合没有查询到结果")
	}

	ports := []models.PortAggsVo{}
	if portAggsResult, found := searchResult.Aggregations.Terms("ports"); found {
		for _, bucket := range portAggsResult.Buckets {
			if key := bucketKey(bucket); len(key) > 0 {
				ports = append(ports, models.PortAggsVo{Port: key, Count: bucket.DocCount})
			}
		}
	} else {
		log.Println("根据条件查询, 端口聚合没有查询到结果")
	}

	components := []models.ComponentAggsVo{}
	if componentAggsResult, found := searchResult.Aggregations.Terms("components"); found {
		for _, bucket := range componentAggsResult.Buckets {
			if key := bucketKey(bucket); len(key) > 0 {
				components = append(components, models.ComponentAggsVo{Component: key, Count: bucket.DocCount})
			}
		}
	} else {
		log.Println("根据条件查询, 服务聚合没有查询到结果")
	}

	protocols := []models.ProtocolAggsVo{}
	if protocolAggsResult, found := searchResult.Aggregations.Terms("protocols"); found {
		for _, bucket := range protocolAggsResult.Buckets {
			if key := bucketKey(bucket); len(key) > 0 {
				protocols = append(protocols, models.ProtocolAggsVo{Protocol: key, Count: bucket.DocCount})
			}
		}
	} else {
		log.Println("根据条件查询, 协议聚合没有查询到结果")
	}

	titles := []models.TitleAggsVo{}
	if titleAggsResult, found := searchResult.Aggregations.Terms("titles"); found {
		for _, bucket := range titleAggsResult.Buckets {
			if key := bucketKey(bucket); len(key) > 0 {
				titles = append(titles, models.TitleAggsVo{Title: key, Count: bucket.DocCount})
			}
		}
	} else {
		log.Println("根据条件查询, 主题聚合没有查询到结果")
	}

	response.Countries = countries
	response.Ports = ports
	response.Servers = components
	response.Protocols = protocols
	response.Titles = titles

	return response, nil
}

4. Golang 实现Multi Search聚合

// SearchAggs is the Multi Search variant: each of the five terms aggregations
// (country, port, server.keyword, protocol, title.keyword) is sent as its own
// search request against "test_index" inside a single MultiSearch call, and
// the buckets from all responses are merged into one models.Response.
//
// The query is built from request.QueryParamsDecode by GetDslByRequest. An
// error from building the query or from the MultiSearch call itself is
// returned as-is, together with an empty response. A sub-search that reports
// its own error is logged and skipped, so the corresponding list stays empty.
// Buckets whose key is empty are skipped.
func SearchAggs(request *models.Request) (models.Response, error) {
	response := models.Response{}

	// Build the query DSL from the decoded request parameters.
	queryDsl, err := GetDslByRequest(request.QueryParamsDecode)
	if err != nil {
		return response, err
	}

	// One terms aggregation per facet; each goes into its own search request.
	countryAggs := elastic.NewTermsAggregation().Field("country")
	portAggs := elastic.NewTermsAggregation().Field("port")
	componentAggs := elastic.NewTermsAggregation().Field("server.keyword")
	protocolAggs := elastic.NewTermsAggregation().Field("protocol")
	titleAggs := elastic.NewTermsAggregation().Field("title.keyword")

	// Size(0): only the aggregation results are needed, not the hits.
	multiSearchResult, err := esClient.MultiSearch().
		Add(elastic.NewSearchRequest().Index("test_index").Query(queryDsl).Size(0).Aggregation("countries", countryAggs)).
		Add(elastic.NewSearchRequest().Index("test_index").Query(queryDsl).Size(0).Aggregation("ports", portAggs)).
		Add(elastic.NewSearchRequest().Index("test_index").Query(queryDsl).Size(0).Aggregation("components", componentAggs)).
		Add(elastic.NewSearchRequest().Index("test_index").Query(queryDsl).Size(0).Aggregation("protocols", protocolAggs)).
		Add(elastic.NewSearchRequest().Index("test_index").Query(queryDsl).Size(0).Aggregation("titles", titleAggs)).
		Do(context.Background())
	if err != nil {
		log.Println("ES查询失败, error------>", err)
		return response, err
	}

	// bucketKey returns the bucket key as a string. Terms buckets on numeric
	// fields carry a non-string Key, so a bare Key.(string) assertion would
	// panic; fall back to the numeric representation in that case. An empty
	// result means the key could not be represented and the bucket is skipped.
	bucketKey := func(bucket *elastic.AggregationBucketKeyItem) string {
		if key, ok := bucket.Key.(string); ok {
			return key
		}
		return bucket.KeyNumber.String()
	}

	// Initialised as empty (non-nil) slices so a missing aggregation yields an
	// empty list rather than a nil one.
	countries := []models.CountryAggsVo{}
	ports := []models.PortAggsVo{}
	components := []models.ComponentAggsVo{}
	protocols := []models.ProtocolAggsVo{}
	titles := []models.TitleAggsVo{}

	// The result holds one response per added search request. Each response
	// is probed for every aggregation name, so the mapping does not depend on
	// the order in which the requests were added.
	for _, r := range multiSearchResult.Responses {
		if r == nil {
			continue
		}
		// A sub-search can fail on its own even when the overall call
		// succeeds; surface that instead of silently returning fewer facets.
		if r.Error != nil {
			log.Println("ES multi search 子查询失败, error------>", r.Error.Type, r.Error.Reason)
			continue
		}

		if countryAggsResult, ok := r.Aggregations.Terms("countries"); ok {
			for _, bucket := range countryAggsResult.Buckets {
				if key := bucketKey(bucket); len(key) > 0 {
					countries = append(countries, models.CountryAggsVo{CountryCode: key, Count: bucket.DocCount})
				}
			}
		}

		if portAggsResult, ok := r.Aggregations.Terms("ports"); ok {
			for _, bucket := range portAggsResult.Buckets {
				if key := bucketKey(bucket); len(key) > 0 {
					ports = append(ports, models.PortAggsVo{Port: key, Count: bucket.DocCount})
				}
			}
		}

		if componentAggsResult, ok := r.Aggregations.Terms("components"); ok {
			for _, bucket := range componentAggsResult.Buckets {
				if key := bucketKey(bucket); len(key) > 0 {
					components = append(components, models.ComponentAggsVo{Component: key, Count: bucket.DocCount})
				}
			}
		}

		if protocolAggsResult, ok := r.Aggregations.Terms("protocols"); ok {
			for _, bucket := range protocolAggsResult.Buckets {
				if key := bucketKey(bucket); len(key) > 0 {
					protocols = append(protocols, models.ProtocolAggsVo{Protocol: key, Count: bucket.DocCount})
				}
			}
		}

		if titleAggsResult, ok := r.Aggregations.Terms("titles"); ok {
			for _, bucket := range titleAggsResult.Buckets {
				if key := bucketKey(bucket); len(key) > 0 {
					titles = append(titles, models.TitleAggsVo{Title: key, Count: bucket.DocCount})
				}
			}
		}
	}

	response.Countries = countries
	response.Ports = ports
	response.Servers = components
	response.Protocols = protocols
	response.Titles = titles

	return response, nil
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
好的,以下是一个简单的ElasticSearch聚合的Java API示例:

```java
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;

import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;

public class ElasticSearchAggregationExample {
    public static void main(String[] args) {
        // 创建ElasticSearch客户端
        Client client = // ...;

        // 构建查询条件
        QueryBuilder query = QueryBuilders.boolQuery()
            .must(rangeQuery("timestamp").gte("2022-01-01T00:00:00").lte("2022-01-31T23:59:59"));

        // 构建聚合条件
        AggregationBuilder aggregation = AggregationBuilders
            .dateHistogram("sales_over_time")
            .field("timestamp")
            .dateHistogramInterval(DateHistogramInterval.DAY)
            .subAggregation(
                AggregationBuilders
                    .terms("product_types")
                    .field("product_type")
                    .subAggregation(
                        AggregationBuilders.sum("total_sales").field("sales"),
                        AggregationBuilders.count("transaction_count").field("transaction_id")
                    )
            );

        // 执行查询
        SearchResponse response = client.prepareSearch("my_index")
            .setQuery(query)
            .addAggregation(aggregation)
            .execute()
            .actionGet();

        // 解析聚合结果
        Histogram histogram = response.getAggregations().get("sales_over_time");
        for (Histogram.Bucket bucket : histogram.getBuckets()) {
            System.out.println("Date: " + bucket.getKeyAsString());
            Terms productTypes = bucket.getAggregations().get("product_types");
            for (Terms.Bucket productType : productTypes.getBuckets()) {
                System.out.println("Product Type: " + productType.getKeyAsString());
                Sum totalSales = productType.getAggregations().get("total_sales");
                System.out.println("Total Sales: " + totalSales.getValue());
                ValueCount transactionCount = productType.getAggregations().get("transaction_count");
                System.out.println("Transaction Count: " + transactionCount.getValue());
            }
        }

        // 关闭客户端
        client.close();
    }
}
```

这个示例通过ElasticSearch的Java API执行了一个聚合,其中包含了两层嵌套聚合,分别按照日期和产品类型对销售数据进行了汇总,输出了每个日期和产品类型的销售总额和交易次数。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值