elasticsearch-聚合查询

本篇将通过一个案例来介绍 Java 集成 Elasticsearch 并实现常见的聚合查询。

依赖引入

我们使用 springboot快速创建一个项目, pom.xml如下

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.5.4</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.elastic</groupId>
    <artifactId>demo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>demo</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <!-- Spring Boot core starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>

        <!-- Elasticsearch high-level REST client plus matching core and
             transport artifacts; all three versions must stay in sync (6.6.2) -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.6.2</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.6.2</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>6.6.2</version>
        </dependency>

        <!-- Test support (JUnit 5 etc.) -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>

目录结构

（此处为项目目录结构截图）

代码详情

在这里我们做案例的话, 仅仅需要4个类. 分别如下

  • DemoApplication: 启动入口类
  • ElasticsearchRestClient: elasticsearch 配置类
  • BucketAggregationTest: Bucket aggregations 桶分聚合
  • MetricAggregationTest: 计算度量聚合

DemoApplication.java

package com.elastic.demo;

import org.springframework.boot.Banner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class DemoApplication {

    /** Application entry point: boots the Spring context with the banner disabled. */
    public static void main(String[] args) {
        configuredApplication().run(args);
    }

    /** Builds the {@link SpringApplication} with its banner switched off. */
    private static SpringApplication configuredApplication() {
        final SpringApplication application = new SpringApplication(DemoApplication.class);
        application.setBannerMode(Banner.Mode.OFF);
        return application;
    }
}

ElasticsearchRestClient.java

package com.elastic.demo;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.Arrays;
import java.util.Objects;


@Configuration
public class ElasticsearchRestClient {
    private static final Logger log = LoggerFactory.getLogger(ElasticsearchRestClient.class);
    /** Expected number of segments in a "host:port" address string. */
    private static final int ADDRESS_LENGTH = 2;
    private static final String HTTP_SCHEME = "http";

    // Cluster node addresses in "host:port" form; adjust for your environment.
    String[] ipAddress = new String[]{"192.168.1.186:9200"};

    /**
     * Builds a {@link RestClientBuilder} from the configured addresses.
     * Malformed entries are logged and skipped instead of failing bean creation.
     *
     * @return a builder targeting every well-formed configured host
     */
    @Bean
    public RestClientBuilder restClientBuilder() {
        HttpHost[] hosts = Arrays.stream(ipAddress)
                .map(this::makeHttpHost)
                .filter(Objects::nonNull)
                .toArray(HttpHost[]::new);
        log.debug("hosts:{}", Arrays.toString(hosts));
        return RestClient.builder(hosts);
    }


    /**
     * Exposes the high-level client used by the aggregation examples.
     *
     * @param restClientBuilder the low-level builder bean defined above
     * @return a ready-to-use {@link RestHighLevelClient}
     */
    @Bean(name = "highLevelClient")
    public RestHighLevelClient highLevelClient(@Autowired RestClientBuilder restClientBuilder) {
        return new RestHighLevelClient(restClientBuilder);
    }


    /**
     * Parses a single "host:port" string into an {@link HttpHost}.
     *
     * @param s the raw address string
     * @return the parsed host, or {@code null} when the string is malformed
     *         (wrong segment count or non-numeric port); callers filter nulls
     */
    private HttpHost makeHttpHost(String s) {
        String[] address = s.split(":");
        if (address.length != ADDRESS_LENGTH) {
            log.warn("Skipping malformed elasticsearch address: {}", s);
            return null;
        }
        try {
            return new HttpHost(address[0], Integer.parseInt(address[1]), HTTP_SCHEME);
        } catch (NumberFormatException e) {
            // The original code let this exception escape and abort startup;
            // a bad port should just disqualify the address like a bad format does.
            log.warn("Skipping elasticsearch address with invalid port: {}", s);
            return null;
        }
    }
}

BucketAggregationTest.java

package com.elastic.demo;

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.filter.*;
import org.elasticsearch.search.aggregations.bucket.global.Global;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.missing.Missing;
import org.elasticsearch.search.aggregations.bucket.missing.MissingAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNested;
import org.elasticsearch.search.aggregations.bucket.range.Range;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.joda.time.DateTime;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.io.IOException;

/**
 * Bucket aggregations.
 *
 * <p>Unlike metrics aggregations, bucket aggregations do not compute metrics.
 * Instead they create buckets of documents: each bucket is associated with a
 * criterion (depending on the aggregation type) that decides whether a document
 * in the current context "falls" into it. In other words, buckets effectively
 * define document sets, and the aggregation also computes and returns the
 * number of documents that "fell" into each bucket.
 *
 * <p>In contrast to metrics aggregations, bucket aggregations can hold
 * sub-aggregations, which are then aggregated over the buckets created by
 * their "parent" bucket aggregation.
 *
 * <p>Different bucket aggregators use different bucketing strategies: some
 * define a single bucket, some a fixed number of buckets, and others create
 * buckets dynamically during the aggregation process.
 */
@SpringBootTest
class BucketAggregationTest {
    private static final Logger log = LoggerFactory.getLogger(BucketAggregationTest.class);

    // High-level REST client provided by the ElasticsearchRestClient configuration.
    @Autowired
    private RestHighLevelClient highLevelClient;

    /**
     * Global aggregation.
     *
     * <p>Defines a single bucket of all documents in the search execution
     * context (the indices/types being searched), unaffected by the search
     * query itself. A global aggregator can only be placed at the top level,
     * since nesting it inside another bucketing aggregator is meaningless.
     */
    @Test
    void globalAggregation() throws IOException {
        GlobalAggregationBuilder aggregation = AggregationBuilders
                .global("agg")
                .subAggregation(AggregationBuilders.terms("classCode.keyword").field("classCode.keyword"));

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Global aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", aggResult.getDocCount());
    }


    /**
     * Filter aggregation.
     *
     * <p>Creates a single bucket of the documents in the current context that
     * match the given filter condition.
     */
    @Test
    void filterAggregation() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.termQuery("parentId", 5));

        FilterAggregationBuilder aggregation = AggregationBuilders
                .filter("agg", query);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Filter aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", aggResult.getDocCount());
    }


    /**
     * Filters aggregation (multi-filter).
     *
     * <p>Buckets the current documents by several filter conditions; each
     * keyed bucket contains every document matching its filter, so a document
     * may appear in more than one bucket.
     */
    @Test
    void filtersAggregation() throws IOException {
        FiltersAggregationBuilder aggregation = AggregationBuilders
                .filters("agg",
                        new FiltersAggregator.KeyedFilter("men", QueryBuilders.termQuery("parentId", 5)),
                        new FiltersAggregator.KeyedFilter("women", QueryBuilders.termQuery("parentId", 1)));

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Filters aggResult = response.getAggregations().get("agg");

        for (Filters.Bucket entry : aggResult.getBuckets()) {
            // bucket key
            String key = entry.getKeyAsString();
            // Doc count
            long docCount = entry.getDocCount();
            log.info(">>> key: {}, doc_count: {}", key, docCount);
        }
    }


    /**
     * Missing aggregation (field-data based single-bucket aggregation).
     *
     * <p>Creates a bucket of all documents in the current context that are
     * missing a value for the given field (the field is absent or was indexed
     * with configured null values). Typically combined with other field-data
     * bucket aggregators (e.g. range) to report the documents that could not
     * be placed in any other bucket because the field value was missing.
     */
    @Test
    void missingAggregation() throws IOException {
        MissingAggregationBuilder aggregation = AggregationBuilders.missing("agg").field("parentId");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Missing aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", aggResult.getDocCount());
    }


    /**
     * Nested aggregation.
     *
     * <p>Buckets documents of a {@code nested} field type into a single
     * bucket, enabling further aggregation over the nested documents.
     * NOTE(review): this assumes "parentId" is mapped as a nested path in
     * the index — confirm against the index mapping.
     */
    @Test
    void nestedAggregation() throws IOException {
        NestedAggregationBuilder aggregation = AggregationBuilders.nested("agg", "parentId");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Nested aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", aggResult.getDocCount());
    }


    /**
     * Reverse-nested aggregation.
     *
     * <p>A special single-bucket aggregation that aggregates parent documents
     * from nested documents: it effectively "jumps out" of the nested block
     * structure back to other nested structures or the root document, allowing
     * aggregations that are not part of the nested object to be nested inside
     * a nested aggregation. A {@code reverse_nested} aggregation must be
     * defined inside a {@code nested} aggregation.
     */
    @Test
    void reverseNestedAggregation() throws IOException {
        NestedAggregationBuilder aggregation = AggregationBuilders
                .nested("agg", "resellers")
                .subAggregation(
                        AggregationBuilders
                                .terms("type").field("resellers.type")
                                .subAggregation(
                                        AggregationBuilders
                                                .reverseNested("reseller_to_product")
                                )
                );

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Nested aggResult = response.getAggregations().get("agg");
        Terms name = aggResult.getAggregations().get("type");
        for (Terms.Bucket bucket : name.getBuckets()) {
            ReverseNested resellerToProduct = bucket.getAggregations().get("reseller_to_product");
            log.info(">>> {}", resellerToProduct.getDocCount());
        }
    }


    /**
     * Terms aggregation.
     *
     * <p>Buckets documents by each unique term of a field and counts the
     * documents per bucket. Buckets are returned by descending document count
     * by default; when not all buckets are returned, counts may be approximate.
     */
    @Test
    void termsAggregation() throws IOException {
        TermsAggregationBuilder aggregation = AggregationBuilders
                .terms("agg")
                .field("parentId");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Terms aggResult = response.getAggregations().get("agg");
        for (Terms.Bucket entry : aggResult.getBuckets()) {
            log.info(">>> key: {}, docCount: {}", entry.getKey(), entry.getDocCount());
        }
    }


    /**
     * Terms aggregation with an explicit bucket order.
     *
     * <p>Same terms bucketing as above, but demonstrates the different
     * {@link BucketOrder} options: by document count, by key, or by a metrics
     * sub-aggregation (referenced by its aggregation name).
     */
    @Test
    void orderAggregation() throws IOException {
        // Order by doc_count ascending
        // TermsAggregationBuilder aggregation = AggregationBuilders
        //         .terms("agg")
        //         .field("parentId")
        //         .order(BucketOrder.count(true));

        // Order by key ascending
        // TermsAggregationBuilder aggregation = AggregationBuilders
        //         .terms("agg")
        //         .field("parentId")
        //         .order(BucketOrder.key(true));

        // Order by a metrics sub-aggregation (identified by aggregation name)
        TermsAggregationBuilder aggregation = AggregationBuilders
                .terms("agg")
                .field("parentId")
                .order(BucketOrder.aggregation("avg_parentId", false))
                .subAggregation(
                        // The sub-aggregation averages the document field "parentId";
                        // the original averaged "avg_parentId" (the aggregation's own
                        // name, a nonexistent field), which made the ordering a no-op.
                        AggregationBuilders.avg("avg_parentId").field("parentId")
                );

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Terms aggResult = response.getAggregations().get("agg");
        for (Terms.Bucket entry : aggResult.getBuckets()) {
            log.info(">>> key: {}, docCount: {}", entry.getKey(), entry.getDocCount());
        }
    }


    /**
     * Significant-terms aggregation.
     *
     * <p>Returns the interesting or unusual terms of a set relative to the
     * background index.
     */
    @Test
    void significantTermsAggregation() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.termQuery("parentId", 5));

        SignificantTermsAggregationBuilder aggregation = AggregationBuilders
                .significantTerms("agg")
                .field("parentId");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        sourceBuilder.query(query);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        SignificantTerms aggResult = response.getAggregations().get("agg");
        for (SignificantTerms.Bucket entry : aggResult.getBuckets()) {
            log.info(">>> key: {}, docCount: {}", entry.getKey(), entry.getDocCount());
        }
    }


    /**
     * Date-range aggregation.
     *
     * <p>Buckets documents by date ranges over a date field. Ranges may use
     * Date Math expressions. As usual, the {@code from} value is inclusive and
     * the {@code to} value is exclusive.
     */
    @Test
    void dateRangeAggregation() throws IOException {
        AggregationBuilder aggregation =
                AggregationBuilders
                        .dateRange("agg")
                        .field("createTime")
                        .format("yyyyMMddHHmmss")
                        .addUnboundedTo("20160522161616")     // Less than 20160522161616
                        .addRange("20160522161616", "20210522161616")  // 20160522161616 --- 20210522161616
                        .addUnboundedFrom("20210522161616"); // more than 20210522161616

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Range aggResult = response.getAggregations().get("agg");

        for (Range.Bucket entry : aggResult.getBuckets()) {
            String key = entry.getKeyAsString();                // Date range as key
            DateTime fromAsDate = (DateTime) entry.getFrom();   // Date bucket from as a Date
            DateTime toAsDate = (DateTime) entry.getTo();       // Date bucket to as a Date
            long docCount = entry.getDocCount();                // Doc count
            log.info("key [{}], from [{}], to [{}], doc_count [{}]", key, fromAsDate, toAsDate, docCount);
        }
    }


    /**
     * Histogram aggregation.
     *
     * <p>Dynamically buckets documents by fixed-width intervals over a numeric
     * field.
     */
    @Test
    void histogramAggregation() throws IOException {
        AggregationBuilder aggregation = AggregationBuilders
                        .histogram("agg")
                        .field("parentId")
                        .interval(1);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);
        SearchRequest searchRequest = new SearchRequest("dcvciclass");

        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // A histogram aggregation produces a Histogram result; the original code
        // retrieved it as Range and cast numeric bucket bounds to joda DateTime,
        // which fails with ClassCastException at runtime.
        Histogram aggResult = response.getAggregations().get("agg");

        for (Histogram.Bucket entry : aggResult.getBuckets()) {
            String key = entry.getKeyAsString();   // Lower bound of the interval as key
            long docCount = entry.getDocCount();   // Doc count
            log.info("key [{}], doc_count [{}]", key, docCount);
        }
    }
}

MetricAggregationTest.java

package com.elastic.demo;

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
import org.elasticsearch.search.aggregations.metrics.avg.AvgAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.max.Max;
import org.elasticsearch.search.aggregations.metrics.max.MaxAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.min.Min;
import org.elasticsearch.search.aggregations.metrics.min.MinAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanks;
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanksAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
import org.elasticsearch.search.aggregations.metrics.stats.StatsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStats;
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStatsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.search.aggregations.metrics.sum.SumAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCountAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.io.IOException;
import java.math.BigDecimal;

/**
 * @Title: 计算度量聚合
 * <p>
 * 计算度量这类的聚合操作是以使用一种方式或者从文档中提取需要聚合的值为基础的。
 * 这些数据不但可以从文档(使用数据属性)的属性中提取出来,也可以使用脚本生成。
 * <p>
 * 数值计量聚合操作是能够产生具体的数值的一种计量聚合操作。
 * 一些聚合操作输出单个的计量数值(例如avg),并且被称作single-value numeric metric aggregation,
 * 其他产生多个计量数值(例如 stats)的称作 multi-value numeric metrics aggregation。
 * 这两种不同的聚合操作只有在桶聚合的子聚合操作中才会有不同的表现(有些桶聚合可以基于每个的数值计量来对返回的桶进行排序)
 */
@SpringBootTest
class MetricAggregationTest {
    private static final Logger log = LoggerFactory.getLogger(MetricAggregationTest.class);
    @Autowired
    private RestHighLevelClient highLevelClient;

    /**
     * Min Aggregatione 最小值聚合
     */
    @Test
    void metricsMin() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(2));

        MinAggregationBuilder aggregation =
                AggregationBuilders
                        .min("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Min aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", BigDecimal.valueOf(aggResult.getValue()));
    }

    /**
     * Max Aggregation 最大值聚合
     */
    @Test
    void metricsMax() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(2));

        MaxAggregationBuilder aggregation =
                AggregationBuilders
                        .max("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Max aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", BigDecimal.valueOf(aggResult.getValue()));
    }


    /**
     * Sum Aggregation 求和聚合
     */
    @Test
    void metricsSum() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(2));

        SumAggregationBuilder aggregation =
                AggregationBuilders
                        .sum("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Sum aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", BigDecimal.valueOf(aggResult.getValue()));
    }


    /**
     * Avg Aggregation 平均值聚合
     */
    @Test
    void metricsAvg() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(2));

        AvgAggregationBuilder aggregation =
                AggregationBuilders
                        .avg("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Avg aggResult = response.getAggregations().get("agg");
        log.info(">>> {}", BigDecimal.valueOf(aggResult.getValue()));
    }


    /**
     * Stats Aggregation 统计聚合
     * <p>
     * 统计聚合——基于文档的某个值,计算出一些统计信息(min、max、sum、count、avg),
     * 用于计算的值可以是特定的数值型字段,也可以通过脚本计算而来。
     */
    @Test
    void metricsStats() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(2));

        StatsAggregationBuilder aggregation =
                AggregationBuilders
                        .stats("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Stats aggResult = response.getAggregations().get("agg");

        log.info("Min >>> {}", BigDecimal.valueOf(aggResult.getMin()));
        log.info("Max >>> {}", BigDecimal.valueOf(aggResult.getMax()));
        log.info("Avg >>> {}", BigDecimal.valueOf(aggResult.getAvg()));
        log.info("Sum >>> {}", BigDecimal.valueOf(aggResult.getSum()));
        log.info("Count >>> {}", aggResult.getCount());
    }


    /**
     * Extended Stats Aggregation 扩展统计聚合
     * <p>
     * 扩展统计聚合——基于文档的某个值,计算出一些统计信息
     * (比普通的stats聚合多了sum_of_squares、variance、std_deviation、std_deviation_bounds),
     * 用于计算的值可以是特定的数值型字段,也可以通过脚本计算而来。
     */
    @Test
    void metricsExtendedStats() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(20));

        ExtendedStatsAggregationBuilder aggregation =
                AggregationBuilders
                        .extendedStats("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        ExtendedStats aggResult = response.getAggregations().get("agg");

        log.info("Min >>> {}", BigDecimal.valueOf(aggResult.getMin()));
        log.info("Max >>> {}", BigDecimal.valueOf(aggResult.getMax()));
        log.info("Avg >>> {}", BigDecimal.valueOf(aggResult.getAvg()));
        log.info("Sum >>> {}", BigDecimal.valueOf(aggResult.getSum()));
        log.info("Count >>> {}", aggResult.getCount());

        log.info("stdDeviation >>> {}", BigDecimal.valueOf(aggResult.getStdDeviation()));
        log.info("sumOfSquares >>> {}", BigDecimal.valueOf(aggResult.getSumOfSquares()));
        log.info("sumOfSquares >>> {}", BigDecimal.valueOf(aggResult.getVariance()));
    }


    /**
     * Value Count Aggregation 值计数聚合
     * <p>
     * 值计数聚合——计算聚合文档中某个值的个数, 用于计算的值可以是特定的数值型字段,也可以通过脚本计算而来。
     */
    @Test
    void metricsValueCount() throws IOException {
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.rangeQuery("classLvl").gte(20));

        ValueCountAggregationBuilder aggregation =
                AggregationBuilders
                        .count("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query);
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        ValueCount aggResult = response.getAggregations().get("agg");
        log.info("Count >>> {}", BigDecimal.valueOf(aggResult.getValue()));
    }


    /**
     * Percentile Aggregation 百分百聚合
     * <p>
     * 百分百聚合——基于聚合文档中某个数值类型的值,求这些值中的一个或者多个百分比,
     * 用于计算的值可以是特定的数值型字段,也可以通过脚本计算而来。
     */
    @Test
    void metricsPercentiles() throws IOException {
        PercentilesAggregationBuilder aggregation =
                AggregationBuilders
                        .percentiles("agg")
                        .field("id");
        // 自定义百分数位
        // .percentiles(1.0, 6.0, 10.0, 20.0, 30.0, 75.0, 95.0, 99.0);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Percentiles aggResult = response.getAggregations().get("agg");
        aggResult.forEach(result -> {
            log.info("percent >>> {}", result.getPercent());
            log.info("value >>> {}", result.getValue());
        });
    }


    /**
     * Percentile Ranks Aggregation 百分比等级聚合
     * <p>
     * 一个multi-value指标聚合,它通过从聚合文档中提取数值来计算一个或多个百分比。
     */
    @Test
    void metricsPercentilesRanks() throws IOException {
        PercentilesAggregationBuilder aggregation =
                AggregationBuilders
                        .percentiles("agg")
                        .field("classLvl");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        PercentileRanks aggResult = response.getAggregations().get("agg");
        aggResult.forEach(result -> {
            log.info("percent >>> {}", result.getPercent());
            log.info("value >>> {}", result.getValue());
        });
    }


    /**
     * Top Hits Aggregation 最高匹配权值聚合
     * <p>
     * 最高匹配权值聚合——跟踪聚合中相关性最高的文档。
     * 该聚合一般用做 sub-aggregation,以此来聚合每个桶中的最高匹配的文档。
     */
    @Test
    void metricsTopHits() throws IOException {
        // 大多数标准的搜索选项可以使用 from, size, sort, highlight, explain 等
        AggregationBuilder aggregation = AggregationBuilders
                .terms("agg").field("parentId")
                .subAggregation(
                        AggregationBuilders.topHits("top")
                                // .explain(true)
                                .size(1)
                                .from(10)
                );

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.boolQuery());
        sourceBuilder.aggregation(aggregation);

        SearchRequest searchRequest = new SearchRequest("dcvciclass");
        searchRequest.source(sourceBuilder);
        SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Terms aggResult = response.getAggregations().get("agg");

        for (Terms.Bucket entry : aggResult.getBuckets()) {
            log.info(">>> bucket_key: {}, doc_count: {}", entry.getKey(), entry.getDocCount());

            // We ask for top_hits for each bucket
            TopHits topHits = entry.getAggregations().get("top");
            for (SearchHit hit : topHits.getHits().getHits()) {
                log.info(">>> id [{}], _source [{}]", hit.getId(), hit.getSourceAsString());
            }
        }
    }
}

以上便是关于 Elasticsearch 聚合查询的示例。


完整工程地址

https://github.com/XQfelix/elastic-agg

Elasticsearch中,聚合(aggregation)是一种强大的数据分析工具,可以对文档进行统计分析并返回计算结果。其中,Metric聚合是一种聚合类型,它会对文档中的某些数值型字段进行统计计算,例如:平均值、最大值、最小值、总和等。 下面我们来看一下如何在Elasticsearch中使用Metric聚合进行数据分析。 假设我们有一个存储了销售数据的索引,其中每个文档都包含了产品的名称、价格、销售量等信息。我们想要统计该索引中所有产品的平均价格、最高价格、最低价格以及销售总量,可以使用以下的聚合查询DSL: ``` GET /sales/_search { "size": 0, "aggs": { "avg_price": { "avg": { "field": "price" } }, "max_price": { "max": { "field": "price" } }, "min_price": { "min": { "field": "price" } }, "total_sales": { "sum": { "field": "sales" } } } } ``` 在上述查询中,我们使用了四个不同的Metric聚合:avg(平均值)、max(最大值)、min(最小值)和sum(总和)。每个聚合都针对文档中的price和sales字段进行了计算,最终返回了平均价格、最高价格、最低价格以及销售总量的计算结果。 在聚合查询中,我们还可以使用多个Metric聚合组合起来进行更加复杂的数据分析。例如,我们可以计算不同销售区域的平均价格和销售总量,可以使用以下的聚合查询DSL: ``` GET /sales/_search { "size": 0, "aggs": { "by_region": { "terms": { "field": "region" }, "aggs": { "avg_price": { "avg": { "field": "price" } }, "total_sales": { "sum": { "field": "sales" } } } } } } ``` 在上述查询中,我们首先使用了terms聚合将文档按照region字段进行了分组,然后在每个分组中使用了两个不同的Metric聚合:avg(平均值)和sum(总和)。最终返回了不同销售区域的平均价格和销售总量的计算结果。 总之,Metric聚合是Elasticsearch中非常有用的数据分析工具,可以帮助我们对文档中的数值型字段进行统计分析并返回计算结果。在实际应用中,我们可以根据具体的业务需求来选择不同的Metric聚合进行数据分析。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值