Elasticsearch的聚合搜索
和MySQL一样,Elasticsearch也支持先根据某一字段对数据进行分组,然后再对每个分组进行聚合分析。
Elasticsearch聚合搜索有两个比较常见并且重要的概念,bucket和metric。
- Bucket,就是数据的分组。对某个字段进行分组的时候,这个字段值相同的那些数据就会被放到同一个bucket中,类似于SQL中的GROUP BY语法。
- Metric,就是对一个数据分组执行的统计。当有了一堆bucket的时候,就可以对每个bucket中的数据进行聚合分析了,类似于SQL中的COUNT、AVG、MAX、MIN、SUM等聚合函数。
数据准备
这里简单创建一个带mapping的索引phones,然后通过bulk的方式往里面插入几条数据,用于后面的demo操作。
需要注意的是,text类型的字段默认不能用于聚合分析,需要在mapping中将 fielddata 设置为 true 才可以;而keyword、数值类型(如long)和date类型默认开启了doc_values,可以直接用于聚合,不需要单独设置。
PUT /phones
{
"mappings": {
"properties": {
"price":{
"type":"long"
},
"color":{
"type": "keyword"
},
"brand":{
"type": "keyword"
},
"release_date":{
"type": "date"
}
}
}
}
PUT /phones/_bulk
{"index":{}}
{"price":100,"color":"白色","brand":"小米","release_date":"2022-02-06"}
{"index":{}}
{"price":150,"color":"白色","brand":"小米","release_date":"2022-02-06"}
{"index":{}}
{"price":200,"color":"黑色","brand":"小米","release_date":"2022-02-08"}
{"index":{}}
{"price":250,"color":"黑色","brand":"小米","release_date":"2022-02-08"}
{"index":{}}
{"price":300,"color":"白色","brand":"华为","release_date":"2022-02-08"}
{"index":{}}
{"price":400,"color":"黑色","brand":"华为","release_date":"2022-02-10"}
{"index":{}}
{"price":500,"color":"灰色","brand":"华为","release_date":"2022-02-11"}
{"index":{}}
{"price":250,"color":"白色","brand":"苹果","release_date":"2022-02-11"}
统计各个品牌的phone数量
使用terms聚合根据brand进行分组,聚合结果默认就会返回每个bucket中的doc数量。
设置size为0表示不返回命中的原始文档(hits),只返回聚合结果。
GET /phones/_search
{
"size":0,
"aggs":{
"group_brand":{
"terms":{
"field":"brand"
}
}
}
}
返回结果,doc_count就表示bucket中的doc数量。
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"group_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "小米",
"doc_count" : 4
},
{
"key" : "华为",
"doc_count" : 3
},
{
"key" : "苹果",
"doc_count" : 1
}
]
}
}
}
统计各个品牌的平均价格
先根据品牌进行分组,然后对price执行avg操作。
GET /phones/_search
{
"size":0,
"aggs": {
"group_brand": {
"terms": {
"field": "brand",
"size": 10
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
返回结果,bucket内多了一个avg_price字段,就是平均价格。
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"group_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "小米",
"doc_count" : 4,
"avg_price" : {
"value" : 175.0
}
},
{
"key" : "华为",
"doc_count" : 3,
"avg_price" : {
"value" : 400.0
}
},
{
"key" : "苹果",
"doc_count" : 1,
"avg_price" : {
"value" : 250.0
}
}
]
}
}
}
统计各个品牌每种颜色的平均价格
先根据品牌分组,再在每个品牌的bucket内根据颜色分组(通过嵌套aggs实现),最后对每个颜色分组求平均价格。
GET /phones/_search
{
"size": 0,
"aggs": {
"group_brand": {
"terms": {
"field": "brand"
},
"aggs": {
"group_brand_color":{
"terms": {
"field": "color"
},
"aggs": {
"brand_color_avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
返回结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"group_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "小米",
"doc_count" : 4,
"group_brand_color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "白色",
"doc_count" : 2,
"brand_color_avg_price" : {
"value" : 125.0
}
},
{
"key" : "黑色",
"doc_count" : 2,
"brand_color_avg_price" : {
"value" : 225.0
}
}
]
}
},
{
"key" : "华为",
"doc_count" : 3,
"group_brand_color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "灰色",
"doc_count" : 1,
"brand_color_avg_price" : {
"value" : 500.0
}
},
{
"key" : "白色",
"doc_count" : 1,
"brand_color_avg_price" : {
"value" : 300.0
}
},
{
"key" : "黑色",
"doc_count" : 1,
"brand_color_avg_price" : {
"value" : 400.0
}
}
]
}
},
{
"key" : "苹果",
"doc_count" : 1,
"group_brand_color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "白色",
"doc_count" : 1,
"brand_color_avg_price" : {
"value" : 250.0
}
}
]
}
}
]
}
}
}
统计品牌下价格的最大、最小、平均、总和
同一个bucket下可以并列定义多个metric,这里先根据品牌分组,然后同时对price执行avg、max、min、sum操作。
GET /phones/_search
{
"size": 0,
"aggs": {
"group_by_brand": {
"terms": {
"field": "brand"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
},
"max_price":{
"max": {
"field": "price"
}
},
"min_price":{
"min": {
"field": "price"
}
},
"sum_price":{
"sum": {
"field": "price"
}
}
}
}
}
}
返回结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"group_by_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "小米",
"doc_count" : 4,
"max_price" : {
"value" : 250.0
},
"min_price" : {
"value" : 100.0
},
"avg_price" : {
"value" : 175.0
},
"sum_price" : {
"value" : 700.0
}
},
{
"key" : "华为",
"doc_count" : 3,
"max_price" : {
"value" : 500.0
},
"min_price" : {
"value" : 300.0
},
"avg_price" : {
"value" : 400.0
},
"sum_price" : {
"value" : 1200.0
}
},
{
"key" : "苹果",
"doc_count" : 1,
"max_price" : {
"value" : 250.0
},
"min_price" : {
"value" : 250.0
},
"avg_price" : {
"value" : 250.0
},
"sum_price" : {
"value" : 250.0
}
}
]
}
}
}
根据价格范围划分bucket
使用histogram聚合按照价格范围进行分组,interval为100表示每100为一个区间,例如[100,200)、[200,300),返回结果中bucket的key就是所在区间的下界。
GET /phones/_search
{
"size": 0,
"aggs": {
"range_price": {
"histogram": {
"field": "price",
"interval": 100
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
返回结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"range_price" : {
"buckets" : [
{
"key" : 100.0,
"doc_count" : 2,
"avg_price" : {
"value" : 125.0
}
},
{
"key" : 200.0,
"doc_count" : 3,
"avg_price" : {
"value" : 233.33333333333334
}
},
{
"key" : 300.0,
"doc_count" : 1,
"avg_price" : {
"value" : 300.0
}
},
{
"key" : 400.0,
"doc_count" : 1,
"avg_price" : {
"value" : 400.0
}
},
{
"key" : 500.0,
"doc_count" : 1,
"avg_price" : {
"value" : 500.0
}
}
]
}
}
}
按天统计价格总和
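date_histogram可以按照时间间隔对数据进行分组,这里通过calendar_interval设置为按天分组。需要注意的是,min_doc_count为1表示只返回至少包含1条doc的bucket;而extended_bounds只有在min_doc_count为0时才会补全min到max范围内的空bucket,所以在下面的请求中它实际上不会生效,返回结果里也只有存在数据的日期。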
GET /phones/_search
{
"size": 0,
"aggs": {
"range_date": {
"date_histogram": {
"field": "release_date",
"calendar_interval": "day",
"format": "yyyy-MM-dd",
"min_doc_count": 1,
"extended_bounds": {
"min": "2022-02-05",
"max": "2022-02-15"
}
},
"aggs": {
"sum_price": {
"sum": {
"field": "price"
}
}
}
}
}
}
返回结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"range_date" : {
"buckets" : [
{
"key_as_string" : "2022-02-06",
"key" : 1644105600000,
"doc_count" : 2,
"sum_price" : {
"value" : 250.0
}
},
{
"key_as_string" : "2022-02-08",
"key" : 1644278400000,
"doc_count" : 3,
"sum_price" : {
"value" : 750.0
}
},
{
"key_as_string" : "2022-02-10",
"key" : 1644451200000,
"doc_count" : 1,
"sum_price" : {
"value" : 400.0
}
},
{
"key_as_string" : "2022-02-11",
"key" : 1644537600000,
"doc_count" : 2,
"sum_price" : {
"value" : 750.0
}
}
]
}
}
}