什么是聚合(Aggregation)
- ElasticSearch 除了搜索之外,提供的针对 ES 数据进行统计分析的功能
- 实时性高
- Hadoop (T + 1)
- 通过聚合, 我们会得到一个数据的概览, 是分析和总结全套的数据,而不是寻找单个文档
- 尖沙咀 和 香港岛的客房数量
- 不同的价格区间, 可预订的经济型酒店和五星级酒店的数量
- 高性能,只需要一条语句,就可以从ElasticSearch得到分析结果
- 无需在客户端自己去实现分析逻辑
聚合的分类
- Bucket Aggregation - 一系列满足特定条件的文档的集合
- Metric Aggregation - 一些数学运算,可以对文档字段进行统计分析
- Pipeline Aggregation - 对其他的聚合结果进行二次聚合
- Matrix Aggregation - 支持对多个字段的操作并提供一个结果矩阵
Bucket
一系列满足条件的文档,类似于 SQL中的 GROUP by brand
对商品进行 高档 中档 低档分桶,然后再单独对高档的商品进行 好评 中评 差评分桶
Metric
- Metric 会基于数据集计算结果, 除了支持在字段上进行计算,同样也支持在脚本 (painless script) 产生的结果上进行计算
- 大多数 Metric 是数学计算,仅输出一个值
min / max / sum / avg / cardinality
- 部分 metric 支持输出多个数值
stats / percentiles / percentile_ranks
通过 Kibana 对航班目的地进行统计
// 航班信息 按照目的地进行分桶统计
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"flight_dest": {
"terms": {
"field": "DestCountry"
}
}
}
}
// 返回结果
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"flight_dest" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371
},
{
"key" : "US",
"doc_count" : 1987
},
{
"key" : "CN",
"doc_count" : 1096
},
{
"key" : "CA",
"doc_count" : 944
},
{
"key" : "JP",
"doc_count" : 774
},
{
"key" : "RU",
"doc_count" : 739
},
{
"key" : "CH",
"doc_count" : 691
},
{
"key" : "GB",
"doc_count" : 449
},
{
"key" : "AU",
"doc_count" : 416
},
{
"key" : "PL",
"doc_count" : 405
}
]
}
}
}
// 加入 Metrics
// 求出平均票价
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"flight_dest": {
"terms": {
"field": "DestCountry"
},
"aggs": {
"avg_price": {
"avg": { // 求平均价格
"field": "AvgTicketPrice"
}
},
"max_price" : {
"max": { // 最大价格
"field": "AvgTicketPrice"
}
},
"min_price" : {
"min": { // 最小价格
"field": "AvgTicketPrice"
}
}
}
}
}
}
// 结果,数据太多了。只放一部分
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"flight_dest" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371,
"max_price" : {
"value" : 1195.3363037109375
},
"min_price" : {
"value" : 100.57646942138672
},
"avg_price" : {
"value" : 586.9627099618385
}
},
{
"key" : "US",
"doc_count" : 1987,
"max_price" : {
"value" : 1199.72900390625
},
"min_price" : {
"value" : 100.14596557617188
},
"avg_price" : {
"value" : 595.7743908825026
}
}
]
}
}
}
// 价格统计信息, 和 天气信息
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"flight_dest": {
"terms": {
"field" : "DestCountry"
},
"aggs": {
"stats_price": {
"stats": { // 这个直接包含了 min max avg count sum
"field": "AvgTicketPrice"
}
},
"wather" : {
"terms": {
"field": "DestWeather",
"size": 5
}
}
}
}
}
}
// 返回结果
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"flight_dest" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 506,
"buckets" : [
{
"key" : "Clear",
"doc_count" : 428
},
{
"key" : "Sunny",
"doc_count" : 424
},
{
"key" : "Rain",
"doc_count" : 417
},
{
"key" : "Cloudy",
"doc_count" : 414
},
{
"key" : "Heavy Fog",
"doc_count" : 182
}
]
},
"stats_price" : {
"count" : 2371,
"min" : 100.57646942138672,
"max" : 1195.3363037109375,
"avg" : 586.9627099618385,
"sum" : 1391688.585319519
}
},
{
"key" : "US",
"doc_count" : 1987,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 438,
"buckets" : [
{
"key" : "Rain",
"doc_count" : 371
},
{
"key" : "Clear",
"doc_count" : 346
},
{
"key" : "Sunny",
"doc_count" : 345
},
{
"key" : "Cloudy",
"doc_count" : 330
},
{
"key" : "Heavy Fog",
"doc_count" : 157
}
]
},
"stats_price" : {
"count" : 1987,
"min" : 100.14596557617188,
"max" : 1199.72900390625,
"avg" : 595.7743908825026,
"sum" : 1183803.7146835327
}
},
{
"key" : "CN",
"doc_count" : 1096,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 229,
"buckets" : [
{
"key" : "Sunny",
"doc_count" : 209
},
{
"key" : "Rain",
"doc_count" : 207
},
{
"key" : "Clear",
"doc_count" : 192
},
{
"key" : "Cloudy",
"doc_count" : 173
},
{
"key" : "Thunder & Lightning",
"doc_count" : 86
}
]
},
"stats_price" : {
"count" : 1096,
"min" : 102.90382385253906,
"max" : 1198.4901123046875,
"avg" : 640.7101617033464,
"sum" : 702218.3372268677
}
},
{
"key" : "CA",
"doc_count" : 944,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 189,
"buckets" : [
{
"key" : "Clear",
"doc_count" : 198
},
{
"key" : "Rain",
"doc_count" : 173
},
{
"key" : "Cloudy",
"doc_count" : 156
},
{
"key" : "Sunny",
"doc_count" : 148
},
{
"key" : "Damaging Wind",
"doc_count" : 80
}
]
},
"stats_price" : {
"count" : 944,
"min" : 100.5572509765625,
"max" : 1198.8525390625,
"avg" : 648.7471090413757,
"sum" : 612417.2709350586
}
},
{
"key" : "JP",
"doc_count" : 774,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 165,
"buckets" : [
{
"key" : "Rain",
"doc_count" : 152
},
{
"key" : "Sunny",
"doc_count" : 138
},
{
"key" : "Clear",
"doc_count" : 130
},
{
"key" : "Cloudy",
"doc_count" : 123
},
{
"key" : "Damaging Wind",
"doc_count" : 66
}
]
},
"stats_price" : {
"count" : 774,
"min" : 103.97209930419922,
"max" : 1199.4913330078125,
"avg" : 650.9203447346847,
"sum" : 503812.346824646
}
},
{
"key" : "RU",
"doc_count" : 739,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 161,
"buckets" : [
{
"key" : "Cloudy",
"doc_count" : 149
},
{
"key" : "Rain",
"doc_count" : 128
},
{
"key" : "Clear",
"doc_count" : 122
},
{
"key" : "Sunny",
"doc_count" : 117
},
{
"key" : "Thunder & Lightning",
"doc_count" : 62
}
]
},
"stats_price" : {
"count" : 739,
"min" : 101.0040054321289,
"max" : 1196.7423095703125,
"avg" : 662.9949632162009,
"sum" : 489953.27781677246
}
},
{
"key" : "CH",
"doc_count" : 691,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 128,
"buckets" : [
{
"key" : "Cloudy",
"doc_count" : 135
},
{
"key" : "Sunny",
"doc_count" : 134
},
{
"key" : "Clear",
"doc_count" : 128
},
{
"key" : "Rain",
"doc_count" : 115
},
{
"key" : "Heavy Fog",
"doc_count" : 51
}
]
},
"stats_price" : {
"count" : 691,
"min" : 101.3473129272461,
"max" : 1196.496826171875,
"avg" : 575.1067587028537,
"sum" : 397398.7702636719
}
},
{
"key" : "GB",
"doc_count" : 449,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 93,
"buckets" : [
{
"key" : "Rain",
"doc_count" : 93
},
{
"key" : "Sunny",
"doc_count" : 81
},
{
"key" : "Clear",
"doc_count" : 77
},
{
"key" : "Cloudy",
"doc_count" : 71
},
{
"key" : "Heavy Fog",
"doc_count" : 34
}
]
},
"stats_price" : {
"count" : 449,
"min" : 111.34574890136719,
"max" : 1197.78564453125,
"avg" : 650.5326856005696,
"sum" : 292089.17583465576
}
},
{
"key" : "AU",
"doc_count" : 416,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 93,
"buckets" : [
{
"key" : "Rain",
"doc_count" : 80
},
{
"key" : "Cloudy",
"doc_count" : 75
},
{
"key" : "Clear",
"doc_count" : 73
},
{
"key" : "Sunny",
"doc_count" : 57
},
{
"key" : "Hail",
"doc_count" : 38
}
]
},
"stats_price" : {
"count" : 416,
"min" : 102.2943115234375,
"max" : 1197.6326904296875,
"avg" : 669.5588319668403,
"sum" : 278536.47409820557
}
},
{
"key" : "PL",
"doc_count" : 405,
"wather" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 90,
"buckets" : [
{
"key" : "Clear",
"doc_count" : 74
},
{
"key" : "Rain",
"doc_count" : 71
},
{
"key" : "Cloudy",
"doc_count" : 67
},
{
"key" : "Sunny",
"doc_count" : 66
},
{
"key" : "Thunder & Lightning",
"doc_count" : 37
}
]
},
"stats_price" : {
"count" : 405,
"min" : 104.28328704833984,
"max" : 1185.43701171875,
"avg" : 662.4497233072917,
"sum" : 268292.1379394531
}
}
]
}
}
}