前言
关于聚合分析,我的简单理解是:根据一定的条件,对数据做总体概览、分布分析和统计计算;类似于关系型数据库中带 where 条件的 select count(1) ... group by 查询。
下面使用例子学习如何用ES聚合分析
首先,Elasticsearch 在对 text 字段进行排序、聚合时,需要把该字段的数据以一种单独的数据结构(fielddata)加载并缓存到内存里;但 text 字段的 fielddata 默认是禁用的,这样做是为了节省内存空间,如有需要必须单独开启。
https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html
所以这里先把要聚合的 interests 字段的 fielddata 属性设置为 true:
PUT /test_index/_mapping/base
{
"properties": {
"interests": {
"type": "text",
"fielddata": true
}
}
}
结果
{
"acknowledged": true
}
acknowledged为true表示设置成功,然后我们可以开始测试了。
测试一:统计每个兴趣的数量
group_by_interests 是我自己起的聚合名称(也就是分组名),可以随意命名;terms 聚合会按 interests 字段的值把文档分到不同的桶(bucket)里。
GET /test_index/base/_search
{
"aggs": {
"group_by_interests":{
"terms":{
"field":"interests"
}
}
}
}
结果
{
"took": 27,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "base",
"_id": "2",
"_score": 1,
"_source": {
"name": "xiaomei",
"age": 18,
"sex": "F",
"interests": "music movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "1",
"_score": 1,
"_source": {
"name": "xiaoming",
"age": 18,
"sex": "M",
"interests": "sport movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "3",
"_score": 1,
"_source": {
"name": "xiaojiejie",
"age": 25,
"sex": "F",
"interests": "movie"
}
}
]
},
"aggregations": {
"group_by_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "movie",
"doc_count": 3
},
{
"key": "music",
"doc_count": 1
},
{
"key": "sport",
"doc_count": 1
}
]
}
}
}
可以很快地看到统计结果:
兴趣 | 数量 |
---|---|
movie | 3 |
music | 1 |
sport | 1 |
测试二:统计女性(sex 为 F)的兴趣爱好分布
即在聚合的基础上追加查询条件(query),聚合只针对查询命中的文档进行。
GET /test_index/base/_search
{
"query": {
"match": {
"sex": "F"
}
},
"aggs": {
"group_by_interests":{
"terms":{
"field":"interests"
}
}
}
}
结果
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "test_index",
"_type": "base",
"_id": "2",
"_score": 0.2876821,
"_source": {
"name": "xiaomei",
"age": 18,
"sex": "F",
"interests": "music movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "3",
"_score": 0.2876821,
"_source": {
"name": "xiaojiejie",
"age": 25,
"sex": "F",
"interests": "movie"
}
}
]
},
"aggregations": {
"group_by_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "movie",
"doc_count": 2
},
{
"key": "music",
"doc_count": 1
}
]
}
}
}
测试三:计算每个兴趣爱好的平均年龄
需要用到嵌套聚合:在 group_by_interests 这个 terms 聚合内部再嵌套一个 avg 聚合(这里命名为 avg_age),对每个桶分别计算 age 字段的平均值。
GET /test_index/base/_search
{
"aggs": {
"group_by_interests":{
"terms":{
"field":"interests"
},
"aggs": {
"avg_age": {
"avg": {
"field": "age"
}
}
}
}
}
}
结果
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "base",
"_id": "2",
"_score": 1,
"_source": {
"name": "xiaomei",
"age": 18,
"sex": "F",
"interests": "music movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "1",
"_score": 1,
"_source": {
"name": "xiaoming",
"age": 18,
"sex": "M",
"interests": "sport movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "3",
"_score": 1,
"_source": {
"name": "xiaojiejie",
"age": 25,
"sex": "F",
"interests": "movie"
}
}
]
},
"aggregations": {
"group_by_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "movie",
"doc_count": 3,
"avg_age": {
"value": 20.333333333333332
}
},
{
"key": "music",
"doc_count": 1,
"avg_age": {
"value": 18
}
},
{
"key": "sport",
"doc_count": 1,
"avg_age": {
"value": 18
}
}
]
}
}
}
测试四:计算每个兴趣爱好的平均年龄,并按照平均年龄降序排序
在测试三的基础上进行排序:在 terms 聚合中增加 order 设置。
注意:order 中引用的名称必须与子聚合的自定义名称 avg_age 保持一致。
GET /test_index/base/_search
{
"aggs": {
"group_by_interests":{
"terms":{
"field":"interests",
"order": {
"avg_age": "desc"
}
},
"aggs": {
"avg_age": {
"avg": {
"field": "age"
}
}
}
}
}
}
结果
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "base",
"_id": "2",
"_score": 1,
"_source": {
"name": "xiaomei",
"age": 18,
"sex": "F",
"interests": "music movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "1",
"_score": 1,
"_source": {
"name": "xiaoming",
"age": 18,
"sex": "M",
"interests": "sport movie"
}
},
{
"_index": "test_index",
"_type": "base",
"_id": "3",
"_score": 1,
"_source": {
"name": "xiaojiejie",
"age": 25,
"sex": "F",
"interests": "movie"
}
}
]
},
"aggregations": {
"group_by_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "movie",
"doc_count": 3,
"avg_age": {
"value": 20.333333333333332
}
},
{
"key": "music",
"doc_count": 1,
"avg_age": {
"value": 18
}
},
{
"key": "sport",
"doc_count": 1,
"avg_age": {
"value": 18
}
}
]
}
}
}