ES6.1.3 聚合查询

最新推荐文章于 2022-07-26 20:27:29 发布

姚贤贤

最新推荐文章于 2022-07-26 20:27:29 发布

阅读量631

点赞数

分类专栏：数据库文章标签： elasticsearch

本文链接：https://blog.csdn.net/u011311291/article/details/103336692

版权

数据库专栏收录该内容

31 篇文章 3 订阅

订阅专栏

数据:

PUT class_01
{
  "mappings": {
      "students": {
        "properties": {
          "age": {
            "type": "long"
          },
          "birthday": {
            "type": "date"
          },
          "sex": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "hobby": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "hobby_en": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "canteen_consumption_record": {
            "type": "nested",
            "properties": {
              "time": {
                "type": "date"
              },
              "value": {
                "type": "double"
              }
            }
          }
        }
      }
    }
}
PUT class_01/students/1
{
  "name":"张三",
  "sex":"man",
  "age":14,
  "birthday":"1992-11-11T15:00:00",
  "hobby":["篮球","足球"],
  "hobby_en":["basketball","football"],
  "stature":178,
  "canteen_consumption_record": [
  {
    "time": "2019-12-29T12:00:00",
    "value": 50
  },
  {
    "time": "2019-12-29T12:13:00",
    "value": 50
  }]
}
PUT class_01/students/2
{
  "name":"李四",
  "sex":"women",
  "age":15,
  "birthday":"1991-02-11T14:00:00",
  "hobby":["篮球","羽毛球"],
  "hobby_en":["basketball","badminton"],
  "canteen_consumption_record": [
  {
    "time": "2019-12-29T12:03:00",
    "value": 50
  }]
}
PUT class_01/students/3
{
  "name":"王五",
  "sex":"man",
  "age":16,
  "birthday":"1990-05-12T14:00:00",
  "hobby":["游泳","乒乓球"],
  "hobby_en":["swim","pingpong"],
  "canteen_consumption_record": [
  {
    "time": "2019-12-29T13:02:00",
    "value": 23.8
  }]
}
PUT class_01/students/4
{
  "name":"赵六",
  "sex":"man",
  "age":15,
  "birthday":"1991-05-11T14:00:00",
  "hobby":["游泳","篮球"],
  "hobby_en":["swim","basketball"],
  "canteen_consumption_record": [
  {
    "time": "2019-12-29T12:23:00",
    "value": 48.2
  }]
}

1.Adjacency Matrix Aggregation 邻接矩阵聚合

官方的邻接矩阵表示图:
在这里插入图片描述
假设有查询:

GET class_01/students/_search
{
  "size": 0,
  "aggs" : {
    "interactions" : {
      "adjacency_matrix" : {
        "filters" : {
          "grpA" : { "terms" : { "hobby_en" : ["swim", "basketball"] }},
          "grpB" : { "terms" : { "hobby_en" : ["pingpong", "football"] }}
        }
      }
    }
  }
}

{
  "aggregations": {
    "interactions": {
      "buckets": [
        {
          "key": "grpA",
          "doc_count": 4
        },
        {
          "key": "grpA&grpB",
          "doc_count": 2
        },
        {
          "key": "grpB",
          "doc_count": 2
        }
      ]
    }
  }
}

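然后按照例子中的数据解释为:
1.grpA：文档的hobby_en只要匹配上["swim", "basketball"]其中一个，就算匹配上。student id为1、2、3、4的文档的hobby_en都至少有一个能匹配上，所以"doc_count": 4
2.grpA&grpB：表示文档的hobby_en既要匹配上grpA，也要匹配上grpB。只有student id为1（basketball、football）和3（swim、pingpong）的文档满足，所以"doc_count": 2
3.grpB：同1，文档的hobby_en只要匹配上["pingpong", "football"]其中一个即可。只有student id为1和3的文档满足，所以"doc_count": 2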

2.Percentiles Aggregation

GET class_01/students/_search
{
    "size": 0,
    "aggs" : {
        "percent_age" : {
            "percentiles" : {
                "field" : "age" 
            }
        }
    }
}

结果:

{
  "aggregations": {
    "percent_age": {
      "values": {
        "1.0": 14.03, #(100-1)%的文档超出了14.03
        "5.0": 14.15, #(100-5)%的文档超出14.15
        "25.0": 14.75,
        "50.0": 15,
        "75.0": 15.25,
        "95.0": 15.85,
        "99.0": 15.969999999999999 #(100-99)%的文档超出约15.97
      }
    }
  }
}

一般用来做近似的分布统计（例如观察数据分布、发现异常值）

3.Composite Aggregation 复合聚合

类似 SQL 中按多个字段 GROUP BY

GET class_01/_search
{
  "size": 0, 
    "aggs" : {
        "my_buckets": {
            "composite" : {
                 "sources" : [
                    { "date": { "date_histogram": { "field": "birthday", "interval": "1y", "order": "desc" } } },
                    { "age": { "terms": {"field": "age", "order": "desc" } } }
                ]
            }
            
        }
    }
}

结果:

{
  "aggregations": {
    "my_buckets": {
      "buckets": [
        {
          "key": {
            "date": 694224000000,
            "age": 14
          },
          "doc_count": 1
        },
        {
          "key": {
            "date": 662688000000,
            "age": 15
          },
          "doc_count": 2
        },
        {
          "key": {
            "date": 631152000000,
            "age": 16
          },
          "doc_count": 1
        }
      ]
    }
  }
}

4.Filter Aggregation 筛选聚合

先用 filter 筛选出满足条件的文档，再只对这些文档做聚合。这里统计男性（sex 为 man）的文档个数和平均年龄

POST /class_01/students/_search
{
  "size":0,
    "aggs" : {
        "filter_sex" : {
            "filter" : { "term": { "sex": "man" } },
            "aggs" : {
                "avg_age" : { "avg" : { "field" : "age" } }
            }
        }
    }
}

结果:

"aggregations": {
    "filter_sex": {
      "doc_count": 3,
      "avg_age": {
        "value": 15
      }
    }
  }

5.global Aggregation全局聚合

可以查看全局和局部的对比，比如这里可以查看所有人的平均年龄和女人的平均年龄

GET /class_01/_search?size=0
{
    "query" : {
        "match" : { "sex" : "women" }
    },
    "aggs" : {
        "all_students" : {
            "global" : {}, 
            "aggs" : { 
                "avg_age" : { "avg" : { "field" : "age" } }
            }
        },
        "women_avg": { "avg" : { "field" : "age" } }
    }
}

结果：

"aggregations": {
    "women_avg": {
      "value": 15
    },
    "all_students": {
      "doc_count": 4,
      "avg_age": {
        "value": 15
      }
    }
  }

六.Missing Aggregation

统计缺少某个字段的文档个数，这里统计没有身高（stature）字段的文档个数。上面的数据中只有张三有 stature 字段，所以结果应为 3

POST /class_01/students/_search?size=0
{
    "aggs" : {
        "students_without_stature" : {
            "missing" : { "field" : "stature" }
        }
    }
}

七.Nested Aggregation

嵌套聚合查询，统计男性和女性各时间段的食堂就餐次数。这里使用了"time_zone": "+00:15"，
主要是为了让"time": "2019-12-29T12:00:00"和"time": "2019-12-29T12:13:00"这两条记录统计出来后显示在"2019-12-29 12:15:00"这个时间点

GET class_01/_search
{
  "size": 0, 
  "aggs": {
    "group_sex": {
      "terms": {
        "field": "sex.keyword"
      },
      "aggs": {
        "nested": {
          "nested": {
            "path": "canteen_consumption_record"
          },
          "aggs": {
            "group_record_time" : {
                "date_histogram" : {
                    "field" : "canteen_consumption_record.time",
                    "interval" : "15m",
                    "time_zone": "+00:15"
                }
            }
          }
        }
      }
    }
  }
}

结果:

"aggregations": {
    "group_sex": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "man",
          "doc_count": 3,
          "nested": {
            "doc_count": 4,
            "group_record_time": {
              "buckets": [
                {
                  "key_as_string": "2019-12-29 12:15:00",
                  "key": 1577620800000,
                  "doc_count": 2
                },
                {
                  "key_as_string": "2019-12-29 12:30:00",
                  "key": 1577621700000,
                  "doc_count": 1
                },
                {
                  "key_as_string": "2019-12-29 12:45:00",
                  "key": 1577622600000,
                  "doc_count": 0
                },
                {
                  "key_as_string": "2019-12-29 13:00:00",
                  "key": 1577623500000,
                  "doc_count": 0
                },
                {
                  "key_as_string": "2019-12-29 13:15:00",
                  "key": 1577624400000,
                  "doc_count": 1
                }
              ]
            }
          }
        },
        {
          "key": "women",
          "doc_count": 1,
          "nested": {
            "doc_count": 1,
            "group_record_time": {
              "buckets": [
                {
                  "key_as_string": "2019-12-29 12:15:00",
                  "key": 1577620800000,
                  "doc_count": 1
                }
              ]
            }
          }
        }
      ]
    }
  }

八.Reverse nested Aggregation

在nested中指定了path之后，聚合的上下文是嵌套文档。为了回到根文档（root），需要使用reverse_nested；如果不使用reverse_nested，后续按根文档字段sex.keyword分组的group_sex将没有作用。

GET class_01/_search
{
  "size": 0, 
  "aggs": {
    "nested": {
      "nested": {
        "path": "canteen_consumption_record"
      },
      "aggs": {
        "group_record_time" : {
            "date_histogram" : {
                "field" : "canteen_consumption_record.time",
                "format": "yyyy-MM-dd HH:mm:ss",
                "interval" : "15m",
                "time_zone": "+00:15"
            },
            "aggs": {
            "comment_to_issue": {
              "reverse_nested": {}, 
              "aggs": {
                "group_sex": {
                  "terms": {
                    "field": "sex.keyword"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

结果:

"aggregations": {
    "nested": {
      "doc_count": 5,
      "group_record_time": {
        "buckets": [
          {
            "key_as_string": "2019-12-29 12:15:00",
            "key": 1577620800000,
            "doc_count": 3,
            "comment_to_issue": {
              "doc_count": 2,
              "group_sex": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "man",
                    "doc_count": 1
                  },
                  {
                    "key": "women",
                    "doc_count": 1
                  }
                ]
              }
            }
          },
          {
            "key_as_string": "2019-12-29 12:30:00",
            "key": 1577621700000,
            "doc_count": 1,
            "comment_to_issue": {
              "doc_count": 1,
              "group_sex": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "man",
                    "doc_count": 1
                  }
                ]
              }
            }
          },
          {
            "key_as_string": "2019-12-29 12:45:00",
            "key": 1577622600000,
            "doc_count": 0,
            "comment_to_issue": {
              "doc_count": 0,
              "group_sex": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": []
              }
            }
          },
          {
            "key_as_string": "2019-12-29 13:00:00",
            "key": 1577623500000,
            "doc_count": 0,
            "comment_to_issue": {
              "doc_count": 0,
              "group_sex": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": []
              }
            }
          },
          {
            "key_as_string": "2019-12-29 13:15:00",
            "key": 1577624400000,
            "doc_count": 1,
            "comment_to_issue": {
              "doc_count": 1,
              "group_sex": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "man",
                    "doc_count": 1
                  }
                ]
              }
            }
          }
        ]
      }
    }
  }

九.Range Aggregation

可以指定多个范围进行聚合（每个范围包含 from、不包含 to）。这个例子统计每个年龄范围内的文档个数，并找出每个范围内出现的最大年龄

GET class_01/_search
{
  "size": 0, 
  "aggs" : {
      "age_ranges" : {
          "range" : {
              "field" : "age",
              "keyed" : true,
              "ranges" : [
                  { "to" : 14 },
                  { "from" : 14, "to" : 16 },
                  { "from" : 16 }
              ]
          },
          "aggs": {
            "haha": {
              "max": {
                "field": "age"
              }
            }
          }
      }
  }
}

结果:

"aggregations": {
    "age_ranges": {
      "buckets": {
        "*-14.0": {
          "to": 14,
          "doc_count": 0,
          "haha": {
            "value": null
          }
        },
        "14.0-16.0": {
          "from": 14,
          "to": 16,
          "doc_count": 3,
          "haha": {
            "value": 15
          }
        },
        "16.0-*": {
          "from": 16,
          "doc_count": 1,
          "haha": {
            "value": 16
          }
        }
      }
    }
  }

姚贤贤

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
ES6.1.3 聚合查询

数据:PUT class_01/students/1{ "name":"张三", "age":14, "birthday":"1992-11-11 15:00:00", "hobby":["篮球","足球"], "hobby_en":["basketball","football"]}PUT class_01/students/2{ "name":"李四", ...
复制链接

扫一扫

专栏目录