Elasticsearch 聚合搜索的使用案例

Elasticsearch的聚合搜索

和MySQL一样,Elasticsearch也支持对数据根据某一字段进行分组,然后进行聚合分析。
Elasticsearch聚合搜索有两个比较常见并且重要的概念,bucket和metric。

  • Bucket,就是数据的分组。对某个字段进行分组的时候,这个字段值相同的那些数据就会被放到同一个bucket中,类似于SQL中的GROUP BY语法。
  • Metric,对一个数据分组执行的统计,类似于SQL中的COUNT、AVG、MAX、MIN、SUM等聚合函数。当有了一堆bucket的时候,就可以对每个bucket中的数据进行聚合分析了。
数据准备

这里简单创建一个mapping,通过bulk的方式往里面插入几条数据,用于demo的操作。
需要注意的是,text类型的字段如果要用于聚合分析,需要在mapping中将 fielddata 设置为 true;而keyword、数值(如long)和date等类型默认通过doc_values支持聚合,不需要单独设置。

PUT /phones
{
  "mappings": {
    "properties": {
      "price":{
        "type":"long"
      },
      "color":{
        "type": "keyword"
      },
      "brand":{
        "type": "keyword"
      },
      "release_date":{
        "type": "date"

      }
    }
  }
}
PUT /phones/_bulk
{"index":{}}
{"price":100,"color":"白色","brand":"小米","release_date":"2022-02-06"}
{"index":{}}
{"price":150,"color":"白色","brand":"小米","release_date":"2022-02-06"}
{"index":{}}
{"price":200,"color":"黑色","brand":"小米","release_date":"2022-02-08"}
{"index":{}}
{"price":250,"color":"黑色","brand":"小米","release_date":"2022-02-08"}
{"index":{}}
{"price":300,"color":"白色","brand":"华为","release_date":"2022-02-08"}
{"index":{}}
{"price":400,"color":"黑色","brand":"华为","release_date":"2022-02-10"}
{"index":{}}
{"price":500,"color":"灰色","brand":"华为","release_date":"2022-02-11"}
{"index":{}}
{"price":250,"color":"白色","brand":"苹果","release_date":"2022-02-11"}
统计各个品牌的phone数量

根据brand进行分组,terms聚合默认会返回每个bucket中的doc数量。
设置size为0表示不返回命中的原始文档(hits),只返回聚合结果。

GET /phones/_search
{
  "size":0,
  "aggs":{
    "group_brand":{
      "terms":{
        "field":"brand"
      }
    }
  }
}

返回结果,doc_count就表示bucket中的doc数量。

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_brand" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "小米",
          "doc_count" : 4
        },
        {
          "key" : "华为",
          "doc_count" : 3
        },
        {
          "key" : "苹果",
          "doc_count" : 1
        }
      ]
    }
  }
}
统计各个品牌的平均价格

先根据品牌进行分组,然后对price执行avg操作。

GET /phones/_search
{
  "size":0,
  "aggs": {
    "group_brand": {
      "terms": {
        "field": "brand",
        "size": 10
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果,bucket内多了一个avg_price字段,就是平均价格。

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_brand" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "小米",
          "doc_count" : 4,
          "avg_price" : {
            "value" : 175.0
          }
        },
        {
          "key" : "华为",
          "doc_count" : 3,
          "avg_price" : {
            "value" : 400.0
          }
        },
        {
          "key" : "苹果",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 250.0
          }
        }
      ]
    }
  }
}
统计各个品牌每种颜色的平均价格

先根据品牌分组,再在每个品牌的bucket内根据颜色分组,最后对每个颜色分组求平均价格。

GET /phones/_search
{
  "size": 0,
  "aggs": {
    "group_brand": {
      "terms": {
        "field": "brand"
      },
      "aggs": {
        "group_brand_color":{
          "terms": {
            "field": "color"
          },
          "aggs": {
            "brand_color_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_brand" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "小米",
          "doc_count" : 4,
          "group_brand_color" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "白色",
                "doc_count" : 2,
                "brand_color_avg_price" : {
                  "value" : 125.0
                }
              },
              {
                "key" : "黑色",
                "doc_count" : 2,
                "brand_color_avg_price" : {
                  "value" : 225.0
                }
              }
            ]
          }
        },
        {
          "key" : "华为",
          "doc_count" : 3,
          "group_brand_color" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "灰色",
                "doc_count" : 1,
                "brand_color_avg_price" : {
                  "value" : 500.0
                }
              },
              {
                "key" : "白色",
                "doc_count" : 1,
                "brand_color_avg_price" : {
                  "value" : 300.0
                }
              },
              {
                "key" : "黑色",
                "doc_count" : 1,
                "brand_color_avg_price" : {
                  "value" : 400.0
                }
              }
            ]
          }
        },
        {
          "key" : "苹果",
          "doc_count" : 1,
          "group_brand_color" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "白色",
                "doc_count" : 1,
                "brand_color_avg_price" : {
                  "value" : 250.0
                }
              }
            ]
          }
        }
      ]
    }
  }
}
统计品牌下价格的最大、最小、平均、总和
GET /phones/_search
{
  "size": 0,
  "aggs": {
    "group_by_brand": {
      "terms": {
        "field": "brand"
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "max_price":{
          "max": {
            "field": "price"
          }
        },
        "min_price":{
          "min": {
            "field": "price"
          }
        },
        "sum_price":{
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_brand" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "小米",
          "doc_count" : 4,
          "max_price" : {
            "value" : 250.0
          },
          "min_price" : {
            "value" : 100.0
          },
          "avg_price" : {
            "value" : 175.0
          },
          "sum_price" : {
            "value" : 700.0
          }
        },
        {
          "key" : "华为",
          "doc_count" : 3,
          "max_price" : {
            "value" : 500.0
          },
          "min_price" : {
            "value" : 300.0
          },
          "avg_price" : {
            "value" : 400.0
          },
          "sum_price" : {
            "value" : 1200.0
          }
        },
        {
          "key" : "苹果",
          "doc_count" : 1,
          "max_price" : {
            "value" : 250.0
          },
          "min_price" : {
            "value" : 250.0
          },
          "avg_price" : {
            "value" : 250.0
          },
          "sum_price" : {
            "value" : 250.0
          }
        }
      ]
    }
  }
}
根据价格范围划分bucket

按照价格范围,以100为粒度进行分组。每个bucket的key是该区间的下界,例如key为100的bucket包含价格在[100, 200)范围内的数据。

GET /phones/_search
{
  "size": 0,
  "aggs": {
    "range_price": {
      "histogram": {
        "field": "price",
        "interval": 100
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "range_price" : {
      "buckets" : [
        {
          "key" : 100.0,
          "doc_count" : 2,
          "avg_price" : {
            "value" : 125.0
          }
        },
        {
          "key" : 200.0,
          "doc_count" : 3,
          "avg_price" : {
            "value" : 233.33333333333334
          }
        },
        {
          "key" : 300.0,
          "doc_count" : 1,
          "avg_price" : {
            "value" : 300.0
          }
        },
        {
          "key" : 400.0,
          "doc_count" : 1,
          "avg_price" : {
            "value" : 400.0
          }
        },
        {
          "key" : 500.0,
          "doc_count" : 1,
          "avg_price" : {
            "value" : 500.0
          }
        }
      ]
    }
  }
}
按天统计价格总和

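date_histogram可以对时间范围进行分组,这里是按天分组。
需要注意的是,这里 min_doc_count 设置为1,没有数据的日期不会返回,因此 extended_bounds 实际上不会生效;如果希望把2022-02-05到2022-02-15之间没有数据的日期也补全为空bucket,需要将 min_doc_count 设置为0。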

GET /phones/_search
{
  "size": 0,
  "aggs": {
    "range_date": {
      "date_histogram": {
        "field": "release_date",
        "calendar_interval": "day",
        "format": "yyyy-MM-dd",
        "min_doc_count": 1, 
        "extended_bounds": {
          "min": "2022-02-05",
          "max": "2022-02-15"
        }
      },
      "aggs": {
        "sum_price": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "range_date" : {
      "buckets" : [
        {
          "key_as_string" : "2022-02-06",
          "key" : 1644105600000,
          "doc_count" : 2,
          "sum_price" : {
            "value" : 250.0
          }
        },
        {
          "key_as_string" : "2022-02-08",
          "key" : 1644278400000,
          "doc_count" : 3,
          "sum_price" : {
            "value" : 750.0
          }
        },
        {
          "key_as_string" : "2022-02-10",
          "key" : 1644451200000,
          "doc_count" : 1,
          "sum_price" : {
            "value" : 400.0
          }
        },
        {
          "key_as_string" : "2022-02-11",
          "key" : 1644537600000,
          "doc_count" : 2,
          "sum_price" : {
            "value" : 750.0
          }
        }
      ]
    }
  }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值