文章目录
参照视频:https://www.bilibili.com/video/BV1334y1v7Lf?p=27
数据准备
PUT /tvs
{
"mappings": {
"properties": {
"price": {
"type": "long"
},
"color": {
"type": "keyword"
},
"brand": {
"type": "keyword"
},
"sold_date": {
"type": "date"
}
}
}
}
POST /tvs/_bulk
{ "index": {}}
{ "price" : 1000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-10-28" }
{ "index": {}}
{ "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" }
{ "index": {}}
{ "price" : 3000, "color" : "绿色", "brand" : "小米", "sold_date" : "2016-05-18" }
{ "index": {}}
{ "price" : 1500, "color" : "蓝色", "brand" : "TCL", "sold_date" : "2016-07-02" }
{ "index": {}}
{ "price" : 1200, "color" : "绿色", "brand" : "TCL", "sold_date" : "2016-08-19" }
{ "index": {}}
{ "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" }
{ "index": {}}
{ "price" : 8000, "color" : "红色", "brand" : "三星", "sold_date" : "2017-01-01" }
{ "index": {}}
{ "price" : 2500, "color" : "蓝色", "brand" : "小米", "sold_date" : "2017-02-12" }
分组统计
查询每种颜色的销量
GET /tvs/_search
{
"size" : 0,
"aggs" : {
"popular_colors" : {
"terms" : {
"field" : "color"
}
}
}
}
返回
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"popular_colors" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "红色",
"doc_count" : 4
},
{
"key" : "绿色",
"doc_count" : 2
},
{
"key" : "蓝色",
"doc_count" : 2
}
]
}
}
}
如何查询哪一种颜色销量最高呢
分组嵌套
查询每种颜色的销量以及每种颜色的平均价格
GET /tvs/_search
{
"size": 0,
"aggs": {
"popular_color": {
"terms": {
"field": "color"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
返回
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"popular_color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "红色",
"doc_count" : 4,
"avg_price" : {
"value" : 3250.0
}
},
{
"key" : "绿色",
"doc_count" : 2,
"avg_price" : {
"value" : 2100.0
}
},
{
"key" : "蓝色",
"doc_count" : 2,
"avg_price" : {
"value" : 2000.0
}
}
]
}
}
}
从颜色到品牌进行下钻分析,每种颜色的平均价格,以及找到每种颜色每个品牌的平均价格
我们可以进行多层次的下钻
比如说,现在红色的电视有4台,同时这4台电视中,有3台是属于长虹的,1台是属于小米的
红色电视中的3台长虹的平均价格是多少?
红色电视中的1台小米的平均价格是多少?
GET /tvs/_search
{
"size": 0,
"aggs": {
"polular_color": {
"terms": {
"field": "color"
},
"aggs": {
"aggs_by_brand": {
"terms": {
"field": "brand"
},
"aggs": {
"brand_avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
返回
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"polular_color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "红色",
"doc_count" : 4,
"aggs_by_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "长虹",
"doc_count" : 3,
"brand_avg_price" : {
"value" : 1666.6666666666667
}
},
{
"key" : "三星",
"doc_count" : 1,
"brand_avg_price" : {
"value" : 8000.0
}
}
]
}
},
{
"key" : "绿色",
"doc_count" : 2,
"aggs_by_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TCL",
"doc_count" : 1,
"brand_avg_price" : {
"value" : 1200.0
}
},
{
"key" : "小米",
"doc_count" : 1,
"brand_avg_price" : {
"value" : 3000.0
}
}
]
}
},
{
"key" : "蓝色",
"doc_count" : 2,
"aggs_by_brand" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TCL",
"doc_count" : 1,
"brand_avg_price" : {
"value" : 1500.0
}
},
{
"key" : "小米",
"doc_count" : 1,
"brand_avg_price" : {
"value" : 2500.0
}
}
]
}
}
]
}
}
}
histogram
histogram:类似于terms,也是进行bucket分组操作,接收一个field,按照这个field的值的各个范围区间,进行bucket分组操作
“histogram”:{
“field”: “price”,
“interval”: 2000
},
interval:2000,划分范围,02000,20004000,40006000,60008000,8000~10000,buckets
去根据price的值,比如2500,看落在哪个区间内,比如20004000,此时就会将这条数据放入20004000对应的那个bucket中
GET /tvs/_search
{
"size": 0,
"aggs": {
"price": {
"histogram": {
"field": "price",
"interval": 2000
}
}
}
}
返回
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"price" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 3
},
{
"key" : 2000.0,
"doc_count" : 4
},
{
"key" : 4000.0,
"doc_count" : 0
},
{
"key" : 6000.0,
"doc_count" : 0
},
{
"key" : 8000.0,
"doc_count" : 1
}
]
}
}
}
date_histogram
GET /tvs/_search
{
"size" : 0,
"aggs": {
"sales": {
"date_histogram": {
"field": "sold_date",
"interval": "month",
"format": "yyyy-MM-dd",
"min_doc_count" : 0,
"extended_bounds" : {
"min" : "2016-01-01",
"max" : "2017-12-31"
}
}
}
}
}
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"sales" : {
"buckets" : [
{
"key_as_string" : "2016-01-01",
"key" : 1451606400000,
"doc_count" : 0
},
{
"key_as_string" : "2016-02-01",
"key" : 1454284800000,
"doc_count" : 0
},
{
"key_as_string" : "2016-03-01",
"key" : 1456790400000,
"doc_count" : 0
},
{
"key_as_string" : "2016-04-01",
"key" : 1459468800000,
"doc_count" : 0
},
{
"key_as_string" : "2016-05-01",
"key" : 1462060800000,
"doc_count" : 1
},
{
"key_as_string" : "2016-06-01",
"key" : 1464739200000,
"doc_count" : 0
},
{
"key_as_string" : "2016-07-01",
"key" : 1467331200000,
"doc_count" : 1
},
{
"key_as_string" : "2016-08-01",
"key" : 1470009600000,
"doc_count" : 1
},
{
"key_as_string" : "2016-09-01",
"key" : 1472688000000,
"doc_count" : 0
},
{
"key_as_string" : "2016-10-01",
"key" : 1475280000000,
"doc_count" : 1
},
{
"key_as_string" : "2016-11-01",
"key" : 1477958400000,
"doc_count" : 2
},
{
"key_as_string" : "2016-12-01",
"key" : 1480550400000,
"doc_count" : 0
},
{
"key_as_string" : "2017-01-01",
"key" : 1483228800000,
"doc_count" : 1
},
{
"key_as_string" : "2017-02-01",
"key" : 1485907200000,
"doc_count" : 1
},
{
"key_as_string" : "2017-03-01",
"key" : 1488326400000,
"doc_count" : 0
},
{
"key_as_string" : "2017-04-01",
"key" : 1491004800000,
"doc_count" : 0
},
{
"key_as_string" : "2017-05-01",
"key" : 1493596800000,
"doc_count" : 0
},
{
"key_as_string" : "2017-06-01",
"key" : 1496275200000,
"doc_count" : 0
},
{
"key_as_string" : "2017-07-01",
"key" : 1498867200000,
"doc_count" : 0
},
{
"key_as_string" : "2017-08-01",
"key" : 1501545600000,
"doc_count" : 0
},
{
"key_as_string" : "2017-09-01",
"key" : 1504224000000,
"doc_count" : 0
},
{
"key_as_string" : "2017-10-01",
"key" : 1506816000000,
"doc_count" : 0
},
{
"key_as_string" : "2017-11-01",
"key" : 1509494400000,
"doc_count" : 0
},
{
"key_as_string" : "2017-12-01",
"key" : 1512086400000,
"doc_count" : 0
}
]
}
}
}
搜索后再聚合
GET /tvs/_search
{
"size": 0,
"query": {
"term": {
"brand": {
"value": "小米"
}
}
},
"aggs": {
"group_by_color": {
"terms": {
"field": "color"
}
}
}
}
同时实现基于搜索结果聚合,基于总体数据聚合
查询所有品牌的平均价格,查询长虹品牌的平均价格
GET /tvs/_search
{
"size": 0,
"query": {
"term": {
"brand": {
"value": "长虹"
}
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
},
"all": {
"global": {},
"aggs": {
"avg_all_brand": {
"avg": {
"field": "price"
}
}
}
}
}
}
返回
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"all" : {
"doc_count" : 8,
"avg_all_brand" : {
"value" : 2650.0
}
},
"avg_price" : {
"value" : 1666.6666666666667
}
}
}
global:就是global bucket,就是将所有数据纳入聚合的scope,而不管之前的query
过滤+聚合
GET /tvs/_search
{
"size": 0,
"query": {
"constant_score": {
"filter": {
"range": {
"price": {
"gte": 1200
}
}
}
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
}
在聚合桶中再filter
查询最近130天内的平均价格,最近140天内的平均价格,最近150天内的平均价格
如果在query中过滤,就会影响全局,如果在聚合aggs中过滤,则只会影响当前桶
首先我们先建立三个桶
GET /tvs/_search
{
"size": 0,
"aggs": {
"recent_130d": {
"filter": {
"range": {
"sold_date": {
"gte": "now-130d"
}
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
},
"recent_140d":{
"filter": {
"range": {
"sold_date": {
"gte": "now-140d"
}
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
},
"recent_150d":{
"filter": {
"range": {
"sold_date": {
"gte": "now-150d"
}
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
}
}
}
修改聚合默认排序
聚合默认是按照桶的个数降序排列的,现在要求按照每种颜色的平均价格排序
GET /tvs/_search
{
"size": 0,
"aggs": {
"color": {
"terms": {
"field": "color",
"order": {
"avg_price":"asc"
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
}
}
}
多次下钻后再聚合
GET /tvs/_search
{
"size": 0,
"aggs": {
"group_by_color": {
"terms": {
"field": "color"
},
"aggs": {
"group_by_brand": {
"terms": {
"field": "brand",
"order": {
"avg_price": "desc"
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
在聚合桶中显示文档列表
GET /tvs/_search
{
"size": 0,
"aggs": {
"color": {
"terms": {
"field": "color"
},
"aggs": {
"result": {
"top_hits": {
"size": 3,
"_source": {
"includes": [ "brand","price"]
}
}
}
}
}
}
}
显示
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 16,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"color" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "红色",
"doc_count" : 8,
"result" : {
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "yT0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 1000,
"brand" : "长虹"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "yj0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 2000,
"brand" : "长虹"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "zj0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 2000,
"brand" : "长虹"
}
}
]
}
}
},
{
"key" : "绿色",
"doc_count" : 4,
"result" : {
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "yz0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 3000,
"brand" : "小米"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "zT0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 1200,
"brand" : "TCL"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "0z0924ABVJ0VnydxxPdV",
"_score" : 1.0,
"_source" : {
"price" : 3000,
"brand" : "小米"
}
}
]
}
}
},
{
"key" : "蓝色",
"doc_count" : 4,
"result" : {
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "zD0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 1500,
"brand" : "TCL"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "0D0824ABVJ0Vnydxc_fY",
"_score" : 1.0,
"_source" : {
"price" : 2500,
"brand" : "小米"
}
},
{
"_index" : "tvs",
"_type" : "_doc",
"_id" : "1D0924ABVJ0VnydxxPdV",
"_score" : 1.0,
"_source" : {
"price" : 1500,
"brand" : "TCL"
}
}
]
}
}
}
]
}
}
}
去重 cartinality
top_hits的简单使用获取最早进 最晚出的数据
原文档:https://blog.csdn.net/weixin_44131414/article/details/124122602
需求:
业务 需要知道工人打卡记录最早进 与 最早出 一天记录原始信息,现通过 top_hits来实现
GET /hw-attendance-2022-3month/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"record_time": {
"gte": "2022-03-07T00:00:00",
"lte": "2022-03-08T00:00:00"
}
}
},
{
"match":{
"idcardno.keyword":"53212*******1613"
}
}
]
}
}
,"size":0,
"aggs": {
"groupBy_idcardno": {
"terms": { --对人员身份证进行分桶
"field": "idcardno.keyword",
"size": 50
},
"aggs": {
"groupBy_recordtime":{
"date_histogram": { --对人员打卡日期,以天为单位进行分桶
"field": "record_time",
"calendar_interval": "day",
"format": "yyyy-MM-dd" ,
"min_doc_count" : 0
},
"aggs": {
"idcard_recordTime_top_last": {--通过top_hits排序获取最晚出的记录
"top_hits": {
"sort": [
{
"record_time": {
"order": "desc"
}
}
],
"size": 1
}
},
"idcard_recordTime_earliest": {--通过top_hits排序获取最早进的记录
"top_hits": {
"sort": [
{
"record_time": {
"order": "asc"
}
}
],
"size": 1
}
}
}
}
}
}
}
}
返回结果
{
"took" : 69,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 281,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"record_date_partiton" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "532122*******1613",
"doc_count" : 281,
"groupBy_recordtime" : {
"buckets" : [
{
"key_as_string" : "2022-03-07",
"key" : 1646611200000,
"doc_count" : 281,
"idcard_recordTime_earliest" : {
"hits" : {
"total" : {
"value" : 281,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "hw-attendance-2022-3month",
"_type" : "_doc",
"_id" : "PFywaH8BGUfFSK8ILMmm",
"_score" : null,
"_source" : {
"app_key" : "8134",
"sn" : "5437405391420012470I",
"equp_name" : "Other",
"pid" : 0,
"record_time" : "2022-03-07T06:04:17",
"full_name" : "赵***",
"photo_path" : "",
"idcardno" : "532*********1613",
"mobile" : "",
"tid" : "0",
"direct" : 1,
"insert_time" : "2022-03-08T16:40:50"
},
"sort" : [
1646633057000
]
}
]
}
},
"idcard_recordTime_top_last" : {
"hits" : {
"total" : {
"value" : 281,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "hw-attendance-2022-3month",
"_type" : "_doc",
"_id" : "qIOSZ38BPDXhbVpyFFgY",
"_score" : null,
"_source" : {
"app_key" : "8134",
"sn" : "5437405391420012470I",
"equp_name" : "Other",
"pid" : 0,
"record_time" : "2022-03-07T17:35:49",
"full_name" : "赵**",
"photo_path" : "",
"idcardno" : "5321**********613",
"mobile" : "",
"tid" : "0",
"direct" : 1,
"insert_time" : "2022-03-08T11:28:20"
},
"sort" : [
1646674549000
]
}
]
}
}
}
]
}
}
]
}
}
}
去重 聚合查询例子
pv: 所有人的pv相加
uv: 对userid进行去重
total_visit_time: 总访问时长
return_visit_uv: 回访uv
bounce_visit_uv: 跳出次数
curl -XGET ‘http://localhost:9200/logstash-2017.10.14/logs/_search?q=section:news&pretty’ -d ’
{
“size”: 0,
“aggs”: {
“pv”: {“sum”: {“field”: “pv”}},
“uv”: {“cardinality”: {“field”: “userid”, “precision_threshold”: 40000}},
“total_visit_time”: {“sum”: {“field”: “visit_time”}},
“return_visit_uv”: {
“filter”: {“term”: {“is_return_visit”: 1}},
“aggs”: {
“total_return_visit_uv”: {“cardinality”: {“field”: “userid”, “precision_threshold”: 40000}}
}
},
“bounce_visit_uv”: {
“filter”: {“term”: {“is_bounce_visit”: 1}},
“aggs”: {
“total_bounce_visit_uv”: {“cardinality”: {“field”: “userid”, “precision_threshold”: 40000}}
}
}
}
}’