ElasticSearch--聚合分析(二)

首次写入文档时,如果对应的index和type不存在,ES会自动创建。请求格式如下:

PUT /index/type/id
PUT /ecommerce/product/1
{
  "name":"gaolujie yagao",
  "desc":"gaoxiao meibai",
  "price":30,
  "producer":" gaolujie producer",
  "tags":["meibai","fanzhu"]
}

PUT /ecommerce/product/2
{
  "name":"jiajieshi yagao",
  "desc":"youxiao fangzhu",
  "price":30,
  "producer":"jiajieshi producer",
  "tags":["fanzhu"]
}
PUT /ecommerce/product/3
{
  "name":"zhanhua",
  "desc":"caoben zhiwu",
  "price":40,
  "producer":"zhonghua producer",
  "tags":["qingxing"]
}

3.6 查询数据

GET /ecommerce/product/3

每个标签下有多少个商品

GET ecommerce/product/_search
{
  "aggs":{
    "group_by_tags":{
      "terms":{
        "field": "tags"
      }
    }
  }
}

group_by_tags为自定义的聚合名称,得到结果:

"aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fanzhu",
          "doc_count": 2
        },
        {
          "key": "meibai",
          "doc_count": 2
        },
        {
          "key": "qingxing",
          "doc_count": 1
        }
      ]
    }
  }

注意:tags是text类型的字段,对其做聚合之前,需要先将该字段的fielddata属性设置为true,否则上面的聚合请求会报错:

PUT /ecommerce/_mapping/product
{
  "properties": {
    "tags":{
      "type": "text",
      "fielddata":true
    }
  }
}

5.1.2 先筛选后分组
对名称中包含yagao的商品,计算每个tag下的商品数量。
在上面的基础上加筛选。

GET ecommerce/product/_search
{
  "query": {
    "match": {
      "name": "yagao"
    }
  }, 
  "aggs":{
    "group_by_tags":{
      "terms":{
        "field": "tags"
      }
    }
  }
}

5.1.3 先分组再求每组的平均数

GET ecommerce/product/_search
{
  "size": 0, 
  "aggs":{
    "group_by_tags":{
      "terms":{
        "field": "tags"
      },
      "aggs":{
        "avg_price":{
          "avg":{"field":"price"}
        }
      }
    }
  }
}

需要说明的是,请求中需要添加"size": 0,否则除了聚合结果之外还会返回命中的文档,数据很多。得到的结果:

"aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fanzhu",
          "doc_count": 2,
          "avg_price": {
            "value": 30
          }
        },
        {
          "key": "meibai",
          "doc_count": 2,
          "avg_price": {
            "value": 40
          }
        },
        {
          "key": "qingxing",
          "doc_count": 1,
          "avg_price": {
            "value": 40
          }
        }
      ]
    }
  }

5.1.4 分组后求平均数然后根据每组平均数降序。

GET ecommerce/product/_search
{
  "size": 0, 
  "aggs":{
    "group_by_tags":{
      "terms":{
        "field": "tags",
        "order": {
          "avg_price": "desc"
        }
      },
      "aggs":{
        "avg_price":{
          "avg":{"field":"price"}
        }
      }
    }
  }
}

得到结果

"aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "meibai",
          "doc_count": 2,
          "avg_price": {
            "value": 40
          }
        },
        {
          "key": "qingxing",
          "doc_count": 1,
          "avg_price": {
            "value": 40
          }
        },
        {
          "key": "fanzhu",
          "doc_count": 2,
          "avg_price": {
            "value": 30
          }
        }
      ]
    }
  }

5.1.5 按照指定的价格区间分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格。

GET ecommerce/product/_search
{
  "size": 0, 
  "aggs":{
    "group_by_tags":{
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,
            "to": 20
          },
          {
            "from":20,
            "to": 40
          },
          {
            "from": 40,
            "to": 50
          }
        ]
      }, 
      "aggs":{
        "group_by_tags":{
          "terms": {
            "field": "tags"
          },
          "aggs":{
            "average_price":{
              "avg":{
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

得到结果:

"aggregations": {
    "group_by_tags": {
      "buckets": [
        {
          "key": "0.0-20.0",
          "from": 0,
          "to": 20,
          "doc_count": 0,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "20.0-40.0",
          "from": 20,
          "to": 40,
          "doc_count": 2,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "fanzhu",
                "doc_count": 2,
                "average_price": {
                  "value": 30
                }
              },
              {
                "key": "meibai",
                "doc_count": 1,
                "average_price": {
                  "value": 30
                }
              }
            ]
          }
        },
        {
          "key": "40.0-50.0",
          "from": 40,
          "to": 50,
          "doc_count": 1,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "qingxing",
                "doc_count": 1,
                "average_price": {
                  "value": 40
                }
              }
            ]
          }
        }
      ]
    }
  }

嵌套分组

GET tvs/sales/_search
{
    "size":0,
    "aggs":{
        "group_by_color":{
            "terms":{
                "field":"color"
            },
            "aggs":{
                "color_avg_price":{
                    "avg":{
                        "field":"price"
                    }
                },
                "group_by_brand":{
                    "terms":{
                        "field":"brand"
                    },
                    "aggs":{
                        "brand_avg_price":{
                            "avg":{
                                "field":"price"
                            }
                        }
                    }
                }
            }
        }
    }
}

先根据color分组,求每个color分组的平均价格;然后在每个color分组内再根据brand分组,并求每个brand分组的平均价格。

分组求每组的平均数、总和、最大、最小值。

GET tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "colors": {
      "terms": {
        "field": "color"
      },
      "aggs": {
        "avg_price": {"avg": {"field": "price"}},
        "min_price": {"min": {"field": "price"}},
        "max_price": {"max": {"field": "price"}},
        "sum_price": {"sum": {"field": "price"}}
      }
    }
  }
}

单个分组与整体的比较。

GET tvs/sales/_search
{
  "size":0,
  "query": {
    "term": {
      "brand": {
        "value": "长虹"
      }
    }
  },
  "aggs": {
    "single_brand_avg_price": {
      "avg": {
        "field": "price"
      }
    },
    "all":{
      "global": {},
      "aggs":{
        "all_brand_avg_price":{
          "avg":{
            "field": "price"
          }
        }
      }
    }
  }
}

global:将所有的数据纳入聚合分组,而不管之前的query。
返回结果:

"aggregations": {
    "all": {
      "doc_count": 8,
      "all_brand_avg_price": {
        "value": 2650
      }
    },
    "single_brand_avg_price": {
      "value": 1666.6666666666667
    }
  }

根据聚合后的结果进行排序,如下,根据price的平均值对每个分组进行排序。

GET tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color",
        "order":{
          "avg_price":"desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

过滤和聚合混合使用

GET /music/doc/_search
{
  "size":0,
  "query": {
    "bool": {
      "filter": {
        "range": {
          "releaseTime": {
            "gte": "2021-01-01 00:00:00",
            "lte": "2021-05-01 23:59:59"
          }
        }
      }
    }
  }, 
  "aggs": {
    "collection": {
      "date_histogram": {
        "field": "releaseTime",
        "interval": "month",
        "format": "yyyy-MM-dd",
        "min_doc_count": 1,
        "extended_bounds": {
          "min": "2021-01-01",
          "max": "2021-05-01"
        }
      },
      "aggs": {
        "singer_agg": {
          "terms": {
            "field": "singer.key_word",
            "size": 10
          },
          "aggs": {
            "sum_collection": {
              "sum": {
                "field": "collectionNum"
              }
            }
          }
        },
        "total_sum":{
          "sum": {
            "field": "collectionNum"
          }
        }
      }
    }
  }
}

搜索和聚合结合:搜索结果正常返回,聚合则在搜索结果的基础上再添加自己的过滤条件。

GET /music/doc/_search
{
  "size":5,
  "query": {
    "term": {
      "singer": {
        "value": "周杰伦"
      }
    }
  },
  "aggs": {
    "collection": {
      "filter": {
        "range": {
          "releaseTime": {
            "gte": "2000-01-01 00:00:00",
            "lte": "2021-05-01 23:59:59"
          }
        }
      }, 
      "aggs": {
        "avg_collection": {
          "avg": {
            "field": "collectionNum"
          }
        }
      }
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值