Elasticsearch 统计代码例子(用 ES 聚合实现类似 MySQL 报表统计)

本文介绍了如何使用Elasticsearch进行数据聚合,包括计算最近15分钟平均访问时间、UV、请求执行时间的百分位数、基数以及按URL分组的响应时间。通过示例查询展示了如何利用date_histogram进行时间窗口统计和按链接请求时间的大小进行排序。
摘要由CSDN通过智能技术生成

aggs

avg 平均数

最近15分钟的平均访问时间,upstream_time_ms是每次访问时间,单位毫秒。(注意:下文示例中的 filtered 查询在 Elasticsearch 2.0 已被标记废弃、5.0 起移除,新版本请改用 bool 查询的 filter 子句。)

{

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"avg": {

"field": "upstream_time_ms"

}

}

}

}

//当然你也可以直接将过滤器写在aggs里面

{

"size": 0,

"aggs": {

"filtered_aggs": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

},

"aggs": {

"execute_time": {

"avg": {

"field": "upstream_time_ms"

}

}

}

}

}

}

cardinality 基数(注意:这是基于 HyperLogLog++ 算法的近似统计,数据量大时结果并非精确值),比如计算uv

你可能注意到了size:0,如果你只需要统计数据,不要数据本身,就设置它,这不是我投机取巧,官方文档也是这么干的。

{

"size": 0,

"aggs": {

"filtered_aggs": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

},

"aggs": {

"ipv": {

"cardinality": {

"field": "ip"

}

}

}

}

}

}

percentiles 基于百分比统计

最近15分钟,99.9%的请求的执行时间不超过多少

{

"size": 0,

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"percentiles": {

"field": "upstream_time_ms",

"percents": [

90,

95,

99.9

]

}

}

}

}

//返回值,0.1%的请求超过了159ms

{

"took": 620,

"timed_out": false,

"_shards": {

"total": 5,

"successful": 5,

"failed": 0

},

"hits": {

"total": 679400,

"max_score": 0,

"hits": []

},

"aggregations": {

"execute_time": {

"values": {

"90.0": 24.727003484320534,

"95.0": 72.6200981699678,

"99.9": 159.01065773524886 //99.9%的数据落在159ms以内,159是系统计算出来的

}

}

}

}

percentile_ranks 指定一个范围,有多少数据落在这里

{

"size": 0,

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"percentile_ranks": {

"field": "upstream_time_ms",

"values": [

50,

160

]

}

}

}

}

//返回值

{

"took": 666,

"timed_out": false,

"_shards": {

"total": 5,

"successful": 5,

"failed": 0

},

"hits": {

"total": 681014,

"max_score": 0,

"hits": []

},

"aggregations": {

"execute_time": {

"values": {

"50.0": 94.14716385885366,

"160.0": 99.91130872493076 //99.9%的数据落在了160ms以内,这次,160是我指定的,系统计算出99.9%

}

}

}

}

统计最近15分钟,不同的链接请求时间大小。(注意:terms 聚合需要作用在未分词的字段上,如 not_analyzed 字段,新版本中对应 keyword 类型,否则会按分词后的词项分桶。)

{

"size": 0,

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"terms": {

"field": "uri"

},

"aggs": {

"avg_time": {

"avg": {

"field": "upstream_time_ms"

}

}

}

}

}

}

//返回,看起来url1 比 url2慢一点(avg_time),不过url1的请求量比较大 (doc_count)

{

"took": 1655,

"timed_out": false,

"_shards": {

"total": 5,

"successful": 5,

"failed": 0

},

"hits": {

"total": 710802,

"max_score": 0,

"hits": []

},

"aggregations": {

"execute_time": {

"doc_count_error_upper_bound": 10,

"sum_other_doc_count": 347175,

"buckets": [

{

"key": "/url1",

"doc_count": 362688,

"avg_time": {

"value": 6.601660380271749

}

},

{

"key": "/url2",

"doc_count": 939,

"avg_time": {

"value": 5.313099041533547

}

}

]

}

}

}

找出url响应最慢的前2名

{

"size": 0,

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-15m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"terms": {

"size": 2,

"field": "uri",

"order": {

"avg_time": "desc"

}

},

"aggs": {

"avg_time": {

"avg": {

"field": "upstream_time_ms"

}

}

}

}

}

}

//返回值

{

"took": 1622,

"timed_out": false,

"_shards": {

"total": 5,

"successful": 5,

"failed": 0

},

"hits": {

"total": 748712,

"max_score": 0,

"hits": []

},

"aggregations": {

"execute_time": {

"doc_count_error_upper_bound": -1,

"sum_other_doc_count": 748710,

"buckets": [

{

"key": "url_shit",

"doc_count": 123,

"avg_time": {

"value": 8884

}

},

{

"key": "url_shit2",

"doc_count": 456,

"avg_time": {

"value": 8588

}

}

]

}

}

}

value_count 文档数量

相当于

select count(*) from table group by uri,为了达到这个目的,只需要把上文中,avg 换成value_count。不过avg的时候,结果中的doc_count其实达到了同样效果。

怎么取数据画个图?比如:最近2分钟,每20秒的时间窗口中,平均响应时间是多少

{

"size": 0,

"query": {

"filtered": {

"filter": {

"range": {

"@timestamp": {

"gt": "now-2m",

"lt": "now"

}

}

}

}

},

"aggs": {

"execute_time": {

"date_histogram": {

"field": "@timestamp",

"interval": "20s"

},

"aggs": {

"avg_time": {

"avg": {

"field": "upstream_time_ms"

}

}

}

}

}

}

pv 分时统计图(每小时一统计)

周期大小对性能影响不大

{

"size":0,

"fields":false,

"aggs": {

"execute_time": {

"date_histogram": {

"field": "@timestamp",

"interval": "1h"

}

}

}

}

Elasticsearch 中,可以使用聚合(Aggregation)实现对文档进行聚合统计,其中包括出现次数的统计。下面是一个示例: 假设我们有一个名为 "sales" 的索引,包含以下文档: ``` { "product": "A", "price": 10.0, "timestamp": "2021-08-01T10:00:00Z" } { "product": "B", "price": 15.0, "timestamp": "2021-08-01T10:05:00Z" } { "product": "A", "price": 12.0, "timestamp": "2021-08-01T10:10:00Z" } { "product": "C", "price": 20.0, "timestamp": "2021-08-01T10:15:00Z" } { "product": "A", "price": 8.0, "timestamp": "2021-08-01T10:20:00Z" } { "product": "B", "price": 18.0, "timestamp": "2021-08-01T10:25:00Z" } ``` 现在,我们想要统计每个产品出现的次数,可以使用以下聚合查询: ``` { "aggs": { "products": { "terms": { "field": "product" } } } } ``` 其中,"aggs" 是聚合查询的关键字,"products" 是我们给这个聚合起的名字,"terms" 表示我们要按照某个字段进行分组,"field" 指定了我们要按照哪个字段进行分组。 运行上述查询后,得到的结果如下: ``` { "aggregations": { "products": { "buckets": [ { "key": "A", "doc_count": 3 }, { "key": "B", "doc_count": 2 }, { "key": "C", "doc_count": 1 } ] } } } ``` 其中,"key" 表示产品名称,"doc_count" 表示该产品出现的次数。 如果想要对出现次数进行排序,可以使用以下聚合查询: ``` { "aggs": { "products": { "terms": { "field": "product", "order": { "_count": "desc" } } } } } ``` 其中,"order" 表示按照什么字段进行排序,"_count" 表示按照出现次数进行排序,"desc" 表示降序排列。 运行上述查询后,得到的结果如下: ``` { "aggregations": { "products": { "buckets": [ { "key": "A", "doc_count": 3 }, { "key": "B", "doc_count": 2 }, { "key": "C", "doc_count": 1 } ] } } } ``` 其中,产品 A 出现的次数最多,排在第一位。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值