sls大盘监控

大盘监控sql

可用率
* |SELECT sum(CASE when HttpCode >=200 and HttpCode < 500 then 1 else 0 end) * 1.0 / COUNT(*) * 100 as available_rate

成功率
* |SELECT sum(CASE when HttpCode =200 then 1 else 0 end) * 1.0 / COUNT(*) * 100 as available_rate

总调用量
* |SELECT COUNT(*) as total

峰值QPS
* | select max(qps) as max from (select __time__ as ts, count(*) as qps from log group by ts limit 100000)

活跃用户数
* |select COUNT(*) as count from (SELECT CallerUid from log GROUP BY CallerUid)

活跃API数
* |select COUNT(*) as count from (SELECT Api from log GROUP BY Api)

API分布
(*)| select case when idx < 8 then replace(Api, 'Recognize', '') else 'Other' end as api, sum(count) as count from (select Api, COUNT(*) AS count, row_number() over (order by COUNT(*) desc) as idx from log GROUP BY Api) group by api order by count desc 

状态码分布
(*)| SELECT case when HttpCode>=400 and HttpCode<500 then '4xx' when HttpCode>=500 and HttpCode<600 then '5xx' when HttpCode=200 then '200' else '未知' end as code, COUNT(*) as count GROUP by code 


QPS时序
(*)| select ts/60*60 as minute, max(qps) as max_qps, sum(qps)/60 as avg_qps, sum(Basic)/60 as avg_Basic, sum(Idcard)/60 as avg_Idcard, sum(General)/60 as avg_General from (select __time__ as ts, count(*) as qps, sum(case when Api='SearchImageByPic' then 1 else 0 END) as Basic, sum(case when Api='RecognizeIdcard' then 1 else 0 END) as Idcard, sum(case when Api='RecognizeGeneral' then 1 else 0 END) as General from log group by ts limit 100000) group by minute limit 100000 
改
上海区域
(((*) and RegionId: cn-shanghai))| select ts/60*60 as minute, max(qps) as max_qps, sum(qps)/60 as avg_qps, sum(Basic)/60 as avg_SearchImage, sum(Idcard)/60 as avg_AddImage, sum(General)/60 as avg_DeleteImage,sum(SearchByUrl)/60 as avg_AlimamaSearch,sum(GetProductInfoByIds)/60 as avg_AlimamaById from (select __time__ as ts, count(*) as qps, sum(case when Api='SearchImageByPic' then 1 when Api='SearchImageByName' then 1 when Api='SearchImage' then 1 else 0 END) as Basic, sum(case when Api='AddImage' then 1 else 0 END) as Idcard, sum(case when Api='DeleteImage' then 1 else 0 END) as General, sum(case when Api='SearchByUrl' then 1 else 0 END) as SearchByUrl, sum(case when Api='GetProductInfoByIds' then 1 else 0 END) as GetProductInfoByIds from log group by ts limit 100000) group by minute limit 100000
其他区域
(*))| select ts/60*60 as minute, max(qps) as max_qps, sum(qps)/60 as avg_qps, sum(Basic)/60 as avg_SearchImage, sum(Idcard)/60 as avg_AddImage, sum(General)/60 as avg_DeleteImage from (select __time__ as ts, count(*) as qps, sum(case when Api='SearchImageByPic' then 1  when Api='SearchImageByName' then 1 when Api='SearchImage' then 1 else 0 END) as Basic, sum(case when Api='AddImage' then 1 else 0 END) as Idcard, sum(case when Api='DeleteImage' then 1 else 0 END) as General from log group by ts limit 100000) group by minute limit 100000

RT时序
(*)| select __time__ / 60 * 60 as time, floor(avg(IspUsedTime)) avg, approx_percentile(IspUsedTime, 0.95) as P95, approx_percentile(IspUsedTime, 0.99) as P99 group by time ORDER BY time LIMIT 100000 

状态码时序
(*)| select __time__ / 60 * 60 as time, sum(CASE when HttpCode =200 then 1 else 0 end) c200, sum(CASE when HttpCode >=400 and HttpCode < 500 then 1 else 0 end) as c4xx, sum(CASE when HttpCode >=500 then 1 else 0 end) as c5xx group by time ORDER BY time LIMIT 100000 

RT分布
(*)| SELECT CASE WHEN IspUsedTime>=0 and IspUsedTime<500 THEN '0~0.5s' WHEN IspUsedTime>=500 AND IspUsedTime < 1000 THEN '0.5s~1s' WHEN IspUsedTime>=1000 AND IspUsedTime < 2000 THEN '1s~2s' WHEN IspUsedTime>=2000 AND IspUsedTime < 5000 THEN '2s~5s' ELSE '5s+' END as range, count(*) as count GROUP by range order by count desc 

用户区域分布
(*)| select case when idx<6 then region else '其他' end as location, sum(count) as total from (select concat(ip_to_country(ClientIp),ip_to_province(ClientIp)) as region, COUNT(*) as count, row_number() over (order by COUNT(*) desc) as idx from log GROUP by region) GROUP by location order by total desc 

调用量Top30用户
(*)| SELECT CallerUid,COUNT(*) as total, sum(case when HttpCode=200 then 1 else 0 end) as c200, sum(case when HttpCode>=400 and HttpCode<500 then 1 else 0 end) as c4xx, sum(case when HttpCode>=500 then 1 else 0 end) as c5xx GROUP by CallerUid order by total desc limit 30 

API可用率&RT水位
((*))| select Api, COUNT(*) as total, round(sum(case when HttpCode <500 then 1 else 0 end) * 100.0/COUNT(*), 2) as available_rate, round(sum(case when HttpCode=200 then 1 else 0 end) * 100.0/COUNT(*),2) as success_rate,floor(avg(IspUsedTime)) avg, approx_percentile(IspUsedTime, 0.95) as P95, approx_percentile(IspUsedTime, 0.99) as P99 GROUP by Api order by total desc 

5xx分析
* and HttpCode>=500| select Api,HttpCode, COUNT(*) as count GROUP by Api,HttpCode order by count desc

慢请求分析
* and IspUsedTime>=1500 and HttpCode=200| select Api, COUNT(*)as count, sum(if(IspUsedTime<3000,1,0)) as rt1500_3000,sum(if(IspUsedTime>=3000 and IspUsedTime<5000,1,0)) as rt3000_5000,sum(if(IspUsedTime>5000,1,0)) as rt5000 GROUP by Api order by count desc
改
(*)| and HttpCode=200| select Api, COUNT(*) as count, sum(if( IspUsedTime>=1500 and IspUsedTime<3000,1,0)) as rt1500_3000,sum(if(IspUsedTime>=3000 and IspUsedTime<5000,1,0)) as rt3000_5000,sum(if(IspUsedTime>5000,1,0)) as rt5000 GROUP by Api order by count desc

请求大小(5min均值)时序
(* and HttpCode=200)| select __time__/300*300 as ts, sum(length(RequestContent))/COUNT(*) as avg_size GROUP by ts 

限流时序
((*))| select __time__/60*60 as time, sum(CASE when ErrorCode = 'DeniedRequest' then 1 else 0 end) as flowLimit ,Api   group by Api,time  HAVING flowLimit >0 ORDER BY time LIMIT 100000 

用户维度4xx分析
((* and HttpCode>200 and HttpCode<500))| select CallerUid, Api, ErrorCode, COUNT(*) as count GROUP BY CallerUid, Api, ErrorCode ORDER BY count desc limit 10

所有请求都失败的用户
(* and RegionId: cn-shenzhen)| SELECT CallerUid,COUNT(*) as total, sum(case when HttpCode=200 then 1 else 0 end) as c200, sum(case when HttpCode>=400 and HttpCode<500 then 1 else 0 end) as c4xx, sum(case when HttpCode>=500 then 1 else 0 end) as c5xx  WHERE CallerUid!='' GROUP by CallerUid HAVING c200=0 order by total desc 

告警sql

慢请求告警(availability_rate>3.0)
* and RegionId: cn-hangzhou | select time,count,rt3000,round(rt3000 / (0.0 + count)* 100, 2) as availability_rate from ( select time_series(__time__,'30s','%H:%i:%s','0') as time,count(*) as count,sum(if(IspUsedTime>3000,1,0)) as rt3000 from log group by time) order by time LIMIT 100000
告警模版:
30秒总请求数为${count},5xx请求数为${http5xx},达到了百分之${availability_rate}

5xx告警(availability_rate>1.0)
* and RegionId: cn-shanghai | select time,count, http5xx,round(http5xx / (0.0 + count)* 100, 2) as availability_rate from ( select time_series(__time__,'30s','%H:%i:%s','0') as time,count(*) as count,sum(CASE when HttpCode >=500 then 1 else 0 end) as http5xx from log group by time) order by time LIMIT 100000
告警模版:
30秒总请求数为${count},慢请求请求数为${rt3000},达到了百分之${availability_rate}

某个用户所有请求失败(c200=0)
((*) and RegionId: cn-shanghai)| SELECT CallerUid,COUNT(*) as total, sum(case when HttpCode=200 then 1 else 0 end) as c200 GROUP by CallerUid order by total desc

30秒内如果5xx数量超过30个
* |  select time_series(__time__,'30s','%H:%i:%s','0') as time,count(*) as count,sum(CASE when HttpCode >=500 then 1 else 0 end) as http5xx from log group by time order by time LIMIT 100000

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值