字段说明:
station_id_d 站点ID
date_time 时间
对于连续含义不同理解的sql:
1):认为days为1是无效值的,判断为不连续 count(1)-1 (减1)
1、去除无效值
2、起止时间的差为连续天数,10/11/12 连续天数为2
select
station_id_d,count(1)-1 days
from
(select
station_id_d,ct,idx,
subtractDays(ct,idx) diff
from
(select
station_id_d ,groupArray(date_time) arr,
arrayEnumerate(arr) arr_index
from
(select station_id_d ,date_time from cmadb.t_aws_low_tem tazr where year='2017' and mon=1 and `day` >=1 and day <=10 )
group by station_id_d)
array join arr as ct,
arr_index as idx)
group by station_id_d,diff
order by station_id_d,days desc
limit 1 by station_id_d;
2):认为days为1的 是有效值的 判断为连续一天 -->
1、保留1的值
2、起止时间的差+1为连续天数,10/11/12 连续天数为3
select
station_id_d,count(1) days
from
(select
station_id_d,ct,idx,
subtractDays(ct,idx) diff
from
(select
station_id_d ,groupArray(date_time) arr,
arrayEnumerate(arr) arr_index
from
(select station_id_d ,date_time from cmadb.t_aws_low_tem tazr where year='2017' and mon=1 and `day` >=1 and day <=10 )
group by station_id_d)
array join arr as ct,
arr_index as idx)
group by station_id_d,diff
order by station_id_d,days desc
limit 1 by station_id_d;
3)加了条件HAVING days >1
1、查询出来的连续天数是1时,直接舍弃该结果,其他结果不做变化;
2、起止时间的差+1为连续天数,10/11/12 连续天数为3
select
station_id_d,
COUNT(1) days
from
(select
station_id_d,ct,idx,
subtractDays(ct,idx) diff
from
(select
station_id_d ,groupArray(date_time) arr,
arrayEnumerate(arr) arr_index
from
(select station_id_d ,date_time from cmadb.t_aws_low_tem where year='2017' and mon=1 and `day` >=1 and day <=10 )
group by station_id_d)
array join arr as ct,
arr_index as idx)
group by station_id_d,diff
HAVING days >1
order by station_id_d,days desc
limit 1 by station_id_d;
4)完善后的sql :addDays(diff,min(idx)) startTime,addDays(diff,max(idx)) endTime --> 在3)的基础上增加了连续天数的开始时间和结束时间
select
station_id_d,
COUNT(1) days,addDays(diff,min(idx)) startTime,addDays(diff,max(idx)) endTime
from
(select
station_id_d,arr,ct,idx,
subtractDays(ct,idx) diff
from
(select
station_id_d ,groupArray(date_time) arr,
arrayEnumerate(arr) arr_index
from
(select station_id_d ,date_time from cmadb.t_aws_low_tem tazr where year='2017' and mon=1 and `day` >=11 and day <=20 )
group by station_id_d)
array join arr as ct,
arr_index as idx order by station_id_d)
group by station_id_d,diff
HAVING days >1
order by station_id_d,days desc
limit 1 by station_id_d;