--(1)建表
create table cf_tmp.lst_work_wifi_test(
wifi_name string,
last_date string,
wifi_imei_cnt int
);
--(2)插入数据
insert into cf_tmp.lst_work_wifi_test values
('fintell1107','20190430',30),
('fintell1107','20190331',25),
('fintell1107','20190228',21),
('fintell1107','20190131',19),
('fintell1107','20181231',27),
('fintell1106','20190430',32),
('fintell1106','20190331',29),
('fintell1106','20190228',26),
('fintell1106','20190131',16),
('fintell1106','20181231',23);
--(3)查看数据
select * from cf_tmp.lst_work_wifi_test;
选出的数据一共10条,如下:
--(4)以下是我想验证的(这个错误之前犯过,当时xiaoxiao和guixiang帮我一起看过,感谢你们)
select
wifi_name
,case when rn = 1 then wifi_imei_cnt end wifi_imei_cnt_1m
,case when rn = 3 then wifi_imei_cnt end wifi_imei_cnt_3m
from
(select
wifi_name
,last_date
,wifi_imei_cnt
,row_number() over(partition by wifi_name order by last_date desc) as rn
from cf_tmp.lst_work_wifi_test
)a;
结果:
即,rn既不等于1也不等于3的那些行会自动变成null,这些不可以忽略。
注意了,这样修改以后就可以得到想要的东西了!!!(这边依然是想起xiaoxiao之前和我讲过,同时还看了aoyun的脚本,感谢!)
--(5)这样修改
select
wifi_name
,sum(case when rn = 1 then wifi_imei_cnt end )wifi_imei_cnt_1m
,sum(case when rn = 3 then wifi_imei_cnt end )wifi_imei_cnt_3m
from
(select
wifi_name
,last_date
,wifi_imei_cnt
,row_number() over(partition by wifi_name order by last_date desc) as rn
from cf_tmp.lst_work_wifi_test
)a
group by wifi_name;
结果:
Bingo!
Q:若一列有6个值,其中有1个是空值,那么avg函数的分母是6还是5呢?
insert into cf_tmp.lst_work_wifi_test values
('fintell1107','20181130',null),
('fintell1106','20181130',null);
select wifi_name,avg(wifi_imei_cnt) as wifi_imei_cnt_avg from cf_tmp.lst_work_wifi_test group by wifi_name;
结果:
说明分母是5。
insert into cf_tmp.lst_work_wifi_test values
('fintell1108','20190331',25),
('fintell1108','20190131',19),
('fintell1108','20181231',27),
('fintell1105','20190331',19),
('fintell1105','20190228',22),
('fintell1105','20190131',18),
('fintell1105','20181130',20);
select * from cf_tmp.lst_work_wifi_test order by wifi_name asc,last_date desc;
select
wifi_name
,sum(case when last_date = month_1 then wifi_imei_cnt end ) wifi_imei_cnt_1m
,sum(case when last_date = month_2 then wifi_imei_cnt end ) wifi_imei_cnt_2m
,sum(case when last_date = month_3 then wifi_imei_cnt end ) wifi_imei_cnt_3m
,sum(case when last_date = month_4 then wifi_imei_cnt end ) wifi_imei_cnt_4m
,sum(case when last_date = month_5 then wifi_imei_cnt end ) wifi_imei_cnt_5m
,sum(case when last_date = month_6 then wifi_imei_cnt end ) wifi_imei_cnt_6m
from
(select
wifi_name
,last_date
,wifi_imei_cnt
,row_number() over(partition by wifi_name order by last_date desc) as rn
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),0),'MM'),1),'-','') as month_1
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-1),'MM'),1),'-','') as month_2
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-2),'MM'),1),'-','') as month_3
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-3),'MM'),1),'-','') as month_4
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-4),'MM'),1),'-','') as month_5
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-5),'MM'),1),'-','') as month_6
from cf_tmp.lst_work_wifi_test
)a
group by wifi_name
;
以上代码第2部分的结果:
wifi_name | last_date | wifi_imei_cnt |
fintell1105 | 20190331 | 19 |
fintell1105 | 20190228 | 22 |
fintell1105 | 20190131 | 18 |
fintell1105 | 20181130 | 20 |
fintell1106 | 20190430 | 32 |
fintell1106 | 20190331 | 29 |
fintell1106 | 20190228 | 26 |
fintell1106 | 20190131 | 16 |
fintell1106 | 20181231 | 23 |
fintell1106 | 20181130 | NULL |
fintell1107 | 20190430 | 30 |
fintell1107 | 20190331 | 25 |
fintell1107 | 20190228 | 21 |
fintell1107 | 20190131 | 19 |
fintell1107 | 20181231 | 27 |
fintell1107 | 20181130 | NULL |
fintell1108 | 20190331 | 25 |
fintell1108 | 20190131 | 19 |
fintell1108 | 20181231 | 27 |
以上代码第3部分的结果:
wifi_name | wifi_imei_cnt_1m | wifi_imei_cnt_2m | wifi_imei_cnt_3m | wifi_imei_cnt_4m | wifi_imei_cnt_5m | wifi_imei_cnt_6m |
fintell1105 | NULL | 19 | 22 | 18 | NULL | 20 |
fintell1106 | 32 | 29 | 26 | 16 | 23 | NULL |
fintell1107 | 30 | 25 | 21 | 19 | 27 | NULL |
fintell1108 | NULL | 25 | NULL | 19 | 27 | NULL |
select
wifi_name
,avg(case when last_date >= month_3 then wifi_imei_cnt end ) wifi_imei_cnt_0ver3m
from
(select
wifi_name
,last_date
,wifi_imei_cnt
,row_number() over(partition by wifi_name order by last_date desc) as rn
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),0),'MM'),1),'-','') as month_1
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-1),'MM'),1),'-','') as month_2
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-2),'MM'),1),'-','') as month_3
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-3),'MM'),1),'-','') as month_4
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-4),'MM'),1),'-','') as month_5
,regexp_replace(date_sub(trunc(add_months(concat(substr('${date}',1,4),'-',substr('${date}',5,2),'-',substr('${date}',7,2)),-5),'MM'),1),'-','') as month_6
from cf_tmp.lst_work_wifi_test
)a
group by wifi_name
;
结果: