1. 行列转换
描述:表中记录了各年份各部门的平均绩效考核成绩。
表名:t1
表结构:
a -- 年份
b -- 部门
c -- 绩效得分
表内容:
a b c
2014 B 9
2015 A 8
2014 A 10
2015 B 7
问题一:多行转多列
问题描述:将上述表内容转为如下输出结果所示:
a col_A col_B
2014 10 9
2015 8 7
参考答案:
-- Pivot t1: one output row per year (a), one score column per department (b).
-- Each (year, department) cell keeps the max of its matching c values;
-- rows of other departments contribute NULL and are ignored by max().
select
    a,
    max(case
            when b = 'A' then c
            else null
        end) as col_A,
    max(case
            when b = 'B' then c
            else null
        end) as col_B
from t1
group by
    a;
问题二:如何将结果转成源表?(多列转多行)
问题描述:将问题一的结果转成源表,问题一结果表名为t1_2。
参考答案:
-- Unpivot t1_2 back into (a, b, c) rows: one branch per department column,
-- each tagging its rows with the department letter.
with unpivoted as (
    select
        a,
        'A'   as b,
        col_a as c
    from t1_2

    union all

    select
        a,
        'B'   as b,
        col_b as c
    from t1_2
)
select
    a,
    b,
    c
from unpivoted;
问题三:同一部门会有多个绩效,求多行转多列结果
问题描述:2014年公司组织架构调整,导致部门出现多个绩效,业务及人员不同,无法合并算绩效,源表内容如下:
参考答案:
-- Pivot t1 when a (year, department) pair can carry several scores:
-- step 1 joins the distinct scores of each pair into one comma-separated string,
-- step 2 pivots those strings into one column per department.
-- NOTE(review): collect_set removes duplicate score values within a pair;
-- confirm that dropping repeated scores is intended.
with scores_per_dept as (
    select
        a,
        b,
        concat_ws(',', collect_set(cast(c as string))) as c
    from t1
    group by
        a,
        b
)
select
    a,
    max(case when b = 'A' then c else null end) as col_A,
    max(case when b = 'B' then c else null end) as col_B
from scores_per_dept
group by
    a;
2. 排名中取他值
表名:t2
表字段及内容:
a b c
2014 A 3
2014 B 1
2014 C 2
2015 A 4
2015 D 3
问题一:按a分组取b字段最小时对应的c字段
输出结果如下所示:
a min_c
2014 3
2015 4
参考答案:
-- For each group a, return the c value of the row whose b is smallest.
with ranked as (
    select
        a,
        b,
        c,
        -- rn = 1 marks the row with the smallest b inside each a
        row_number() over (partition by a order by b) as rn
    from t2
)
select
    a,
    c as min_c
from ranked
where rn = 1;
问题二:按a分组取b字段排第二时对应的c字段
输出结果如下所示:
a second_c
2014 1
2015 3
参考答案:
-- For each group a, return the c value of the row whose b ranks second (ascending).
-- Groups with fewer than two rows produce no output row.
with ranked as (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b) as rn
    from t2
)
select
    a,
    c as second_c
from ranked
where rn = 2;
-- (The question heading for this query is missing in the surrounding text.)
-- For each group a, return the c of the row with the smallest b (min_c)
-- and the c of the row with the largest b (max_c).
with ranked as (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b)      as asc_rn,
        row_number() over (partition by a order by b desc) as desc_rn
    from t2
)
select
    a,
    -- only one row per group has asc_rn = 1 / desc_rn = 1, so min()/max()
    -- simply pick that single non-NULL value
    min(case when asc_rn = 1 then c else null end)  as min_c,
    max(case when desc_rn = 1 then c else null end) as max_c
from ranked
where asc_rn = 1
   or desc_rn = 1
group by
    a;
问题四:按a分组取b字段第二小和第二大时对应的c字段
输出结果如下所示:
a min_c max_c
2014 1 1
2015 3 4
参考答案:
-- For each group a, return the c of the row with the second-smallest b (min_c)
-- and the c of the row with the second-largest b (max_c).
with ranked as (
    select
        a,
        c,
        row_number() over (partition by a order by b)      as rn_min,
        row_number() over (partition by a order by b desc) as rn_max
    from t2
)
select
    a,
    max(if(rn_min = 2, c, null)) as min_c,
    max(if(rn_max = 2, c, null)) as max_c
from ranked
where rn_min = 2
   or rn_max = 2
group by
    a;
问题五:按a分组取b字段前两小和前两大时对应的c字段
注意:需保持b字段最小、最大排首位
输出结果如下所示:
a min_c max_c
2014 3,1 2,1
2015 4,3 3,4
参考答案:
-- For each group a, build two comma-separated lists of c values:
--   min_c: c of the smallest b first, then c of the second-smallest b
--   max_c: c of the largest b first, then c of the second-largest b
--
-- Changes versus the collect_list/join formulation:
--   * c is cast to string before concat_ws, matching the other queries in
--     this file that cast before concatenating.
--   * The position inside each list is fixed explicitly by rank instead of
--     relying on the (unspecified) order in which collect_list gathers rows.
--   * Both rankings are computed in one pass over t2, so no self-join is needed.
with ranked as (
    select
        a,
        cast(c as string) as c_str,
        row_number() over (partition by a order by b)      as asc_rn,
        row_number() over (partition by a order by b desc) as desc_rn
    from t2
)
select
    a,
    -- when a group has only one row, the rank-2 slot is NULL; this relies on
    -- concat_ws skipping NULL arguments
    concat_ws(',',
              max(case when asc_rn = 1 then c_str end),
              max(case when asc_rn = 2 then c_str end)) as min_c,
    concat_ws(',',
              max(case when desc_rn = 1 then c_str end),
              max(case when desc_rn = 2 then c_str end)) as max_c
from ranked
where asc_rn <= 2
   or desc_rn <= 2
group by
    a;
3. 累计求值
表名:t3
表字段及内容:
a b c
2014 A 3
2014 B 1
2014 C 2
2015 A 4
2015 D 3
问题一:按a分组按b字段排序,对c累计求和
输出结果如下所示:
a b sum_c
2014 A 3
2014 B 4
2014 C 6
2015 A 4
2015 D 7
参考答案:
-- Running total of c inside each group a, accumulated in b order.
select
    a,
    b,
    c,
    sum(c) over in_b_order as sum_c
from t3
window in_b_order as (partition by a order by b);
问题二:按a分组按b字段排序,对c取累计平均值
输出结果如下所示:
a b avg_c
2014 A 3
2014 B 2
2014 C 2
2015 A 4
2015 D 3.5
参考答案:
-- Running average of c inside each group a, accumulated in b order.
select
    a,
    b,
    c,
    avg(c) over in_b_order as avg_c
from t3
window in_b_order as (partition by a order by b);
问题三:按a分组按b字段排序,对b取累计排名比例
输出结果如下所示:
a b ratio_c
2014 A 0.33
2014 B 0.67
2014 C 1.00
2015 A 0.50
2015 D 1.00
参考答案:
-- Cumulative rank ratio: position of the row inside its group a (ordered by b)
-- divided by the number of non-NULL c values in that group, rounded to 2 digits.
with positioned as (
    select
        a,
        b,
        c,
        row_number() over (partition by a order by b) as pos_in_group,
        count(c)     over (partition by a)            as group_size
    from t3
)
select
    a,
    b,
    c,
    round(pos_in_group / group_size, 2) as ratio_c
from positioned
order by
    a,
    b;
问题四:按a分组按b字段排序,对c取累计求和比例
输出结果如下所示:
a b ratio_c
2014 A 0.50
2014 B 0.67
2014 C 1.00
2015 A 0.57
2015 D 1.00
参考答案:
-- Cumulative sum ratio: running total of c (in b order) divided by the
-- group's overall total of c, rounded to 2 digits.
with totals as (
    select
        a,
        b,
        c,
        sum(c) over (partition by a order by b) as running_c,
        sum(c) over (partition by a)            as total_c
    from t3
)
select
    a,
    b,
    c,
    round(running_c / total_c, 2) as ratio_c
from totals
order by
    a,
    b;
4. 窗口大小控制
表名:t4
表字段及内容:
a b c
2014 A 3
2014 B 1
2014 C 2
2015 A 4
2015 D 3
问题一:按a分组按b字段排序,对c取前后各一行的和
输出结果如下所示:
a b sum_c
2014 A 1
2014 B 5
2014 C 1
2015 A 3
2015 D 4
参考答案:
-- Sum of the neighbouring c values (previous row + next row in b order)
-- inside each group a. A missing neighbour at a group edge counts as 0.
select
    a,
    b,
    lag(c, 1, 0)  over in_b_order
  + lead(c, 1, 0) over in_b_order as sum_c
from t4
window in_b_order as (partition by a order by b);
问题二:按a分组按b字段排序,对c取平均值
问题描述:前一行与当前行的均值!
输出结果如下所示:
a b avg_c
2014 A 3
2014 B 2
2014 C 1.5
2015 A 4
2015 D 3.5
参考答案:
-- Average of the current c and the previous row's c (b order, per group a).
-- When there is no previous value, the current c is returned unchanged.
with with_prev as (
    select
        a,
        b,
        c,
        lag(c, 1) over (partition by a order by b) as lag_c
    from t4
)
select
    a,
    b,
    case
        when lag_c is not null then (c + lag_c) / 2
        else c
    end as avg_c
from with_prev;
5. 产生连续数值
输出结果如下所示:
1
2
3
4
5
...
100
参考答案:
不借助其他任何外表,实现产生连续数值
此处给出两种解法,
其一:
-- Generate the consecutive integers id_start..id_end without reading any table.
-- space(id_end - id_start) builds a string of blanks, split(…, '') turns it into
-- an array, and posexplode emits one row per array element together with its
-- 0-based position; id_start + pos is the generated number.
-- id_end is 100 so that the result matches the stated 1..100 output;
-- raise it (e.g. to 1000000) for a longer sequence.
select
    id_start + pos as id
from (
    select
        1   as id_start,
        100 as id_end
) m
lateral view posexplode(split(space(id_end - id_start), '')) t as pos, val;
其二:
-- Generate consecutive integers by numbering the exploded elements of an
-- array built from a string of 99 blanks; no source table is read.
with blanks as (
    select split(space(99), '') as x
)
select
    row_number() over () as id
from blanks
lateral view explode(x) ex as blank;
那如何产生1至1000000连续数值?
参考答案:
-- Same technique as the 1..100 generator, scaled up: number the exploded
-- elements of an array built from a string of 999999 blanks.
with blanks as (
    select split(space(999999), '') as x
)
select
    row_number() over () as id
from blanks
lateral view explode(x) cells as ex;
注意:split 的分隔符 '' 是空字符串,引号里面没有空格!
6. 数据扩充与收缩
表名:t6
表字段及内容:
a
3
2
4
问题一:数据扩充
输出结果如下所示:
a b
3 3、2、1
2 2、1
4 4、3、2、1
参考答案:
-- Expand every value a of t6 into the list of integers from a down to 1,
-- joined with '、'.
-- Fix: the lateral view clause must be written "explode(x) <table_alias> as
-- <column_alias>"; the previous "explode(x) as t1 pe" does not parse.
-- NOTE(review): the descending order inside b relies on the inner ORDER BY
-- surviving the outer GROUP BY / collect_set, which is not guaranteed —
-- confirm on the target engine.
select
    t.a,
    concat_ws('、', collect_set(cast(t.rn as string))) as b
from
(
    select
        t6.a,
        b.rn
    from t6
    left join
    (
        -- helper sequence: one numbered row per element of the split array
        select
            row_number() over() as rn
        from
            (select split(space(5), '') as x) t -- space(5) can be adjusted to the largest value in t6
        lateral view
            explode(x) t1 as pe
    ) b
    on 1 = 1                 -- pair every t6 row with every sequence row
    where t6.a >= b.rn       -- keep only the numbers 1..a
    order by t6.a, b.rn desc
) t
group by t.a;
问题二:数据扩充,排除偶数
输出结果如下所示:
a b
3 3、1
2 1
4 3、1
参考答案:
-- Expand every value a of t6 into the list of odd integers from a down to 1,
-- joined with '、'.
-- Fix: the lateral view clause must be written "explode(x) <table_alias> as
-- <column_alias>"; the previous "explode(x) as t1 pe" does not parse.
-- NOTE(review): the descending order inside b relies on the inner ORDER BY
-- surviving the outer GROUP BY / collect_set, which is not guaranteed —
-- confirm on the target engine.
select
    t.a,
    concat_ws('、', collect_set(cast(t.rn as string))) as b
from
(
    select
        t6.a,
        b.rn
    from t6
    left join
    (
        -- helper sequence: one numbered row per element of the split array;
        -- space(5) has to cover the largest value in t6
        select
            row_number() over() as rn
        from
            (select split(space(5), '') as x) t
        lateral view
            explode(x) t1 as pe
    ) b
    on 1 = 1                                  -- pair every t6 row with every sequence row
    where t6.a >= b.rn and b.rn % 2 = 1       -- numbers 1..a, odd ones only
    order by t6.a, b.rn desc
) t
group by t.a;
7. 合并与拆分
表名:t7
表字段及内容:
a b
2014 A
2014 B
2015 B
2015 D
问题一:合并
输出结果如下所示:
2014 A、B
2015 B、D
参考答案:
-- Merge the distinct b values of each group a into one '、'-separated string.
-- Fix: the column was referenced as t.b, but no alias t exists in this query;
-- it is read from t7 directly.
select
    a,
    concat_ws('、', collect_set(b)) as b
from t7
group by
    a;
问题二:拆分
问题描述:将分组合并的结果拆分出来
参考答案:
-- Split the merged result back into rows: first rebuild the '、'-joined string
-- per group a, then explode it into one row per item.
with merged as (
    select
        a,
        concat_ws('、', collect_set(t7.b)) as b
    from t7
    group by
        a
)
select
    merged.a,
    d
from merged
lateral view explode(split(merged.b, '、')) table_tmp as d;
8. 模拟循环操作
表名:t8
表字段及内容:
a
1011
0101
问题一:如何将字符'1'的位置提取出来
输出结果如下所示:
1,3,4
2,4
参考答案:
-- For every string a in t8, list the 1-based positions at which the
-- character '1' occurs, joined with ','.
-- The characters are picked with fixed substr() calls (positions 1, 2, 3 and
-- the last character), so the query is written for 4-character strings.
with chars as (
    select
        a,
        concat_ws(',',
                  substr(a, 1, 1),
                  substr(a, 2, 1),
                  substr(a, 3, 1),
                  substr(a, -1)) as str
    from t8
),
ones as (
    select
        a,
        pos + 1 as pos_1based,   -- posexplode positions start at 0
        chr
    from chars
    lateral view posexplode(split(str, ',')) t as pos, chr
    where chr = '1'
)
select
    a,
    concat_ws(',', collect_list(cast(pos_1based as string))) as res
from ones
group by
    a;
9. 不使用distinct或group by去重
表名:t9
表字段及内容:
a b c d
2014 2016 2014 A
2014 2015 2015 B
问题一:不使用distinct或group by去重
输出结果如下所示:
2014 A
2016 A
2014 B
2015 B
参考答案:
-- De-duplicate (year, num) pairs without DISTINCT or GROUP BY:
-- stack columns a, b and c of t9 as "year" next to d as "num", number the
-- rows inside each identical (year, num) pair, and keep only the first one.
with pairs as (
    select a as year, d as num from t9
    union all
    select b as year, d as num from t9
    union all
    select c as year, d as num from t9
),
numbered as (
    select
        year,
        num,
        row_number() over (partition by year, num) as rank_1
    from pairs
)
select
    year,
    num
from numbered
where rank_1 = 1
order by
    num;
10. 容器–反转内容
表名:t10
表字段及内容:
a
AB,CA,BAD
BD,EA
问题一:反转逗号分隔的数据:改变顺序,内容不变
输出结果如下所示:
BAD,CA,AB
EA,BD
参考答案:
-- Reverse the order of the comma-separated items in a, keeping each item intact:
-- reversing the whole string flips both item order and item content, so each
-- exploded piece is reversed once more to restore its content.
with pieces as (
    select
        a,
        piece
    from t10
    lateral view explode(split(reverse(a), ',')) t as piece
)
select
    a,
    concat_ws(',', collect_list(reverse(piece)))
from pieces
group by
    a;
问题二:反转逗号分隔的数据:改变内容,顺序不变
输出结果如下所示:
BA,AC,DAB
DB,AE
参考答案:
-- Reverse the content of every comma-separated item in a while keeping the
-- items in their original order.
with pieces as (
    select
        a,
        piece
    from t10
    lateral view explode(split(a, ',')) t as piece
)
select
    a,
    concat_ws(',', collect_list(reverse(piece)))
from pieces
group by
    a;
11. 多容器–成对提取数据
表名:t11
表字段及内容:
a b
A/B 1/3
B/C/D 4/5/2
问题一:成对提取数据,字段一一对应
输出结果如下所示:
a b
A 1
B 3
B 4
C 5
D 2
参考答案:
-- Pair up the '/'-separated items of columns a and b position by position.
-- Both columns are exploded together with their positions, and only the
-- combinations whose positions match are kept.
select
    a_inx,
    b_inx
from t11
lateral view posexplode(split(a, '/')) a_items as a_id, a_inx
lateral view posexplode(split(b, '/')) b_items as b_id, b_inx
where a_id = b_id;
12. 多容器–转多行
表名:t12
表字段及内容:
a b c
001 A/B 1/3/5
002 B/C/D 4/5
问题一:转多行
输出结果如下所示:
a d e
001 type_b A
001 type_b B
001 type_c 1
001 type_c 3
001 type_c 5
002 type_b B
002 type_b C
002 type_b D
002 type_c 4
002 type_c 5
参考答案:
-- Turn the two '/'-separated columns of t12 into rows: every item of b becomes
-- a row tagged 'type_b', every item of c a row tagged 'type_c'.
with exploded as (
    select
        a,
        'type_b' as d,
        item     as e
    from t12
    lateral view explode(split(b, '/')) t as item

    union all

    select
        a,
        'type_c' as d,
        item     as e
    from t12
    lateral view explode(split(c, '/')) t as item
)
select
    a,
    d,
    e
from exploded
order by
    a,
    d;