(hive-day5作业)
https://blog.csdn.net/qq_38689352/article/details/113525853
本次做题总结:二次排序做法:rank() over(partition by c1 order by c2)
1.统计视频观看次数的前五(Top10)
select *
from t_video
order by views desc
limit 10 ;
2.统计视频类别“热度”(观看次数)的前十 Top10
select t2.categ,sum(t1.views) sv
from t_video t1
lateral view explode(category) t2 as categ -- 对观看次数炸裂
group by t2.categ
order by sv desc
limit 10;
3.统计视频"观看次数"前20所属类别,以及每个类别的视频个数
select t2.categ,count(t1.videoId)
from(
select category,videoId,views -- 观看次数前20所属类别
from t_video a
order by views desc
limit 20
) t1
LATERAL VIEW explode(t1.category) t2 as categ -- 炸裂类别
group by t2.categ
4.统计视频观看次数的前50(Top50)所关联视频的所属类别的个数Rank排名
select *,rank() over(order by ct desc ) -- 按个数对类别rank排序,一个分区内只对个数排序
from (
select t6.categ,count(*) ct -- 每个类别的数量
from
(
select t3.related,t4.category -- 所关联视频的所属类别
from
(
select related -- 所关联视频
from
(
select relatedId,views -- 观看次数前50
from t_video
order by
views desc
limit 50
) t1
lateral view explode(t1.relatedId) t2 as related
) t3
left join t_video t4
on t3.related = t4.videoId
) t5
lateral view explode(t5.category) t6 as categ
group by t6.categ
) t7 ;
5.统计上传视频最多的用户 Top10 以及他们上传的观看次数在前 20 视频
-- 理解方式一:
-- 统计上传视频最多的前十用户(Top10) 以及他们所有人上传的观看次数在前20的视频
select *
from
(
select uploader --上传视频最多的前十用户
from t_user
order by videos desc
limit 10
) t1
left join t_video t2
on t1.uploader = t2.uploader
order by t2.views desc
limit 20;
-- 理解方式二:
-- 统计上传视频最多的前十用户(Top10) 以及他们每个人上传的观看次数在前20的视频
select t3.uploader,t3.videoid,t3.views
from (
select t2.uploader,t2.videoid,t2.views, --每个人上传的观看次数排名
rank() over(partition by t2.uploader order by t2.views desc) rk
from
(
select uploader,videos --上传视频最多的前十用户
from t_user
order by
videos desc
limit 10
) t1
left join t_video t2
on t1.uploader = t2.uploader
) t3
where t3.rk <=20
6.统计每个类别中的视频"热度"(观看次数) 的Top10
select *
from (
select *,rank() over (partition by t2.categ order by t1.views desc) rk
from t_video t1
lateral view explode(category) t2 as categ
) t3
where t3.rk <= 10;