进阶HQL练习-谷粒影音

数据源:https://download.csdn.net/download/qq_41106844/12657640

-- 默认表
create table gulivideo_ori(
    videoId string, 
    uploader string, 
    age int, 
    category array<string>, 
    length int, 
    views int, 
    rate float, 
    ratings int, 
    comments int,
    relatedId array<string>)
row format delimited 
fields terminated by "\t"
collection items terminated by "&"
stored as textfile
;

create table gulivideo_user_ori(
    uploader string,
    videos int,
    friends int)
row format delimited 
fields terminated by "\t" 
stored as textfile
;

-- ORC格式
create table gulivideo_orc(
    videoId string,           -- 视频ID
    uploader string,          -- 上传程序
    age int,                  -- 年龄
    category array<string>,   -- 类别
    length int,               --长度
    views int,                --视频观看数
    rate float,               --偏移值
    ratings int,              --额定值
    comments int,             --注释
    relatedId array<string>)  --相关视频ID
row format delimited fields terminated by "\t" 
collection items terminated by "&" 
stored as orc
;

create table gulivideo_user_orc(
    uploader string,
    videos int,
    friends int)
row format delimited 
fields terminated by "\t" 
stored as orc
;

-- 导入数据
load data inpath "/gulivideo/output/video/2008/0222" into table gulivideo_ori;
load data inpath "/gulivideo/user/2008/0903" into table gulivideo_user_ori;

-- 数据转存
insert into table gulivideo_orc select * from gulivideo_ori;
insert into table gulivideo_user_orc select * from gulivideo_user_ori;
统计视频观看数 Top10

按照视频观看数降序排序,然后limit分页。

select
    videoId, 
    uploader, 
    age, 
    category, 
    length, 
    views, 
    rate, 
    ratings, 
    comments 
from 
    gulivideo_orc 
order by 
    views desc
limit 10
;
统计视频类别热度 Top10
  • 即统计每个类别有多少个视频,显示出包含视频最多的前 10 个类别。
  • 我们需要按照类别 group by 聚合,然后 count 组内的 videoId 个数即可。
  • 因为当前表结构为:一个视频对应一个或多个类别。所以如果要 group by 类别,需要先
    将类别进行列转行(展开),然后再进行 count 即可。
  • 最后按照热度排序,显示前 10 条。
select 
    category_name as category, 
    count(t1.videoId) as hot 
from (
    select 
        videoId,
        category_name 
    from 
        gulivideo_orc 
        lateral view explode(category) t_catetory as category_name
    ) t1 
group by 
    t1.category_name 
order by 
    hot desc 
limit 
    10
;
统计视频观看数 Top20 所属类别以及类别包含的 Top20 的视频个数
  • 先找到观看数最高的 20 个视频所属条目的所有信息,降序排列
  • 把这 20 条信息中的 category 分裂出来(列转行)
  • 最后查询视频分类名称和该分类下有多少个 Top20 的视频
select 
    category_name as category, 
    count(t2.videoId) as hot_with_views 
from (
    select 
        videoId, 
        category_name 
    from (
        select 
            * 
        from 
            gulivideo_orc 
        order by 
            views desc 
        limit 
            20
        ) t1 
        lateral view explode(category) t_catetory as category_name
    ) t2 
group by 
    category_name 
order by 
    hot_with_views desc
;
统计视频观看数 Top50 所关联视频的所属类别
  • 查询视频观看数前50
  • 将这50个视频的关联视频列转行
  • 将相关视频的 id 和 gulivideo_orc 表进行 inner join 操作
  • 按照视频类别进行分组,统计每组视频个数,然后排行
select 
    category_name as category, 
    count(t5.videoId) as hot 
from (
    select 
        videoId, 
        category_name 
    from (
        select 
            distinct(t2.videoId), 
            t3.category 
        from (
           select 
               explode(relatedId) as videoId
           from (
               select
                   * 
               from 
                   gulivideo_orc 
               order by 
                   views desc 
               limit 50
           ) t1
        ) t2 
    inner join
        gulivideo_orc t3 on t2.videoId = t3.videoId
    ) t4 
    lateral view explode(category) t_catetory as category_name
) t5
group by 
    category_name 
order by 
   hot desc
;
统计每个类别中的视频热度 Top10
  • 按照视频播放量排序
  • 取出前十条
select
    videoId,
    views
from
    gulivideo_orc 
    lateral view explode(category) catetory as categoryId
where 
    categoryId = "Music" 
order by 
    views desc 
limit 10
;
统计每个类别中视频流量 Top10
  • 同上
select 
    videoId,
    views,
    ratings 
from 
    gulivideo_orc
    lateral view explode(category) catetory as categoryId
where 
    categoryId = "Music" 
order by 
    ratings desc 
limit 
    10
;
统计上传视频最多的用户 Top10 以及他们上传的观看次数在前 20 视频
  • 通过用户表的上传ID与视频ID连接
  • 按照视频播放量排序
  • 取出前20条
select 
    t2.videoId, 
    t2.views,
    t2.ratings,
    t1.videos,
    t1.friends 
from (
    select 
        * 
    from 
        gulivideo_user_orc 
    order by 
        videos desc 
    limit
       10
    ) t1 
join 
    gulivideo_orc t2 on t1.uploader = t2.uploader 
order by 
    views desc 
limit 
     20
 ;
统计每个类别视频观看数 Top10
  • 使用窗口函数将序号rank添加上去
  • 使用rank取前10位
select 
    t1.* 
from (
    select 
        videoId,
        categoryId,
        views,
        row_number() over(partition by categoryId order by views desc) rank 
    from 
        gulivideo_orc
        lateral view explode(category) catetory as categoryId
    ) t1 
where 
     rank <= 10
 ;
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

寒 暄

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值