HQL函数--打地鼠游戏及WordCount案例分析及实现

1.打地鼠

uid,hit,m
1,1,0
1,2,1
1,3,1
1,4,1
1,5,0
1,6,0
1,7,1
2,1,1
2,2,1
2,3,1
2,4,1
2,5,1
3,1,1
3,2,1
3,3,1
3,4,0
3,5,0
3,6,1
3,7,0
3,8,1

create table tb_ds(
      uid int ,  -- 用户名
      hit int ,  -- 第几次打地鼠
      m int      -- 是否命中 1命中 0 未命中
)
row format delimited fields terminated by ','  ;
load data local inpath '/root/ds.txt' into table tb_ds ;

select  * from tb_ds;

 案例分析

查询用户最大连续命中次数

--查询命中的的记录
select *
from tb_ds
where m = 1;

-- 用户分组 记录编号
select uid, hit, row_number() over (partition by uid order by hit) as rn
from tb_ds
where m = 1;

-- hit-rn 
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1)
select *, (hit - rn) as sub
from t1

-- 分组得到每个用户的连续击中
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
     t2 as (select *, (hit - rn) as sub from t1)
select uid, count(*) as hit_count
from t2
group by uid, sub

-- 用户分组得到每个用户最大次数
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
    t2 as (select *, (hit-rn) as sub from t1 ),
    t3 as (select uid, count(*) as hit_count from t2 group by uid, sub )
select uid, max(hit_count) as hit_count
from t3
group by uid;

2.WordCount

 1.建表数据

 Why Studying History Matters
Studying a subject that you feel pointless is never a fun or easy task.
If you're study history, asking yourself the question "why is history important" is a very good first step.
History is an essential part of human civilization.
You will find something here that will arouse your interest, or get you thinking about the significance of history.

2.分析 

 

 

create table t_wc(
    line string
)row format delimited lines terminated by "\n";

load data local inpath '/root/word.txt' overwrite into table t_wc;
select * from t_wc;
-- 使用正则表达式去掉特殊符号 将 除了单词字符 及 ' 和 空格的 所有字符替换成空串
select regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', "") as word_line
from t_wc;
-- 使用split进行切割 将一行数据切割为一个数组
select split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")
from t_wc;
-- 使用expload函数进行炸裂
select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word
from t_wc;
-- 分组得到每个单词的次数
with t1 as (select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word from t_wc)
select word, count(1) as word_count
from t1
group by word;

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值