文章目录
题目1
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
C,2015-03,20
字段说明: username: 姓名
datetime : 日期
cost_money : 消费金额
**需求:**
1.计算出每个人每月的消费金额?
2.计算出每个人截止到当月的消费总金额?
建表
create table test1( username string,datetime string,cost_money int)
row format delimited fields terminated by ",";
load data local inpath '/opt/module/hive/datas' into table test1;
1.计算出每个人每月的消费金额?
select *,sum(cost_money) over(partition by username,substr(datetime,1,7))
from test1;

2.计算出每个人截止到当月的消费总金额?
select *,sum(cost_money) over(partition by username rows between
unbounded preceding and current row) from test1;

题目2
create table test2 (user_id string,shop string);
insert into table test2 values ('1','a');
insert into table test2 values ('1','b');
insert into table test2 values ('2','a');
insert into table test2 values ('3','c');
insert into table test2 values ('1','a');
insert into table test2 values ('1','a');
访客的用户id为user_id,被访问的店铺名称为shop,
请统计:
1)每个店铺的UV(访客数)
2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
1)每个店铺的UV(访客数)
方式一:
select distinct(num),shop from
(select shop,count(user_id) over(partition by shop) as count_user from test2) t;
方式二:
select shop,count(user_id) from test2 group by shop;

2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
分析:先求出每个店铺的访客数,再排序,再取前三
select * from
(select shop,user_id,count_user,rank() over(partition by shop order by count_user) shop_user from
(select shop,user_id,count(user_id) count_user from test2 group by shop,user_id)t1)t2
where shop_user < 4 ;

题目3
有一个5000万的用户文件(user_id,name,age),一个2亿记录的用户看电影的记录文件(user_id,url),根据年龄段观看电影的次数进行排序?
1.建表
create table forth_user(user_id string,name string,age int);
create table forth_log(user_id string,url string);
forth_user:
insert into table forth_user values('001','wt',10);
insert into table forth_user values('002','ls',18);
insert into table forth_user values('003','zz',30);
insert into table forth_user values('004','zz',50);
forth_log:
insert into table forth_log values('001','sdf');
insert into table forth_log values('001','wss');
insert into table forth_log values('002','sdf');
insert into table forth_log values('003','sdf');
insert into table forth_log values('004','sdf');
2.分析需求
先求出每个人看了几次电影,t1
然后t1和user表join,拼接age字段 t2表
划分年龄段,0-20,20-40,40-60,60--
按年龄段分组,按照次数排序
采用多表连接查询
select b.age_phase,sum(a.count_user) view_count
from
(select user_id,count(*) count_user from three_log group by user_id) a
join
(select user_id,
case
when age >0 and age<=20 then '0-20'
when age >20 and age<=40 then '20-40'
when age >40 and age<=60 then '40-60'
else '60以上' end age_phase
from three_user) b
on
a.user_id=b.user_id
group by b.age_phase;

题目四
20191020,11111,85
20191020,22222,83
20191020,33333,86
20191021,11111,87
20191021,22222,65
20191021,33333,98
20191022,11111,67
20191022,22222,34
20191022,33333,88
20191023,11111,99
20191023,22222,33
需求:
1、使用 over() 函数进行数据统计, 统计每个用户及表中数据的总数
2、求用户明细并统计每天的用户总数
3、计算从第一天到现在的所有 score 大于80分的用户总数
4、计算每个用户到当前日期分数大于80的天数
create table test_window1(
datetime string,userid string,score int
)
row format delimited fields terminated by ',';
load data local inpath '/opt/module/datas/test/test_window1.txt' into table test_window1;
1、使用 over() 函数进行数据统计, 统计每个用户及表中数据的总数
select * ,count(userid) over() totol from test_window1;
2、求用户明细并统计每天的用户总数
select *,count(userid) over(partition by datetime) totol from test_window1;
3、计算从第一天到现在的所有 score 大于80分的用户总数
select *,count(userid) over(order by datetime rows between unbounded preceding and current row) totol from test_window1 where score > 80;
4、计算每个用户到当前日期分数大于80的天数
select *,count(userid) over(partition by datetime rows between unbounded preceding and current row) totol from test_window1 where score > 80 order by datetime,userid;
题目五
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94
create table business(
name string,orderdate string,cost int)
row format delimited fields terminated by ',';
load data local inpath '/opt/module/datas/test/business.txt' into table business;
1、查询在2017年4月份购买过的顾客及总人数
select *,count(name) over()
from business substr(orderdate,1,7)='2017-04';
2、查询顾客的购买明细及月购买总额
select *,sum(cost) over(partition by name,substr(orderdate,1,7)) from business;
3、查询顾客的购买明细及到目前为止每个顾客购买总金额
select *,sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row) from business ;
4、查询顾客上次的购买时间----lag()over()偏移量分析函数的运用
select *,lag(orderdate,1) over(partition by name order by orderdate) from business;
5、查询前20%时间的订单信息
select * from (select *,ntile(5) over(order by orderdate) ntile_num from business) t1 where t1.ntile_num = 1;
题目六
孙悟空 语文 87
孙悟空 数学 95
孙悟空 英语 68
大海 语文 94
大海 数学 56
大海 英语 84
宋宋 语文 64
宋宋 数学 86
宋宋 英语 84
婷婷 语文 65
婷婷 数学 85
婷婷 英语 78
create table score(
name string,subject string,score int)
row format delimited fields terminated by ' ';
load data local inpath '/opt/module/datas/test/score.txt' into table score;
1、每门学科学生成绩排名(是否并列排名、空位排名三种实现)
select *,
row_number() over(partition by subject order by score desc)
rank() over(partition by subject order by score desc)
dense_rank() over(partition by subject order by score desc)
from score;
2、每门学科成绩排名top n的学生
select * from
(select *,row_number() over(partition by subject order by score) rmp from score ) t
where t.rmp<=3;
683

被折叠的 条评论
为什么被折叠?



