hive的练习题
1:编写sql实现每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数
数据: userid,month,visits
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
A,2015-03,16
A,2015-03,22
B,2015-03,23
B,2015-03,10
B,2015-03,1
create table visits(
userid string,
month string,
visits int
)
row format delimited fields terminated by ','
;
load data local inpath '/hivedata/visits.txt' overwrite into table visits;
-- Q1: per user, the max single-month visits so far and the running total
-- of visits up to each month. The inner query first collapses duplicate
-- (userid, month) rows into one monthly sum.
select
userid,
month,
visits,
-- with SORT BY inside the window, the default frame is
-- "unbounded preceding .. current row", i.e. "up to this month"
max(visits) over(distribute by userid sort by month) maxvisit,
sum(visits) over(distribute by userid sort by month) totalvisit
from (
select
userid,
month,
sum(visits) visits
from visits
group by userid,month
) t1 ;
2: 求出每个栏目的被观看次数及累计观看时长
数据: vedio表
Uid channel min
1 1 23
2 1 12
3 1 12
4 1 32
5 1 342
6 2 13
7 2 34
8 2 13
9 2 134
参考答案: create table video(
Uid int,
channel string,
min int )
row format delimited fields terminated by ' '
;
load data local inpath '/home/hivedata/video.txt' into table video;
select
channel,
count(Uid) num, --也可以写成count(1)
sum(min) total
from
video
group by channel ;
3: 编写连续7天登录的总人数
数据: t1表
Uid dt login_status(1登录成功,0异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
-- Q3: table of daily login results (login_status 1 = success, 0 = failure).
create table login1(
uid int,
dt string,
login_status int
) row format delimited fields terminated by ' '
;
-- BUG FIX: the original loaded into table `login`, which is never created;
-- the table defined above is `login1`.
load data local inpath '/hivedata/login.txt' into table login1;
-- Q3 solution 1: classic "date minus row_number" trick — within a run of
-- consecutive login dates, date_sub(dt, rn) is constant, so grouping by
-- that anchor date groups each consecutive streak.
select
uid,
dt
from
(
select
t1.uid uid,
date_sub(t1.dt,t1.rm) dt
from (
select
uid,
dt,
row_number() over(distribute by uid sort by dt) rm
from
login1
where login_status=1
) t1
)t2
group by uid,dt
-- BUG FIX: "7 consecutive days" means a streak of AT LEAST 7; `= 7`
-- missed users with 8+ consecutive days (uids 1 and 3 in the sample data).
having count(uid) >= 7 ;
-- Q3 solution 2: same date-minus-row_number trick in a single subquery.
select
tmp.uid uid,
tmp.dt
from(
select
uid,
date_sub(dt,row_number() over(distribute by uid sort by dt)) as dt,
login_status
from login1
where login_status=1
) tmp
group by uid,dt
-- BUG FIX: `> 7` required an 8-day streak; the requirement is 7 or more.
having count(uid) >= 7
;
4: 编写sql语句实现每班前三名,分数一样并列,同时求出前三名 按名次排序的一次的分差:
数据: stu表
Stu_no class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
-- Q4: top 3 per class (row_number gives distinct ranks even on tied
-- scores) plus the score gap to the previous rank.
-- NOTE(review): with score sorted DESC, score - lag(score) is <= 0 for
-- ranks 2 and 3, and nvl(...,0) makes rank 1's "gap" equal to its own
-- score — confirm this matches the intended output format.
select
tmp.class,
tmp.stu_no,
tmp.score,
tmp.score-nvl(LAG(tmp.score) over(distribute by tmp.class sort by rn),0),
tmp.rn
from(
select
class,
stu_no,
score,
row_number() over(distribute by class sort by score desc) rn
from student
) tmp
where tmp.rn<4
;
-- Q4: per-student score table.
create table student(
stu_no int,
class string,
score int
)
row format delimited fields terminated by '\t'
;
-- BUG FIX: the original loaded into table `stu`, but the table created
-- above is `student`.
load data local inpath '/home/hivedata/stu.txt' into table student;
select
class,
stu_no,
score,
dr,
score-nvl(lag(score) over(distribute by class sort by dr),0)
from (
select
class,
stu_no,
score,
row_number() over(distribute by class sort by score desc) dr
from student) t1
where t1.dr<4 ;
5:每个店铺的当月销售额和累计到当月的总销售额
数据:
店铺,月份,金额
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000
create table shop1(
shopid string,
month string,
money int )
row format delimited fields terminated by ',' ;
load data local inpath '/home/hivedata/shop1.txt' overwrite into table shop1;
-- Q5: each shop's monthly sales and the cumulative total up to that month.
-- BUG FIX: the original used m + lag(m, 1, 0), which only adds the
-- PREVIOUS month instead of accumulating all months so far; a windowed
-- sum() with ORDER BY gives the true running total. The redundant outer
-- GROUP BY is dropped as well.
select
shopid,
month,
tmp.m,
sum(tmp.m) over(partition by shopid order by month) totalsale
from(
select
shopid,
month,
sum(money) m
from
shop1
group by shopid,month
) tmp;
-- Q5 solution 2: windowed sum() over the monthly aggregate.
select
t1.shopid,
t1.month,
t1.m,
sum(t1.m) over(partition by shopid order by month) totalsale
from
(select
shopid,
month,
sum(money) m
from
shop1
group by shopid,month)t1
-- BUG FIX: the statement was missing its terminating semicolon, so the
-- following section's text would have been parsed as part of this query.
;
6:订单及订单类型行列转换
t1表:
order_id order_type order_time
111 N 10:00
111 A 10:05
111 B 10:10
是用hql获取结果如下:
order_id order_type_1 order_type_2 order_time_1 order_time_2
111 N A 10:00 10:05
111 A B 10:05 10:10
-- Q6: order events, one row per (order, type, time).
create table t11
(order_id int,
order_type string,
order_time string)
row format delimited fields terminated by ' '
-- BUG FIX: the original CREATE and LOAD had no statement terminators,
-- so they would not parse as two statements.
;
load data local inpath '/home/hadoop/exam/t11' into table t11;
select
*
from
(select
order_id,
order_type,
lead(order_type,1) over(partition by order_id order by order_time) order_type1,
order_time,
lead(order_time,1) over(partition by order_id order by order_time) order_time1
from
t11)t1
where t1.order_type1 is not null
7://找出用户在表中的第一次行为
select
t1.id,
t1.time,
t1.action
from
(select
id,
time,
action,
first_value(time) over(partition by id order by time) first
from
test2)t1
where t1.time=t1.first;
8: 某APP每天访问数据存放在表access_log里面,包含日期字段 ds,用户类型字段user_type,用户账号user_id,用户访问时间 log_time,请使用hive的hql语句实现如下需求
(1)、每天整体的访问UV、PV?
(2)、每天每个类型的访问UV、PV?
(3)、每天每个类型中最早访问时间和最晚访问时间?
(4)、每天每个类型中访问次数最高的10个用户?
没有数据!
9:每个用户连续登陆的最大天数
login表
uid,udate
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
结果如下:
uid cnt_days
1 3
2 2
3 1
4 3
select
b.uid,
max(b.c)
from (
select
uid,
count(a.e) c
from (
select
uid,
udate d,
date_sub(udate,row_number() over(partition by uid order by udate)) as e
from login) a
group by a.uid,a.e) b
group by b.uid;
select
uid,
max(tmp1.total)
from(
select
tmp.uid,
count(tmp.dt) total
from(
select
uid,
date_sub(udate,row_number() over(partition by uid order by udate)) as dt
from login
) tmp
group by tmp.uid,tmp.dt
) tmp1
group by tmp1.uid
;
10:使用hive的hql实现男女各自第一名及其它
id sex chinese math
0 0 70 50
1 0 90 70
2 1 80 90
1、男女各自语文第一名(0:男,1:女)
2、男生成绩语文大于80,女生数学成绩大于70
create table cource
(id int,
sex int,
chinese int,
math int)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/cource.txt' into table cource;
1:
-- Q10-1: top Chinese score for each sex.
-- BUG FIX: first_value() is a window function, not an aggregate, so the
-- subquery emits one row per student; DISTINCT collapses the duplicates
-- down to one row per sex.
select
distinct
tmp.sex,
fir
from
(
select
sex,
first_value(chinese) over(partition by sex order by chinese desc) fir
from
cource) tmp
;
select
sex,
first_value(chinese) over(partition by sex order by chinese desc) fir
from
cource;
或者:
select
*
from
(select
id,
sex,
chinese,
math,
row_number() over(partition by sex order by chinese desc) rr
from
cource)t1
where t1.rr = 1
2:
-- Q10-2: male students with Chinese > 80 plus female students with
-- math > 70.
select
*
from
cource
where sex=0 and chinese>80
-- the two branches are disjoint (sex differs), so UNION ALL skips an
-- unnecessary dedup stage
union all
select
*
from
cource
-- BUG FIX: the original referenced a non-existent column `manth`;
-- the math-score column is `math`.
where sex=1 and math>70;
10:课程行转列
数据:
t1表
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e
根据编写sql,得到结果如下(表中的1表示选修,表中的0表示未选修):
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0
select id,
sum(case course when 'a' then 1 else 0 end) a,
sum(case course when 'b' then 1 else 0 end) b,
sum(case course when 'c' then 1 else 0 end) c,
sum(case course when 'd' then 1 else 0 end) d,
sum(case course when 'e' then 1 else 0 end) e,
sum(case course when 'f' then 1 else 0 end) f
from t1 group by id ;
-- Subject pivot (rows -> columns) with a per-student total.
select
id,
-- BUG FIX: Hive quotes identifiers with backticks, not double quotes,
-- and a query mixing aggregates with a plain column needs GROUP BY.
sum(case when subject="语文" then score else 0 end) as `语文`,
sum(case when subject="数学" then score else 0 end) as `数学`,
sum(case when subject="英语" then score else 0 end) as `英语`,
sum(case when subject="政治" then score else 0 end) as `政治`,
sum(score) as total
from
score
group by id;
11:编写sql实现如下:
数据: t1表
uid tags
1 1,2,3
2 2,3
3 1,2
编写sql实现如下结果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2
create table t111
(uid int,
tags string)
row format delimited fields terminated by ' ';
load data local inpath '/home/hadoop/exam/t111' into table t111
select
uid,
tag
from t111
lateral view explode(split(tags,',')) tags as tag
12:用户标签连接查询
数据:
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
根据T1和T2表的数据,编写sql实现如下结果:
ids tags
1,2,3 A,B,C
1,2 A,B
2,3 B,C
create table tab1(
tags array<string>
)
row format delimited fields terminated by ' '
collection items terminated by ','
;
create table tab2(
id int,
lab string
)
row format delimited fields terminated by ' ' ;
select tags,cj from tab1 lateral view explode(tags) tags as cj;
select
tmp01.tags,
collect_list(tmp01.lab)
from(
select
tmp.tags,
tab2.lab
from(
select tags,cj from tab1 lateral view explode(tags) tags as cj
) tmp inner join tab2
on tmp.cj=tab2.id) tmp01
group by tmp01.tags;
13:用户标签组合
数据:
t1表:
id tag flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
编写sql实现如下结果:
id tag flag
a b 1|2|3
c d 6|8
create table s1
(id string,
tag string,
flag string)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/s1' into table s1;
select
id,
tag,
concat_ws('|',collect_set(flag))
from
s1
group by id,tag;
-- Q13 solution 2: sort flags first so the concatenated list is ordered.
select
t1.id,
t1.tag,
-- BUG FIX: collect_list keeps duplicates (group c/d would yield 6|8|8);
-- collect_set dedupes to match the expected output 6|8.
concat_ws("|",collect_set(t1.flag))
from
(select
id id,
tag tag,
flag flag
from s1
order by flag)t1
group by t1.id,t1.tag;
14: 用户标签行列互换
用户标签行列互换
t1表
uid name tags
1 goudan chihuo,huaci
2 mazi sleep
3 laotie paly
编写sql实现如下结果:
uid name tag
1 goudan chihuo
1 goudan huaci
2 mazi sleep
3 laotie paly
create table k1
(
uid int,
name string,
tags array<string>
)
row format delimited fields terminated by ' '
collection items terminated by ',';
load data local inpath '/home/hivedata/k1' into table k1
select uid,name,cj from k1 lateral view explode(tags) tags as cj;
15:词频统计
词频统计
数据:
t1表:
uid contents
1 i|love|china
2 china|is|good|i|i|like
统计结果如下,如果出现次数一样,则按照content名称排序:
content cnt
i 3
china 2
good 1
like 1
love 1
is 1
create table l1
(uid int,
contents string)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/l1' into table l1;
-- BUG FIX: '\|' in a Hive string literal is just '|', and split() treats
-- a bare '|' as the regex alternation operator, splitting between every
-- character; '\\|' passes an escaped pipe to the regex engine.
select explode(split(contents,'\\|')) from l1;
select cj from l1 lateral view explode(split(contents,'\\|')) contents as cj ;
-- Q15: word frequency over pipe-separated contents.
select
t1.cj cj,
count(t1.cj) cnt
from (
select cj from l1 lateral view explode(split(contents,'\\|')) contents as cj
) t1
group by t1.cj
-- BUG FIX: the spec says ties on count are broken by content name,
-- which requires a secondary sort key.
order by cnt desc, cj;
select ct, count(1) cn from (select ct from l1 lateral view explode(split(contents,"\\|")) tmp as ct) tmp group by ct order by cn desc,ct ;
16: 兴趣行转列
t1表
name sex hobby
janson 男 打乒乓球、游泳、看电影
tom 男 打乒乓球、看电影
hobby最多3个值,使用hql实现结果如下:
name sex hobby1 hobby2 hobby3
janson 男 打乒乓球 游泳 看电影
tom 男 打乒乓球 看电影
create table ho(
name string,
sex string,
hobby array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ','
;
load data local inpath '/home/hivedata/hobby.txt' overwrite into table ho;
select
name,
sex,
case when true then nvl(hobby[0],"") else null end hobby1,
case when true then nvl(hobby[1],"") else null end hobby2,
case when true then nvl(hobby[2],"") else null end hobby3
from ho
;
17:求top3英雄及其pick率----------------------
1 亚索,挖掘机,艾瑞,洛,卡沙
2 亚索,盖伦,奥巴马,牛头,皇子
3 亚索,盖伦,艾瑞,宝石,琴女
4 亚索,盖伦,赵信,老鼠,锤石
create table heros(
id int,
names array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';
load data local inpath '/home/hivedata/heros.txt' overwrite into table heros;
-- Q17: top-3 heroes by pick count and their pick rate.
-- BUG FIX: the original grouped by (cj, tal) and divided tal by
-- sum(tal) within that group, which is always 1. Pick rate is a hero's
-- pick count divided by the total number of matches (one row per match
-- in heros), obtained here via a single-row cross join.
select
h.cj,
count(1) tal,
round(count(1)/max(t.total),2) pickrate
from (
select id, cj
from heros
lateral view explode(names) names as cj
) h
cross join (select count(1) total from heros) t
group by h.cj
order by tal desc
limit 3;
select
name,
count(1) ann,
round(count(1)/totalcn,2)
from (
select
count(name) over() totalcn,
name
from
heros
lateral view explode(names) t as name
) t1
group by name,totalcn
order by ann desc
limit 3;
18:时间格式转换
数据: t1表
20190730 20190731
编写sql实现如下的结果:
2019-07-30 2019-07-31
create table dt(
dt string
);
load data local inpath '/home/hivedata/dt.txt' overwrite into table dt;
select from_unixtime(
unix_timestamp(dt,"yyyyMMdd"),"yyyy-MM-dd"
)
from
dt
;
19:两个表A 和B ,均有key 和value 两个字段,写一个SQL语 句,将B表中的value值置成A表中相同key值对应的value值
A:
key value
k1 123
k2 234
k3 235
B:
key value
k1 111
k2 222
k5 246
使用hive的hql实现,结果是B表数据如下:
k1 123
k2 234
k5 246
create table A
(key string,
value int)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/A' into table A;
create table B
(key string,
value int)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/B' into table B;
-- Q19: for every key in B, take A's value when the key exists in A;
-- keys present only in B keep their own value.
select
    t_b.key,
    if(t_a.key is null, t_b.value, t_a.value) value
from B t_b
left join A t_a
    on t_b.key = t_a.key;
2111:有用户表user(uid,name)以及黑名单表Banuser(uid)
1、用left join方式写sql查出所有不在黑名单的用户信息
2、用not exists方式写sql查出所有不在黑名单的用户信息
create table u(
id string,
name string )
row format delimited fields terminated by ',' ;
create table banuser(
id string
)
;
load data local inpath '/hivedata/banuser.txt' overwrite into table banuser; load data local inpath '/hivedata/u.txt' overwrite into table u;
1、
select
u.id,
u.name
from u
left join banuser b
on u.id=b.id
where
b.id is null ;
2、
select
u.id,
u.name
from
u
where
not exists (
select 1 from banuser b where u.id=b.id
)
;
20访问日志正则提取
表t1(注:数据时正常的访问日志数据,分隔符全是空格)
8.35.201.160 - - [16/May/2018:17:38:21 +0800] "GET /uc_server/data/avatar/000/01/54/22_avatar_middle.jpg HTTP/1.1" 200 5396
使用hive的hql实现结果如下:
ip dt url
8.35.201.160 2018-5-16 17:38:21 /uc_server/data/avatar/000/01/54/22_avatar_middle.jpg
-- Q20: raw access-log table, one whole log line per row.
create table login_log( log string )
-- BUG FIX: missing statement terminator between CREATE and LOAD.
;
load data local inpath '/hivedata/login_log.txt' overwrite into table login_log;
select regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",1), from_unixtime(unix_timestamp(regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",2),"dd/MMM/yyyy:HH:mm:ss Z"),"yyyy-MM-dd HH:mm:ss"), regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",4) from login_log where log regexp "([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)" ;
21 使用hive的hql查询用户所在部门
dpt表
dpt_id dpt_name
1 产品
2 技术
user_dpt表
user_id dpt_id
1 1
2 1
3 2
4 2
5 3
result表
user_id dpt_id dpt_name
1 1 产品
2 1 产品
3 2 技术
4 2 技术
5 3 其他部门
create table dpt(
dpt_id int,
dpt_name string
)
row format delimited fields terminated by '\t' ;
load data local inpath '/hivedata/dpt.txt' overwrite into table dpt;
create table user_dpt(
user_id int,
dpt_id int )
row format delimited fields terminated by '\t' ;
load data local inpath '/hivedata/user_dpt.txt' overwrite into table user_dpt;
select
t1.user_id,
t1.dpt_id,
nvl(t2.dpt_name,"其他部门")
from
user_dpt t1
left join dpt t2
on t1.dpt_id=t2.dpt_id ;
22 查出每个学期每门课程最高分记录
course_score表:
id,name,course,score,term
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,lisi,数学,70,2016
4,wangwu,化学,80,2017
5,zhangsan,语文,85,2015
6,zhangsan,化学,80,2015
7,wangwuww,化学,90,2017
8,zhangsanwww,语文,85,2015
编写sql完成如下查询,最好一次查询实现,也可以写多次查询实现:
1、查出每个学期每门课程高分记录(包含全部5个字段)
2、查出单个学期中语文课在90分以上的学生的数学成绩记录(包含全部5个字段)
-- Q22: per-term course scores.
create table test4
(id int,
name string,
course string,
score int,
term int)
row format delimited fields terminated by ','
-- BUG FIX: the CREATE statement was missing its terminating semicolon,
-- so the LOAD below would fail to parse.
;
load data local inpath '/home/hivedata/test4' into table test4;
select
t1.*
from
test4 t1
inner join
(select
course,
max(score) score,
term
from
test4
group by course,term)t2
on t2.score = t1.score and t2.term = t1.term and t1.course = t2.course;
select
test4.*
from test4
inner join
(select
id,name,course,score,term
from test4
where score>=90 and course='语文')t
on
test4.term = t.term
where test4.course='数学'
----------------------------and t.id is not null
23:设计数据库表,用来存放学生基本信息,课程信息,学生的课 程及成绩,并给出sql语句,查询平均成绩大于85的所有学生
stu_1
id,name,age,addr
1,zs1,22,bj
2,zs2,22,bj
3,zs3,22,bj
4,zs4,22,bj
5,zs5,22,bj
course_1
cid,cname
1,语文
2,数学
3,政治
4,美术
5,历史
course_sc
id,cid,score
1,1,87
1,2,92
1,3,69
2,2,83
2,3,92
2,4,87
2,5,83
create table stu_1(
id string,
name string,
age int,
addr string )
row format delimited fields terminated by ',' ;
create table course_1(
cid string,
cname string
)
row format delimited fields terminated by ',' ;
create table course_sc(
id string,
cid string,
score int )
row format delimited fields terminated by ',' ;
load data local inpath '/hivedata/course_1.txt' overwrite into table course_1; load data local inpath '/hivedata/stu_1.txt' overwrite into table stu_1; load data local inpath '/hivedata/course_sc.txt' overwrite into table course_sc;
-- Q23: students whose average score across all courses exceeds 85.
select
    cs.id,
    avg(score) avgscore
from course_sc cs
group by cs.id
having avg(score) > 85;
24:每个渠道的下单用户数、订单总金额
请使用hive hql查询出2019-08-06号 每个渠道的下单用户数、订单总金额。 hql语句实现,结果表头如下: channel user_num order_amount
create table order_34(
order_id bigint,
user_id int,
amount double,
channel string,
time string comment 'yyyy-MM-dd HH:mm:ss'
)
partitioned by(dt string comment 'yyyy-MM-dd')
row format delimited fields terminated by ',' ;
1,100,19,a,2019-08-06 19:00:00
2,101,19,b,2019-08-06 19:00:01
3,100,19,a,2019-08-05 19:00:00
4,101,19,b,2019-08-05 19:00:01
5,102,19,a,2019-08-06 19:00:00
6,102,19,a,2019-08-06 19:00:01
load data local inpath '/hivedata/order_34.txt' into table order_34 partition(dt='2019-08-06');
hive hql查询出2019-08-06号 每个渠道的下单用户数、订单总金额。
select
channel,
count(distinct user_id),
sum(amount)
from order_34
where dt='2019-08-06' and to_date(time)='2019-08-06'
group by
channel,to_date(time) ;
25:登录且阅读的用户数,已经阅读书籍数量及其它
有如下三张表:
表A(登录表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(阅读表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付费表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基于上述三张表,请使用hive的hql语句实现如下需求:
(1)、用户登录并且当天有阅读的用户数,以及阅读书籍数量
(2)、用户登录并且阅读,但是没有付费的用户数
(3)、用户登录并且付费,付费用户书籍和金额
create table if not exists lg(
ds string,
user_id string
)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/lg.txt' into table lg;
create table if not exists read(
ds string,
user_id string,
read_num int)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/read.txt' into table read;
create table if not exists pay(
ds string,
user_id string,
price double)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/pay.txt' into table pay;
(1)、用户登录并且当天有阅读的用户数,以及阅读书籍数量
select
count(1),
sum(read_num)
from read r
join lg l
on l.user_id = r.user_id and l.ds = r.ds
group by r.ds;
(2)、用户登录并且阅读,但是没有付费的用户数
select
r.ds ds,
count(1)
from read r
left semi join lg l
on l.user_id = r.user_id and l.ds = r.ds
left join pay p
on r.ds=p.ds and r.user_id=p.user_id
where p.user_id is null
group by r.ds;
(3)、用户登录并且付费,付费用户id和金额
select
p.user_id,
p.price
from pay p
left join lg
on p.user_id=lg.user_id and p.ds=lg.ds
where lg.user_id is not null;
26:高消费者报表
有三个表,分别是:
区域(district) 区域中有两个字段分别是区域Id(disid)和区域名称(disname)
城市(city) 城市有两个字段分别是城市ID(cityid)和区域ID(disid)
订单(order) 订单有四个字段分别是订单ID(orderid)、用户ID(userid)、城市ID(cityid)和消费金 额(amount)。
district表:
disid disname
1 华中
2 西南
city表:
cityid disid
1 1
2 1
3 2
4 2
5 2
order表:
oid userid cityid amount
1 1 1 1223.9
2 1 1 9999.9
3 2 2 2322
4 2 2 8909
5 2 3 6789
6 2 3 798
7 3 4 56786
8 4 5 78890
高消费者是消费金额大于1W的用户,使用hive hql生成如下报表: 区域名 高消费者人数 消费总额
create table district(
disid string,
disname string )
row format delimited fields terminated by '\t' ;
create table city(
cityid string,
disid string )
row format delimited fields terminated by '\t' ;
create table order_29(
oid string,
userid string,
cityid string,
amount double )
row format delimited fields terminated by '\t' ;
load data local inpath '/hivedata/district.txt' into table district;
load data local inpath '/hivedata/city.txt' into table city;
load data local inpath '/hivedata/order_29.txt' into table order_29;
select
b.disid,
c.disname,
a.userid,
sum(a.amount)
from order_29 a join city b
on a.cityid=b.cityid
join district c
on b.disid = c.disid
group by b.disid,c.disname,a.userid;
27:请使用sql计算pv.uv
数据:t1表
uid dt url
1 2019-08-06 http://www.baidu.com
2 2019-08-06 http://www.baidu.com
3 2019-08-06 http://www.baidu.com
3 2019-08-06 http://www.soho.com
3 2019-08-06 http://www.meituan.com
3 2019-08-06
结果如下:
date uv pv
2019-08-06 3 5
create table uv_pv(
uid string,
dt string,
url string
)
row format delimited fields terminated by ',';
load data local inpath '/hivedata/uv_pv.txt' overwrite into table uv_pv;
select
dt,
count(distinct uid),
count(url)
from uv_pv
group by dt
;
28:使用hive的hql实现买过商品3的用户及其昨日消费
orderid userid productid price timestamp date
123,00101,3,200,1535945356,2019-08-28
124,00100,1,200,1535945356,2019-08-28
125,00101,3,200,1535945356,2019-08-29
126,00101,2,200,1535945356,2019-08-29
127,00102,5,200,1535945356,2019-08-29
128,00103,3,200,1535945356,2019-08-29
129,00103,3,200,1535945356,2019-08-29
create table buy_log(
orderid int,
userid string,
productid int,
price double,
tm string,
dt string
)
row format delimited fields terminated by ',';
load data local inpath '/home/hivedata/buy_log.txt' overwrite into table buy_log;
select
userid userid,
sum(case when dt=date_sub(current_date,1) then price else 0 end) pay
from
(select
userid,
price,
dt
from buy_log
where productid=3 and (dt=current_date or dt=date_sub(current_date,1))) t1
group by userid,dt
having pay>0
;
29:统计为用户配重了角色的用户角色数量
用户表和角色表,统计为用户配重了角色的用户角色数量(hql)
用户表t1:
uid uname role
1 zs 1,1,2,2,2
2 ls 1,2
3 ww 2,3,3
角色表t2:
roleid rolename
1,唤醒师
2,召唤师
3,魔法师
结果如下:
uid uname roleid cnt
1 zs 1 召唤师 2
1 zs 2 唤醒师 3
3 ww 3 魔法师 2
-- Q29: users with their role-id arrays.
-- BUG FIX: a stray dangling `select` keyword before this CREATE was
-- removed; it would have made the CREATE statement unparseable.
create table user_role(
uid int,
uname string,
role array<int>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';
load data local inpath '/home/hivedata/user_role.txt' overwrite into table user_role;
create table role(
roleid int,
rolename string)
row format delimited fields terminated by ',';
load data local inpath '/home/hivedata/role.txt' overwrite into table role;
select
tmp.uid,
tmp.uname,
tmp.roleid,
role.rolename,
count(tmp.roleid) --count(1)也可以
from role join
(
select
uid,
uname,
roleid
from user_role
lateral view explode(role) t as roleid) tmp
on tmp.roleid=role.roleid
group by tmp.uid,tmp.uname,tmp.roleid,role.rolename;
[外链图片转存失败(img-KXeS0TZH-1569338883147)(E:\千峰培训\hadoop第四周笔记\1568824364983.png)]
30: 统计两个人的通话总时长
有如下通话记录:
呼叫 被叫 时长
goudan haoge 01:01:01
goudan mazi 00:11:21
goudan laowang 00:19:01
goudan Jingba 00:21:01
goudan weige 01:31:17
haoge mazi 00:51:01
haoge mazi 01:11:19
haoge laowang 00:00:21
haoge laowang 00:23:01
laowang mazi 01:18:01
laowang weige 00:18:00
Jingba haoge 00:01:01
Jingba haoge 00:00:06
Jingba haoge 00:02:04
Jingba haoge 00:02:54
haoge laowang 01:00:13
haoge laowang 00:01:01
haoge goudan 00:01:01
使用hql统计两个人的通话总时长(用户之间互相通话的时长)?
create table callme(
huname string,
huiname string,
times string
)
row format delimited fields terminated by ' '
;
load data local inpath '/home/hivedata/callme.txt' into table callme;
-- Q30: total talk time between each ordered pair (caller, callee),
-- adding the reverse direction's total when one exists.
-- NOTE(review): summing unix_timestamp("HH:mm:ss") values and
-- re-formatting with from_unixtime only works while each total stays
-- under 24 hours, and unix_timestamp of a time-only string is
-- timezone-dependent — confirm the session timezone and data ranges
-- before relying on this.
select
tmp1.huname,
tmp1.huiname,
-- forward total + reverse total, minus one day's worth of epoch offset
-- introduced by adding two absolute timestamps together
from_unixtime( (unix_timestamp(tmp1.sumt,"HH:mm:ss") + unix_timestamp(nvl(tmp2.sumt,'00:00:00'), "HH:mm:ss") - unix_timestamp("24:00:00","HH:mm:ss") ),"HH:mm:ss")
from(
-- per (caller, callee) total duration, formatted back to HH:mm:ss
select
huname,
huiname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp1
left join
(
-- the same totals, joined in the reverse direction (callee -> caller)
select
huiname,
huname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp2
on tmp1.huiname = tmp2.huname and tmp2.huiname = tmp1.huname
;
31:统计三个表中互不重复的数
ABC三个hive表 每个表中都只有一列int类型且列名相同,求三个表中互不重复的数
a.txt b.txt c.txt
1 2 1
2 3 2
3 11 3
4 12 11
5 14 5
6 15 6
7 16 7
8 18 8
9 35 20
6 30
7 40
8
-- Q31: three one-column tables of ints.
create table a1(
id int
);
create table b1(
id int
);
create table c1(
id int
);
-- BUG FIX: the original issued a single LOAD (into c1) BEFORE table c1
-- existed, and never loaded a1 or b1; create all three tables first,
-- then load each file into its own table.
load data local inpath '/home/hivedata/a1.txt' into table a1;
load data local inpath '/home/hivedata/b1.txt' into table b1;
load data local inpath '/home/hivedata/c1.txt' into table c1;
-- Q31: values that appear in exactly one of the three tables.
-- BUG FIX: if a value occurred twice WITHIN a single table it would be
-- counted twice and wrongly excluded even though it appears in only one
-- table; dedupe each branch before the union.
select
tmp.id
from
(select distinct
id
from
a1
union all
select distinct
id
from
b1
union all
select distinct
id
from
c1) tmp
group by tmp.id
having count(tmp.id)=1
;
32
laowang mazi 01:18:01
laowang weige 00:18:00
Jingba haoge 00:01:01
Jingba haoge 00:00:06
Jingba haoge 00:02:04
Jingba haoge 00:02:54
haoge laowang 01:00:13
haoge laowang 00:01:01
haoge goudan 00:01:01
使用hql统计两个人的通话总时长(用户之间互相通话的时长)?
create table callme(
huname string,
huiname string,
times string
)
row format delimited fields terminated by ' '
;
load data local inpath '/home/hivedata/callme.txt' into table callme;
select
tmp1.huname,
tmp1.huiname,
from_unixtime( (unix_timestamp(tmp1.sumt,"HH:mm:ss") + unix_timestamp(nvl(tmp2.sumt,'00:00:00'), "HH:mm:ss") - unix_timestamp("24:00:00","HH:mm:ss") ),"HH:mm:ss")
from(
select
huname,
huiname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp1
left join
(
select
huiname,
huname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp2
on tmp1.huiname = tmp2.huname and tmp2.huiname = tmp1.huname
;