hive的练习题

hive的练习题

1:编写sql实现每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数

数据: userid,month,visits
A,2015-01,5 
A,2015-01,15 
B,2015-01,5 
A,2015-01,8 
B,2015-01,25 
A,2015-01,5 
A,2015-02,4 
A,2015-02,6 
B,2015-02,10 
B,2015-02,5 
A,2015-03,16 
A,2015-03,22 
B,2015-03,23 
B,2015-03,10 
B,2015-03,1 


create table visits(
userid string, 
month string, 
visits int 
) 
row format delimited fields terminated by ','
;
load data local inpath '/hivedata/visits.txt' overwrite into table visits;



-- For each user and month: that month's total visits, the max single-month
-- visits so far, and the cumulative visits to date.
-- Note: with SORT BY inside the window and no explicit frame, Hive uses the
-- default frame "rows between unbounded preceding and current row", so
-- max()/sum() here are running (cumulative) aggregates, as required.
select 
userid, 
month, 
visits, 
max(visits) over(distribute by userid sort by month) maxvisit, 
sum(visits) over(distribute by userid sort by month) totalvisit
from (
-- collapse multiple rows per (user, month) into one monthly total first
select 
userid, 
month, 
sum(visits) visits 
from visits 
group by userid,month
) t1 ;

2: 求出每个栏目的被观看次数及累计观看时长

数据: vedio表
Uid channel min 
1   1   23 
2   1   12 
3   1   12 
4   1   32 
5   1   342 
6   2   13 
7   2   34 
8   2   13 
9   2   134


参考答案: create table video( 
Uid int, 
channel string, 
min int ) 
row format delimited fields terminated by ' ' 
; 
load data local inpath '/home/hivedata/video.txt' into table video;
-- Per-channel statistics: number of views (one row = one view) and the
-- accumulated watch time in minutes.
select
    channel,
    count(uid)  num,    -- count(1) would be equivalent here
    sum(min)    total
from video
group by channel;


3: 编写连续7天登录的总人数

数据: t1表
Uid  dt  login_status(1登录成功,0异常)
1  2019-07-11 1 
1  2019-07-12 1 
1  2019-07-13 1 
1  2019-07-14 1 
1  2019-07-15 1 
1  2019-07-16 1 
1  2019-07-17 1 
1  2019-07-18 1 
2  2019-07-11 1 
2  2019-07-12 1 
2  2019-07-13 0 
2  2019-07-14 1 
2  2019-07-15 1 
2  2019-07-16 0 
2  2019-07-17 1 
2  2019-07-18 0
3  2019-07-11 1 
3  2019-07-12 1 
3  2019-07-13 1 
3  2019-07-14 1 
3  2019-07-15 1 
3  2019-07-16 1 
3  2019-07-17 1 
3  2019-07-18 1
-- Login events: one row per user per day, login_status 1 = success, 0 = failure.
create table login1(
uid int, 
dt string, 
login_status int 
) row format delimited fields terminated by ' '
;
-- Bug fix: the original LOAD targeted a non-existent table `login`;
-- it must load into the table just created, `login1`.
load data local inpath '/hivedata/login.txt' into table login1;
-- Users who logged in successfully on 7 or more consecutive days.
-- Trick: dt minus row_number() is constant within one unbroken run of days,
-- so grouping by (uid, that difference) groups each streak.
-- Bug fix: the original `having count(uid) = 7` only matched streaks of
-- exactly 7 days; users 1 and 3 in the sample data have 8-day streaks and
-- were silently dropped. Use >= 7.
select
uid, 
dt 
from 
(
select 
t1.uid uid, 
date_sub(t1.dt,t1.rm) dt
from (
select 
uid, 
dt,
row_number() over(distribute by uid sort by dt) rm 
from 
login1 
where login_status=1
) t1
)t2 
group by uid,dt 
having count(uid) >= 7 ;


-- Same streak computation collapsed into a single subquery.
-- Bug fix: the original `having count(uid) > 7` excluded exactly-7-day
-- streaks; "7 consecutive days" means at least 7, so use >= 7.
select
tmp.uid uid,
tmp.dt
from(
select
uid,
date_sub(dt,row_number() over(distribute by uid sort by dt)) as dt,
login_status
from login1
where login_status=1
) tmp
group by uid,dt
having count(uid)>=7
;

4:编写sql语句实现每班前三名,分数一样并列,同时求出前三名按名次排序的依次分差:

数据: stu表 
Stu_no class score
1	1901	90
2	1901	90
3	1901	83
4	1901	60
5	1902	66
6	1902	23
7	1902	99
8	1902	67
9	1902	87

select
tmp.class,
tmp.stu_no,
tmp.score,
tmp.score-nvl(LAG(tmp.score) over(distribute by tmp.class sort by rn),0),
tmp.rn
from(
select
class,
stu_no,
score,
row_number() over(distribute by class sort by score desc) rn
from student
) tmp
where tmp.rn<4
;



-- Student scores, tab-delimited: student number, class, score.
create table student( 
stu_no int, 
class string, 
score int
) 
row format delimited fields terminated by '\t'
;
-- Bug fix: the original LOAD targeted `stu`, but the table created above
-- (and queried below) is `student`.
load data local inpath '/home/hivedata/stu.txt' into table student;
select 
class, 
stu_no, 
score, 
dr, 
score-nvl(lag(score) over(distribute by class sort by dr),0) 
from (
select 
class, 
stu_no,
score, 
row_number() over(distribute by class sort by score desc) dr 
from student) t1 
where t1.dr<4 ;


5:每个店铺的当月销售额和累计到当月的总销售额

数据:
店铺,月份,金额 
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000

create table shop1(
 shopid string,
 month string,
 money int ) 
 row format delimited fields terminated by ',' ;
 load data local inpath '/home/hivedata/shop1.txt' overwrite into table shop1;
 
 
 -- Monthly sales per shop plus the running total up to that month.
 -- Bug fix: the original computed tmp.m + lag(tmp.m,1,0), which adds only
 -- the PREVIOUS month (correct by accident for 2 months of data, wrong from
 -- month 3 onward). A windowed sum over the ordered months gives the true
 -- cumulative amount; the redundant outer GROUP BY is removed.
 select
 shopid,
 month,
 tmp.m,
 sum(tmp.m) over(partition by shopid order by month)
 from(
 select
 shopid,
 month,
 sum(money) m
 from
 shop1
 group by shopid,month
 ) tmp;
 
-- Cumulative monthly sales per shop via a windowed running sum
-- (default frame: unbounded preceding .. current row).
select
t1.shopid,
t1.month,
t1.m,
sum(t1.m) over(partition by shopid order by month)
from
(select 
shopid,
month,
sum(money) m
from
shop1
group by shopid,month)t1
; -- fix: the statement terminator was missing

6:订单及订单类型行列转换

t1表: 

order_id order_type order_time

111 N 10:00

111 A 10:05

111 B 10:10

是用hql获取结果如下: 

order_id order_type_1 order_type_2 order_time_1 order_time_2

111 N A 10:00 10:05

111 A B 10:05 10:10

-- Order events: id, type and time, space-delimited.
create table t11
(order_id int,
order_type string,
order_time string)
row format delimited fields terminated by ' '
; -- fix: missing statement terminator made the script unrunnable

load data local inpath '/home/hadoop/exam/t11' into table t11
; -- fix: missing statement terminator

-- Pair each order event with the following one (sliding window of 2 rows),
-- producing (type, next type, time, next time); the last event per order has
-- no successor and is filtered out.
select
*
from
(select 
order_id,
order_type,
lead(order_type,1) over(partition by order_id order by order_time) order_type1,
order_time, 
lead(order_time,1) over(partition by order_id order by order_time) order_time1
from 
t11)t1
where t1.order_type1 is not null
; -- fix: the statement terminator was missing

7://找出用户在表中的第一次行为

select 
t1.id,
t1.time,
t1.action
from
(select 
id,
time,
action,
first_value(time) over(partition by id order by time) first
from
test2)t1
where t1.time=t1.first;

8: 某APP每天访问数据存放在表access_log里面,包含日期字段 ds,用户类型字段user_type,用户账号user_id,用户访问时间 log_time,请使用hive的hql语句实现如下需求

(1)、每天整体的访问UV、PV? 

(2)、每天每个类型的访问UV、PV? 

(3)、每天每个类型中最早访问时间和最晚访问时间? 

(4)、每天每个类型中访问次数最高的10个用户?

本题未提供样例数据,无法给出可运行的参考答案。

9:每个用户连续登陆的最大天数

login表 
uid,udate
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
结果如下: 
uid cnt_days 
1 3 
2 2 
3 1 
4 3

select
b.uid,
max(b.c)
from (
select
uid,
count(a.e) c
from (
select
uid,
udate d,
date_sub(udate,row_number() over(partition by uid order by udate)) as e
from login) a
group by a.uid,a.e) b
group by b.uid;




select
uid,
max(tmp1.total)
from(
select
tmp.uid,
count(tmp.dt) total
from(
select
uid,
date_sub(udate,row_number() over(partition by uid order by udate)) as dt
from login
) tmp
group by tmp.uid,tmp.dt
) tmp1
group by tmp1.uid
;

10:使用hive的hql实现男女各自第一名及其它

id sex chinese math 
0 0 70 50
1 0 90 70
2 1 80 90

1、男女各自语文第一名(0:男,1:女) 

2、男生成绩语文大于80,女生数学成绩大于70

create table cource
(id int,
sex int,
chinese int,
math int)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/cource.txt' into table cource;



1:
select
tmp.sex,
fir
from
(
select
sex,
first_value(chinese) over(partition by sex order by chinese desc) fir
from
cource) tmp
;


select
sex,
first_value(chinese) over(partition by sex order by chinese desc) fir
from
cource;
或者:
select
*
from
(select 
id,
sex,
chinese,
math,
row_number() over(partition by sex order by chinese desc) rr
from
cource)t1
where t1.rr = 1


2:
-- Requirement 2: boys (sex=0) with chinese > 80, girls (sex=1) with math > 70.
select
* 
from
cource
where sex=0 and chinese>80
union all  -- the two branches are disjoint (sex=0 vs sex=1), so UNION's dedup is wasted work
select
*
from
cource
where sex=1 and math>70;  -- bug fix: column name was misspelled as `manth`

10-2:课程行转列(原文编号与上一题重复)

数据:
t1表
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e

根据编写sql,得到结果如下(表中的1表示选修,表中的0表示未选修):
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

select id, 
sum(case course when 'a' then 1 else 0 end) a,
sum(case course when 'b' then 1 else 0 end) b,
sum(case course when 'c' then 1 else 0 end) c, 
sum(case course when 'd' then 1 else 0 end) d, 
sum(case course when 'e' then 1 else 0 end) e, 
sum(case course when 'f' then 1 else 0 end) f 
from t1 group by id ;



-- Pivot per-student subject scores into columns plus a total.
-- Bug fixes: (1) the non-aggregated `id` requires GROUP BY id;
-- (2) double-quoted aliases are string literals in HiveQL — column
-- aliases must use backticks.
-- NOTE(review): the source `score` table (id, subject, score) is not
-- defined in this file — confirm its schema against the original exercise.
select
id,
sum(case when subject="语文" then score else 0 end) as `语文`,
sum(case when subject="数学" then score else 0 end) as `数学`,
sum(case when subject="英语" then score else 0 end) as `英语`,
sum(case when subject="政治" then score else 0 end) as `政治`,
sum(score) as total
from
score
group by id;

11:编写sql实现如下:

数据: t1表 

uid tags 
1 1,2,3
2 2,3
3 1,2

编写sql实现如下结果: 
uid tag 
1 1 
1 2 
1 3 
2 2 
2 3 
3 1 
3 2
-- uid with a comma-separated tag list, one row per user.
create table t111
(uid int,
tags string)
row format delimited fields terminated by ' ';
load data local inpath '/home/hadoop/exam/t111' into table t111
; -- fix: missing terminator between LOAD and SELECT
-- One output row per (uid, tag): explode the comma-separated list.
select 
uid,
tag
from t111
lateral view explode(split(tags,',')) tags as tag
;

12:用户标签连接查询

数据:
T1表:
Tags
1,2,3
1,2
2,3

T2表:
Id  lab
1 A
2 B
3 C

根据T1和T2表的数据,编写sql实现如下结果:
ids tags
1,2,3   A,B,C
1,2 A,B
2,3 B,C

create table tab1(
tags array<string>
)
row format delimited fields terminated by ' '
collection items terminated by ','
;

create table tab2( 
id int, 
lab string 
) 
row format delimited fields terminated by ' ' ; 


select tags,cj from tab1 lateral view explode(tags) tags as cj;
select
tmp01.tags,
collect_list(tmp01.lab)
from(
select
tmp.tags,
tab2.lab
from(
select tags,cj from tab1 lateral view explode(tags) tags as cj
) tmp inner join tab2
on tmp.cj=tab2.id) tmp01
group by tmp01.tags;

13:用户标签组合

数据: 

t1表: 
id tag flag 
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8 

编写sql实现如下结果: 
id tag flag
a b 1|2|3
c d 6|8

create table s1
(id string,
tag string,
flag string)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/s1' into table s1;

-- One row per (id, tag) with the distinct flags joined by '|'.
-- Improvement: collect_set gives no ordering guarantee; sort_array makes
-- the output deterministic and matches the expected "1|2|3" / "6|8".
select
id,
tag,
concat_ws('|',sort_array(collect_set(flag)))
from
s1
group by id,tag;

-- Same result via a pre-sorted subquery.
-- Bug fix: collect_list keeps duplicates — for id 'c' it yields "6|8|8"
-- instead of the expected "6|8"; collect_set deduplicates.
select
t1.id,
t1.tag,
concat_ws("|",collect_set(t1.flag))
from
(select
id id,
tag tag,
flag flag
from s1
order by flag)t1
group by t1.id,t1.tag;

14: 用户标签行列互换

用户标签行列互换

t1表 
uid name tags 
1 goudan chihuo,huaci
2 mazi sleep
3 laotie paly

编写sql实现如下结果: 
uid name tag
1 goudan chihuo 
1 goudan huaci 
2 mazi sleep 
3 laotie paly

create table k1
(
uid int, 
name string,
tags array<string>
)
row format delimited fields terminated by ' '
collection items terminated by ',';

load data local inpath '/home/hivedata/k1' into table k1


select uid,name,cj from k1 lateral view explode(tags) tags as cj; 

15:词频统计

词频统计

数据: 
t1表: 
uid contents
1 i|love|china
2 china|is|good|i|i|like 

统计结果如下,如果出现次数一样,则按照content名称排序: 
content cnt
i 3
china 2
good 1
like 1
love 1
is 1

create table l1
(uid int,
contents string)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/l1' into table l1;

-- Bug fix: split() takes a regex; an unescaped '|' is alternation and splits
-- between every character. It must be escaped as '\\|' (as the second
-- statement already did).
select explode(split(contents,'\\|')) from l1;
select cj from l1 lateral view explode(split(contents,'\\|')) contents as cj ;


-- Word frequency over '|'-separated contents.
-- Bug fix: the requirement says ties are ordered by word name, so a
-- secondary sort key on the word is required (cf. the correct one-liner below).
select
t1.cj cj,
count(t1.cj) cnt
from (
select cj from l1 lateral view explode(split(contents,'\\|')) contents as cj
) t1
group by t1.cj
order by cnt desc, cj;



select ct, count(1) cn from (select ct from l1 lateral view explode(split(contents,"\\|")) tmp as ct) tmp group by ct order by cn desc,ct ;

16: 兴趣行转列

t1表 
name    sex hobby 
janson  男   打乒乓球、游泳、看电影 
tom 男   打乒乓球、看电影

hobby最多3个值,使用hql实现结果如下:
name    sex hobby1  hobby2  hobby3
janson  男   打乒乓球    游泳     看电影 
tom 男   打乒乓球    看电影
create table ho(
name  string,
sex  string, 
hobby array<string> 
) 
row format delimited fields terminated by '\t' 
collection items terminated by ',' 
; 

load data local inpath '/home/hivedata/hobby.txt' overwrite into table ho;

select 
name, 
sex, 
case when true then nvl(hobby[0],"") else null end hobby1, 
case when true then nvl(hobby[1],"") else null end hobby2, 
case when true then nvl(hobby[2],"") else null end hobby3
from ho
;

17:求top3英雄及其pick率----------------------

1	亚索,挖掘机,艾瑞,洛,卡沙
2	亚索,盖伦,奥巴马,牛头,皇子
3	亚索,盖伦,艾瑞,宝石,琴女
4	亚索,盖伦,赵信,老鼠,锤石

create table heros(
id int,
names array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';

load data local inpath '/home/hivedata/heros.txt' overwrite into table heros;

-- Top-3 heroes by pick count, with pick rate = picks / all picks.
-- Bug fixes: (1) the original grouped by (cj, tal) and divided tal by
-- SUM(tal) of its own one-row group, so the ratio was always 1.0 — the
-- denominator must be a window total over ALL rows; (2) LIMIT 3 without
-- ORDER BY returns three arbitrary rows, not the top 3.
select
tmp.cj,
tmp.tal,
round(tmp.tal/sum(tmp.tal) over(),2)
from
(select
cj,
count(cj) tal
from heros
lateral view explode(names) names as cj
group by cj) tmp
order by tmp.tal desc
limit 3;



select
name,
count(1) ann,
round(count(1)/totalcn,2) 
from (
select 
count(name) over() totalcn, 
name 
from 
heros 
lateral view explode(names) t as name
) t1 
group by name,totalcn
order by ann desc
limit 3;


18:时间格式转换

数据: t1表
20190730 20190731
编写sql实现如下的结果:
2019-07-30 2019-07-31


create table dt(
dt string
);
load data local inpath '/home/hivedata/dt.txt' overwrite into table dt;
select from_unixtime(
unix_timestamp(dt,"yyyyMMdd"),"yyyy-MM-dd" 
) 
from 
dt 
;


19:两个表A 和B ,均有key 和value 两个字段,写一个SQL语 句,将B表中的value值置成A表中相同key值对应的value值

A:
key vlaue 
k1 123
k2 234
k3 235
B:
key value
k1 111
k2 222
k5 246
使用hive的hql实现,结果是B表数据如下: 
k1 123
k2 234
k5 246

create table A
(key string,
value int)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/A' into table A;
create table B
(key string,
value int)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/B' into table B;


select
B.key,
if(A.key is null,B.value,A.value) value
from
B left join A
on B.key = A.key;

19-2:有用户表user(uid,name)以及黑名单表Banuser(uid)

1、用left join方式写sql查出所有不在黑名单的用户信息
2、用not exists方式写sql查出所有不在黑名单的用户信息

create table u( 
id string, 
name string )
row format delimited fields terminated by ',' ;

create table banuser( 
id string 
) 
;
load data local inpath '/hivedata/banuser.txt' overwrite into table banuser; load data local inpath '/hivedata/u.txt' overwrite into table u;
1、
select
u.id, 
u.name 
from u 
left join banuser b 
on u.id=b.id
where 
b.id is null ;
2、
select
u.id, 
u.name
from 
u 
where
not exists (
select 1 from banuser b where u.id=b.id
) 
;

20访问日志正则提取

表t1(注:数据时正常的访问日志数据,分隔符全是空格)
8.35.201.160 - - [16/May/2018:17:38:21 +0800] "GET /uc_server/data/avatar/000/01/54/22_avatar_middle.jpg HTTP/1.1" 200 5396

使用hive的hql实现结果如下:
ip  dt  url
8.35.201.160    2018-5-16 17:38:21  /uc_server/data/avatar/000/01/54/22_avatar_middle.jpg

-- Raw access-log lines, one full log line per row.
create table login_log( log  string )
; -- fix: missing statement terminator between CREATE and LOAD
load data local inpath '/hivedata/login_log.txt' overwrite into table login_log;
select regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",1), from_unixtime(unix_timestamp(regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",2),"dd/MMM/yyyy:HH:mm:ss Z"),"yyyy-MM-dd HH:mm:ss"), regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",4) from login_log where log regexp "([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)" ;

21 使用hive的hql查询用户所在部门

dpt表
dpt_id dpt_name
1   产品 
2   技术
user_dpt表 
user_id dpt_id
1   1 
2   1 
3   2 
4   2 
5   3
result表
user_id dpt_id dpt_name
1       1       产品 
2       1       产品 
3       2       技术 
4       2       技术 
5       3       其他部门

create table dpt( 
dpt_id int, 
dpt_name string 
) 
row format delimited fields terminated by '\t' ; 

load data local inpath '/hivedata/dpt.txt' overwrite into table dpt;

create table user_dpt(
user_id int, 
dpt_id int )
row format delimited fields terminated by '\t' ; 
load data local inpath '/hivedata/user_dpt.txt' overwrite into table user_dpt;


select 
t1.user_id, 
t1.dpt_id, 
nvl(t2.dpt_name,"其他部门")
from
user_dpt t1 
left join dpt t2 
on t1.dpt_id=t2.dpt_id ;

22 查出每个学期每门课程最高分记录

course_score表: 
id,name,course,score,term 
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,lisi,数学,70,2016
4,wangwu,化学,80,2017
5,zhangsan,语文,85,2015
6,zhangsan,化学,80,2015
7,wangwuww,化学,90,2017
8,zhangsanwww,语文,85,2015

编写sql完成如下查询,一次查询实现好,也可以写多次查询实现: 
1、查出每个学期每门课程最高分记录(包含全部5个字段) 
2、查出单个学期中语文课在90分以上的学生的数学成绩记录(包含全部5个字段)

create table test4
(id int,
name string,
course string,
score int,
term int)
row format delimited fields terminated by ','

load data local inpath '/home/hivedata/test4' into table test4;

select 
t1.*
from
test4 t1
inner join
(select 
course,
max(score) score,
term
from
test4
group by course,term)t2
on t2.score = t1.score and t2.term = t1.term and t1.course = t2.course;

select 
test4.*
from test4
inner join
(select 
id,name,course,score,term
from test4
where score>=90 and course='语文')t
on
test4.term = t.term
where test4.course='数学' 
 ----------------------------and t.id is not null



23:设计数据库表,用来存放学生基本信息,课程信息,学生的课 程及成绩,并给出sql语句,查询平均成绩大于85的所有学生

stu_1 
id,name,age,addr 
1,zs1,22,bj 
2,zs2,22,bj 
3,zs3,22,bj 
4,zs4,22,bj 
5,zs5,22,bj
course_1 
cid,cname 
1,语文 
2,数学 
3,政治 
4,美术 
5,历史
course_sc 
id,cid,score 
1,1,87 
1,2,92
1,3,69 
2,2,83 
2,3,92 
2,4,87 
2,5,83
create table stu_1( 
id string, 
name string,
age int, 
addr string )
row format delimited fields terminated by ',' ;
create table course_1( 
cid string, 
cname string 
) 
row format delimited fields terminated by ',' ;

create table course_sc(
id string,
cid string, 
score int )
row format delimited fields terminated by ',' ;

load data local inpath '/hivedata/course_1.txt' overwrite into table course_1; load data local inpath '/hivedata/stu_1.txt' overwrite into table stu_1; load data local inpath '/hivedata/course_sc.txt' overwrite into table course_sc; 


select cs.id, avg(score) avgscore from course_sc cs group by cs.id having avgscore>85 ;

24:每个渠道的下单用户数、订单总金额

请使用hive hql查询出2019-08-06号 每个渠道的下单用户数、订单总金额。 hql语句实现,结果表头如下: channel user_num    order_amount

create table order_34( 
order_id bigint,
user_id int, 
amount double,
channel string,
time string comment 'yyyy-MM-dd HH:mm:ss' 
) 
partitioned by(dt string comment 'yyyy-MM-dd')
row format delimited fields terminated by ',' ;

1,100,19,a,2019-08-06 19:00:00 
2,101,19,b,2019-08-06 19:00:01 
3,100,19,a,2019-08-05 19:00:00 
4,101,19,b,2019-08-05 19:00:01
5,102,19,a,2019-08-06 19:00:00
6,102,19,a,2019-08-06 19:00:01

load data local inpath '/hivedata/order_34.txt' into table order_34 partition(dt='2019-08-06');

hive hql查询出2019-08-06号 每个渠道的下单用户数、订单总金额。

select 
channel,
count(distinct user_id), 
sum(amount) 
from order_34
where dt='2019-08-06' and to_date(time)='2019-08-06' 
group by 
channel,to_date(time) ;

25:登录且阅读的用户数,已经阅读书籍数量及其它

有如下三张表: 
表A(登录表):
ds  user_id
2019-08-06  1
2019-08-06  2
2019-08-06  3
2019-08-06  4
表B(阅读表): 
ds  user_id read_num 
2019-08-06  1   2 
2019-08-06  2   3 
2019-08-06  3   6
表C(付费表):
ds  user_id price 
2019-08-06  1   55.6
2019-08-06  2   55.8
基于上述三张表,请使用hive的hql语句实现如下需求:
(1)、用户登录并且当天有阅读的用户数,以及阅读书籍数量 
(2)、用户登录并且阅读,但是没有付费的用户数
(3)、用户登录并且付费,付费用户书籍和金额



create table if not exists lg(
ds string,
user_id string
)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/lg.txt' into table lg;

create table if not exists read(
ds string,
user_id string,
read_num  int)
row format delimited fields terminated by ' ';
load data local inpath '/home/hivedata/read.txt' into table read;

create table if not exists pay(
ds string,
user_id string,
price  double)
row format delimited fields terminated by ' ';

load data local inpath '/home/hivedata/pay.txt' into table pay;
(1)、用户登录并且当天有阅读的用户数,以及阅读书籍数量
select
count(1),
sum(read_num)
from read r
join lg l
on l.user_id = r.user_id and l.ds = r.ds
group by r.ds;
(2)、用户登录并且阅读,但是没有付费的用户数
select
r.ds ds, 
count(1)
from read r
left semi join lg l
on l.user_id = r.user_id and l.ds = r.ds
left join pay  p
on r.ds=p.ds and r.user_id=p.user_id
where p.user_id is null
group by r.ds;
(3)、用户登录并且付费,付费用户id和金额
select 
p.user_id,
p.price
from pay p
left join lg
on p.user_id=lg.user_id and p.ds=lg.ds
where lg.user_id is not null;

26:高消费者报表

有三个表,分别是: 
区域(district) 区域中有两个字段分别是区域Id(disid)和区域名称(disname) 
城市(city) 城市有两个字段分别是城市ID(cityid)和区域ID(disid)
订单(order) 订单有四个字段分别是订单ID(orderid)、用户ID(userid)、城市ID(cityid)和消费金 额(amount)。
district表:
disid   disname
1   华中 
2   西南
city表: 
cityid  disid 
1   1
2   1 
3   2 
4   2
5   2
order表: 
oid userid  cityid  amount 
1   1   1   1223.9 
2   1   1   9999.9 
3   2   2   2322 
4   2   2   8909 
5   2   3   6789 
6   2   3   798 
7   3   4   56786 
8   4   5   78890
高消费者是消费金额大于1W的用户,使用hive hql生成如下报表: 区域名 高消费者人数  消费总额


create table district( 
disid string, 
disname string )
row format delimited fields terminated by '\t' ;

create table city( 
cityid string,
disid string )
row format delimited fields terminated by '\t' ;


create table order_29( 
oid string,
userid string, 
cityid string,
amount double ) 
row format delimited fields terminated by '\t' ; 

load data local inpath '/hivedata/district.txt' into table district;
load data local inpath '/hivedata/city.txt' into table city;
load data local inpath '/hivedata/order_29.txt' into table order_29;

select 
b.disid,
c.disname, 
a.userid, 
sum(a.amount)
from order_29 a join city b 
on a.cityid=b.cityid 
join district c 
on b.disid = c.disid 
group by b.disid,c.disname,a.userid;

27:请使用sql计算pv.uv

数据:t1表
uid dt    url
1   2019-08-06  http://www.baidu.com
2   2019-08-06  http://www.baidu.com
3   2019-08-06  http://www.baidu.com
3   2019-08-06  http://www.soho.com
3   2019-08-06  http://www.meituan.com
3   2019-08-06 

结果如下:
date        uv  pv
2019-08-06  3   5


create table uv_pv(
uid string,
dt string,
url string
)
row format delimited fields terminated by ',';

load data local inpath '/hivedata/uv_pv.txt' overwrite into table uv_pv;

select
dt,
count(distinct uid),
count(url)
from uv_pv
group by dt
;

28:使用hive的hql实现买过商品3的用户及其昨日消费

orderid userid  productid   price   timestamp   date
123,00101,3,200,1535945356,2019-08-28
124,00100,1,200,1535945356,2019-08-28
125,00101,3,200,1535945356,2019-08-29
126,00101,2,200,1535945356,2019-08-29
127,00102,5,200,1535945356,2019-08-29
128,00103,3,200,1535945356,2019-08-29
129,00103,3,200,1535945356,2019-08-29

create table buy_log(
orderid int,
userid string,
productid int,
price double,
tm string,
dt string
)
row format delimited fields terminated by ',';

load data local inpath '/home/hivedata/buy_log.txt' overwrite into table buy_log;

select 
userid userid,
sum(case when dt=date_sub(current_date,1) then price else 0 end) pay
from
(select
userid,
price,
dt
from buy_log
where productid=3 and (dt=current_date or dt=date_sub(current_date,1))) t1
group by userid,dt
having pay>0
;

29:统计为用户配重了角色的用户角色数量

用户表和角色表,统计为用户配重了角色的用户角色数量(hql)
用户表t1:
uid uname   role
1   zs  1,1,2,2,2
2   ls  1,2
3   ww  2,3,3
角色表t2:
roleid  rolename
1,唤醒师
2,召唤师
3,魔法师
结果如下:
uid uname   roleid  cnt
1   zs  1   召唤师   2
1   zs  2   唤醒师   3
3   ww  3   魔法师   2

select


create table user_role(
uid int,
uname string,
role array<int>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';

load data local inpath '/home/hivedata/user_role.txt' overwrite into table user_role;

create table role(
roleid  int,
rolename string)
row format delimited fields terminated by ',';

load data local inpath '/home/hivedata/role.txt' overwrite into table role;

select
tmp.uid,
tmp.uname,
tmp.roleid,
role.rolename,
count(tmp.roleid)   --count(1)也可以
from role join
(
select
uid,
uname,
roleid
from user_role
lateral view explode(role) t as roleid) tmp
on tmp.roleid=role.roleid 
group by tmp.uid,tmp.uname,tmp.roleid,role.rolename;




(原文此处为一张图片,外链转存失败,未能显示。)

30: 统计两个人的通话总时长

有如下通话记录:
呼叫		被叫	  时长
goudan haoge 01:01:01
goudan mazi 00:11:21
goudan laowang 00:19:01
goudan Jingba 00:21:01
goudan weige 01:31:17
haoge mazi 00:51:01
haoge mazi 01:11:19
haoge laowang 00:00:21
haoge laowang 00:23:01
laowang mazi 01:18:01
laowang weige 00:18:00
Jingba haoge 00:01:01
Jingba haoge 00:00:06
Jingba haoge 00:02:04
Jingba haoge 00:02:54
haoge laowang 01:00:13
haoge laowang 00:01:01
haoge goudan 00:01:01

使用hql统计两个人的通话总时长(用户之间互相通话的时长)?




create table callme(
huname string, 
huiname string, 
times string 
) 
row format delimited fields terminated by ' '
;
load data local inpath '/home/hivedata/callme.txt' into table callme;



select
tmp1.huname,
tmp1.huiname,
from_unixtime( (unix_timestamp(tmp1.sumt,"HH:mm:ss") + unix_timestamp(nvl(tmp2.sumt,'00:00:00'), "HH:mm:ss") - unix_timestamp("24:00:00","HH:mm:ss") ),"HH:mm:ss")
from(
select
huname,
huiname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp1
left join
(
select
huiname,
huname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp2
on tmp1.huiname = tmp2.huname and tmp2.huiname = tmp1.huname
;

31:统计三个表中互不重复的数

ABC三个hive表 每个表中都只有一列int类型且列名相同,求三个表中互不重复的数
a.txt	b.txt	c.txt
1		2		1
2		3		2
3		11		3
4		12		11
5		14		5
6		15		6
7		16		7
8		18		8
9		35		20
		6		30
		7		40
		8
    
 
 -- Three single-column tables for the "numbers unique across A/B/C" exercise.
 create table a1(
id int
);
create table b1(
id int
);
create table c1(
id int
);
-- Bug fix: the original issued one LOAD (of c1.txt, into c1) before table
-- c1 was created, and never loaded a1 or b1 at all. Create all three tables
-- first, then load each table from its own file.
load data local inpath '/home/hivedata/a1.txt' into table a1;
load data local inpath '/home/hivedata/b1.txt' into table b1;
load data local inpath '/home/hivedata/c1.txt' into table c1;

select
tmp.id
from
(select
id
from
a1
union all
select
id
from
b1
union all
select
id
from
c1) tmp
group by tmp.id
having count(tmp.id)=1
;

32:统计两个人的通话总时长(内容与第30题重复)

laowang mazi 01:18:01
laowang weige 00:18:00
Jingba haoge 00:01:01
Jingba haoge 00:00:06
Jingba haoge 00:02:04
Jingba haoge 00:02:54
haoge laowang 01:00:13
haoge laowang 00:01:01
haoge goudan 00:01:01

使用hql统计两个人的通话总时长(用户之间互相通话的时长)?




create table callme(
huname string, 
huiname string, 
times string 
) 
row format delimited fields terminated by ' '
;
load data local inpath '/home/hivedata/callme.txt' into table callme;



select
tmp1.huname,
tmp1.huiname,
from_unixtime( (unix_timestamp(tmp1.sumt,"HH:mm:ss") + unix_timestamp(nvl(tmp2.sumt,'00:00:00'), "HH:mm:ss") - unix_timestamp("24:00:00","HH:mm:ss") ),"HH:mm:ss")
from(
select
huname,
huiname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp1
left join
(
select
huiname,
huname,
from_unixtime(sum(unix_timestamp(times,"HH:mm:ss")),"HH:mm:ss" ) sumt
from
callme
group by huname, huiname
) tmp2
on tmp1.huiname = tmp2.huname and tmp2.huiname = tmp1.huname
;

  • 5
    点赞
  • 35
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值