Hive场景题（持续更新）

最新推荐文章于 2023-02-01 13:32:29 发布
图样_Study
最新推荐文章于 2023-02-01 13:32:29 发布
阅读量208
点赞数
文章标签： hive 数据仓库
本文链接：https://blog.csdn.net/weixin_42462738/article/details/116590772
版权
开启本地模式
set hive.exec.mode.local.auto=true;
===========================================================================
1. 编写sql实现每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数
userid,month,visits
A,2015‐01,5
A,2015‐01,15
B,2015‐01,5
A,2015‐01,8
B,2015‐01,25
A,2015‐01,5
A,2015‐02,4
A,2015‐02,6
B,2015‐02,10
B,2015‐02,5
A,2015‐03,16
A,2015‐03,22
B,2015‐03,23
B,2015‐03,10
B,2015‐03,1
每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数

create table table1(
  userid string,
  month string,
  visits int
)
row format delimited fields terminated by ',';

load data local inpath '/opt/module/testdata/t1.txt' into table table1;


select
userid,
month,
sum(visits) DangYue
from table1
group by userid, month; t1


select
userid,
month,
max(DangYue) over(distribute by userid sort by month) maxvisit,
sum(DangYue) over(distribute by userid sort by month) sumvisit,
DangYue
from (select
userid,
month,
sum(visits) DangYue
from table1
group by userid, month)t1;
=========================================================================
2.求出每个栏目的被观看次数及累计观看时长
Uid channl min
1	1	23
2	1	12
3	1	12
4	1	32
5	1	342
6	2	13
7	2	34
8	2	13
9	2	134

create table table2(
uid int,
channl int,
min int
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/t2.txt' into table table2;

select
channl,
count(distinct uid) cnt,
sum(min) sum
from table2
group by channl;
=========================================================================
3.编写连续7天登录的总人数
Uid  dt  login_status(1登录成功,0异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1

create table table3(
uid int,
dt string,
login_status int
)
row format delimited fields terminated by ' ';

load data local inpath '/opt/module/testdata/t3.txt' into table table3;

select
uid,
dt
row_number() over(distribute by uid sort by dt) rn
from table3
where login_status = 1; t1

select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table3
where login_status = 1)t1; t2

select
uid
from (select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table3
where login_status = 1)t1)t2
group by uid, newdt
having count(uid) > 6; t3

//最后还需要将uid个数再做统计
select
count(1) cnt
from (select
uid
from (select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table3
where login_status = 1)t1)t2
group by uid, newdt
having count(uid) > 6)t3;

===================================================================
4.编写sql语句实现每班前三名，分数一样并列，同时求出前三名按名次排序的一次的分差
Stu_no class score
1	1901	90
2	1901	90
3	1901	83
4	1901	60
5	1902	66
6	1902	23
7	1902	99
8	1902	67
9	1902	87
结果数据：
班级	stu_no	score	rn rn1	rn_diff
1901	1	90	1	1	90
1901	2	90	2	1	0
1901	3	83	3	2	-7
1902	7	99	1	1	99
1902	9	87	2	2	-12
1902	8	67	3	3	-20

create table table4(
stu_no int,
class int,
score int
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/t4.txt' into table table4;

*** tip: rank() 跳 1 1 1 4
		 dense_rank() 不跳 1 1 1 2

select
class,
stu_no,
score,
row_number() over(distribute by class sort by score desc) rn,
dense_rank() over(distribute by class sort by score desc) rn1,
score - nvl(lag(score) over(distribute by class sort by score desc), 0) rn_diff
from table4; t1

select
*
from (select
class,
stu_no,
score,
row_number() over(distribute by class sort by score desc) rn,
dense_rank() over(distribute by class sort by score desc) rn1,
score - nvl(lag(score) over(distribute by class sort by score desc), 0) rn_diff
from table4)t1
where rn < 4;

===================================================================
5.每个店铺的当月销售额和累计到当月的总销售额
店铺,月份,金额
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000
b,02,1500
c,02,350
c,02,280
a,03,350
a,03,250

create table table5(
shop string,
month int,
`money` int
)
row format delimited fields terminated by ',';

load data local inpath '/opt/module/testdata/t5.txt' into table table5;

select
shop,
month,
sum(money) summoney
from table5
group by shop, month; t1

select
shop,
summoney,
sum(summoney) over(distribute by shop sort by month) LeiJi
from (select
shop,
month,
sum(money) summoney
from table5
group by shop, month)t1;

结果如下：
shop    summoney        leiji
a       350     350
a       5000    5350
a       600     5950
b       7800    7800
b       2500    10300
c       470     470
c       630     1100

========================================================================
6.订单及订单类型行列互换
t1表:
order_id  order_type  order_time
111	N	10:00
111	A	10:05
111	B	10:10

是用hql获取结果如下：
order_id  order_type_1  order_type_2  order_time_1  order_time_2
111       N             A              10:00           10:05
111       A             B              10:05           10:10

create table table6(
order_id int,
order_type string,
order_time string
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/t6.txt' into table table6;


select
order_id order_id,
order_type order_type_1,
lead(order_type) over(distribute by order_id sort by order_time) order_type_2,
order_time order_time_1,
lead(order_time) over(distribute by order_id sort by order_time) order_time_2
from table6; t1

select
*
from (select
order_id order_id,
order_type order_type_1,
lead(order_type) over(distribute by order_id sort by order_time) order_type_2,
order_time order_time_1,
lead(order_time) over(distribute by order_id sort by order_time) order_time_2
from table6)t1
where order_type_2 is not null;

=================================================================================
7. 每个用户连续登陆的最大天数？
数据:
login表
uid,dt
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03

create table table7(
uid int,
dt string
)
row format delimited fields terminated by ',';

load data local inpath '/opt/module/testdata/t7.txt' into table table7;

select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table7; t1

select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table7)t1; t2
 
select
uid,
count(1) cnt
from (select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table7)t1)t2
group by uid, newdt; t3

select
uid,
max(cnt) maxcnt
from (select
uid,
count(1) cnt
from (select
uid,
date_sub(dt, rn) newdt
from (select
uid,
dt,
row_number() over(distribute by uid sort by dt) rn
from table7)t1)t2
group by uid, newdt)t3
group by uid;

结果如下：
uid cnt_days
1 3
2 2
3 1
4 3

=============================================================================
8.
id	sex	chinese	math
0	0	70	50
1	0	90	70
2	1	80	90

1、男女各自语文第一名（0:男，1:女）
2、男生成绩语文大于80，女生数学成绩大于70

create table table8(
id int,
sex int,
chinese int,
math int
)
row format delimited
fields terminated by '\t';

load data local inpath '/opt/module/testdata/t8.txt' into table table8;

1.
select
sex,
max(chinese)
from table8
group by sex;

2.
select
id,
sex,
chinese
from table8
where chinese > 80 and sex = 0
union
select
id,
sex,
math
from table8
where math > 70 and sex = 1;
========================================================================
9.用户登陆时间记录表,当月每次登陆一次会记录一条记录A表如下:
需计算出每个用户本月最大连续登录天数。
log_time            uid
2018-10-01 18:00:00,123
2018-10-02 18:00:00,123
2018-10-02 19:00:00,456
2018-10-04 18:00:00,123
2018-10-04 18:00:00,456
2018-10-05 18:00:00,123
2018-10-06 18:00:00,123
用户123最大连续暨录天数为3，用户456最大连续登录天数为1
uid     maxcnt
123     3
456     1

create table table9(
log_time string,
uid  string
)
row format delimited
fields terminated by ','
;

load data local inpath '/opt/module/testdata/t9.txt' into table table9;

select
uid,
to_date(log_time) newdt
from table9; t1

select
uid,
newdt,
row_number() over(distribute by uid sort by newdt) rn
from (select
uid,
to_date(log_time) newdt
from table9)t1; t2

select
uid,
date_sub(newdt, rn) nndt
from (select
uid,
newdt,
row_number() over(distribute by uid sort by newdt) rn
from (select
uid,
to_date(log_time) newdt
from table9)t1)t2; t3

select
uid,
count(1) cnt
from (select
uid,
date_sub(newdt, rn) nndt
from (select
uid,
newdt,
row_number() over(distribute by uid sort by newdt) rn
from (select
uid,
to_date(log_time) newdt
from table9)t1)t2)t3
group by uid, nndt; t4

select
uid,
max(cnt) maxcnt
from (select
uid,
count(1) cnt
from (select
uid,
date_sub(newdt, rn) nndt
from (select
uid,
newdt,
row_number() over(distribute by uid sort by newdt) rn
from (select
uid,
to_date(log_time) newdt
from table9)t1)t2)t3
group by uid, nndt)t4
group by uid;

==============================================================================
10.
id userid subject score
1 001 语文 90
2 001 数学 92
3 001 英语 80
4 002 语文 88
5 002 数学 90
6 002 英语 75.5
7 003 语文 70
8 003 数学 85
9 003 英语 90
10 003 政治 82
实现行列互换，结果如下

create table table10(
id int,
userid string,
subject string,
score double
)
row format delimited 
fields terminated by ' ';

load data local inpath '/opt/module/testdata/t10.txt' into table table10;

select
userid,
sum(case when subject='语文' then score else 0 end) chinese,
sum(case when subject='数学' then score else 0 end) math,
sum(case when subject='英语' then score else 0 end) english,
sum(case when subject='政治' then score else 0 end) politics,
sum(score) total
from table10
group by userid

union

select
if(1=1, 'total', 't'),
sum(chinese),
sum(math),
sum(english),
sum(politics),
sum(total)
from (select
userid,
sum(case when subject='语文' then score else 0 end) chinese,
sum(case when subject='数学' then score else 0 end) math,
sum(case when subject='英语' then score else 0 end) english,
sum(case when subject='政治' then score else 0 end) politics,
sum(score) total
from table10
group by userid)t1;

===============================================================================
11.编写sql实现如下：
数据：
t1表
uid	tags
1	1,2,3
2	2,3
3	1,2

编写sql实现如下结果：
uid	tag
1	1
1	2
1	3
2	2
2	3
3	1
3	2
create table table11(
uid int,
tags string
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/t11.txt' into table table11;

select 
uid,
tag
from table11
lateral view explode(split(tags, ',')) t1 as tag;

=============================================================================
12.
T1表:
Tags
1,2,3
1,2
2,3

T2表:
Id  lab
1 A
2 B
3 C

根据T1和T2表的数据，编写sql实现如下结果：
ids	tags
1,2,3	A,B,C
1,2	A,B
2,3	B,C

create table table12_1(
tags string
)
;

create table table12_2(
id int,
lab string
)
row format delimited fields terminated by ' ';

load data local inpath '/opt/module/testdata/t12_1.txt' into table table12_1;
load data local inpath '/opt/module/testdata/t12_2.txt' into table table12_2;

select
tags,
tag
from table12_1
lateral view explode(split(tags, ',')) t1 as tag; t2

select
t2.tags tags,
t3.lab lab
from (select
tags,
tag
from table12_1
lateral view explode(split(tags, ',')) t1 as tag)t2 join table12_2 t3 
on t2.tag = t3.id; t4

select
tags,
concat_ws(',', collect_list(lab)) labs
from (select
t2.tags tags,
t3.lab lab
from (select
tags,
tag
from table12_1
lateral view explode(split(tags, ',')) t1 as tag)t2 join table12_2 t3 
on t2.tag = t3.id)t4
group by tags;

=============================================================================
13.
id	tag flag
a	b	2
a	b	1
a	b	3
c	d	6
c	d	8
c	d	8

编写sql实现如下结果：
id	tag	flag
a	b	1|2|3
c	d	6|8

create table table13(
id string,
tag string,
flag string
)
row format delimited 
fields terminated by '\t';

load data local inpath '/opt/module/testdata/t13.txt' into table table13;

select
id,
tag,
concat_ws("|", collect_set(flag))
from table13
group by id, tag;

=============================================================================
14.
uid	name	tags
1	goudan	chihuo,huaci
2	mazi	sleep
3	laotie	paly

编写sql实现如下结果：
uid	name	tag
1	goudan	chihuo
1	goudan	huaci
2	mazi	sleep
3	laotie	paly

create table table14(
uid string,
name string,
tags string
)
row format delimited 
fields terminated by '\t';

load data local inpath '/opt/module/testdata/t14.txt' into table table14;

select
uid,
name,
tag
from table14
lateral view explode(split(tags, ',')) t1 as tag;

=================================================================================
15.
uid	contents
1	i|love|china
2	china|is|good|i|i|like

统计结果如下,如果出现次数一样，则按照content名称排序：
content	cnt
i	3
china	2
good	1
is	1
like	1
love	1


create table table15(
uid int,
contents string
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/table15.txt' overwrite into table table15;

//注意|放到split时需要使用\\来转义
select
content
from table15
lateral view explode(split(contents, '\\|')) t1 as content; t2

select
content,
count(1) cnt
from (select
content
from table15
lateral view explode(split(contents, '\\|')) t1 as content)t2
group by content
order by cnt desc, content;

============================================================
16.
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e

根据编写sql，得到结果如下(表中的1表示选修，表中的0表示未选修)：
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

create table table16(
id int,
course string
)
row format delimited 
fields terminated by ',';

load data local inpath '/opt/module/testdata/table16.txt' overwrite into table table16;

select
id,
sum(case when course = 'a' then 1 else 0 end) a,
sum(case when course = 'b' then 1 else 0 end) b,
sum(case when course = 'c' then 1 else 0 end) c,
sum(case when course = "d" then 1 else 0 end) d,
sum(case when course = "e" then 1 else 0 end) e,
sum(case when course = "f" then 1 else 0 end) f
from table16
group by id;

=====================================================================
17.
name	sex	hobbys
janson	男	打乒乓球、游泳、看电影
tom	男	打乒乓球、看电影

hobby最多3个值，使用hql实现结果如下:
name	sex	hobby1	hobby2	hobby3
janson	男 	打乒乓球    游泳     看电影
tom	男	打乒乓球	看电影

create table table17(
name  string,
sex  string,
hobbys string
)
row format delimited 
fields terminated by '\t'
;

load data local inpath '/opt/module/testdata/table17.txt' overwrite into table table17;

select
name,
sex,
regexp_replace(hobbys, '、', ' ')
from table17;



================================================================
18.
用户   商品
A P1
B P1
A P2
B P3

请你使用hql变成如下结果:
用户    P1   P2    P3
A        1    1    0     
B        1    0    1

create table table18(
uname string,
pro string
)
row format delimited 
fields terminated by ' '
;

load data local inpath '/opt/module/testdata/table18.txt' overwrite into table table18;

select
uname,
sum(case when pro = 'P1' then 1 else 0 end) P1,
sum(case when pro = 'P2' then 1 else 0 end) P1,
sum(case when pro = 'P3' then 1 else 0 end) P1
from table18
group by uname;

================================================================================
19.
name	course		score
aa	English	75
bb	math	85
aa	math	90

使用hql输出以下结果
name	English		math
aa		75			90
bb		0			85

create table table19(
sname string,
course string,
score int
)
row format delimited 
fields terminated by '\t'
;

load data local inpath '/opt/module/testdata/table19.txt' overwrite into table table19;

select
sname,
sum(case when course='English' then score else 0 end) english,
sum(case when course='math' then score else 0 end) math
from table19
group by sname;

===================================================================================
20.
id	names
1 貂蝉,诸葛亮,花木兰
2 貂蝉,达摩,马超
3 貂蝉,杨戬,诸葛亮

请用HiveSQL计算出出场次数最多的top3英雄及其pick率(=出现场数/总场数)

create table table20(
id int,
names string
)
row format delimited
fields terminated by ' '
;

load data local inpath '/opt/module/testdata/table20.txt' overwrite into table table20;

select
count(1) over(distribute by id) totalCnt,
name
from table20
lateral view explode(split(names, ',')) t1 as name; t2

select
name,
count(1) cnt,
round(count(1) / totalCnt, 2) pick
from (select
count(1) over(distribute by id) totalCnt,
name
from table20
lateral view explode(split(names, ',')) t1 as name)t2
group by name, totalCnt   --这里注意分组时有两个参数
order by cnt desc
limit 3;

===============================================================================
21.
获取当前时间戳：
select unix_timestamp();
select unix_timestamp(current_timestamp);
select unix_timestamp(current_date, 'yyyy-MM-dd');
获取"2019-07-31 11:57:25"对应的时间戳: (这种格式可以不写格式)
select unix_timestamp("2019-07-31 11:57:25");
获取"2019-07-31 11:57"对应的时间戳：
select unix_timestamp("2019-07-31 11:57", 'yyyy-MM-dd HH:mm');
获取时间戳:1564545445所对应的日期和时分秒： (默认转化为'yyyy-MM-dd HH:mm:ss')
select from_unixtime(1564545445);
获取时间戳:1564545446所对应的日期和小时(yyyy/MM/dd HH):
select from_unixtime(1564545446, 'yyyy/MM/dd HH');

================================================================================
22.
求出两个数据集的差集
t1表：
id name
1 zs
2 ls

t2表：
id name
1 zs
3 ww

结果如下：
id name
2 ls
3 ww

create table table22_1(
id string,
name string
)
row format delimited 
fields terminated by ' '
;

load data local inpath '/opt/module/testdata/table22_1.txt' overwrite into table table22_1;

create table table22_2(
id string,
name string
)
row format delimited 
fields terminated by ' '
;

load data local inpath '/opt/module/testdata/table22_2.txt' overwrite into table table22_2;

select
t1.id id,
t1.name name
from table22_1 t1 left join table22_2 t2
on t1.id = t2.id
where t2.name is null

union 

select
t2.id id,
t2.name name
from table22_1 t1 right join table22_2 t2
on t1.id = t2.id
where t1.name is null;

===========================================================================
23.
A:
key	vlaue
k1	123
k2	234
k3	235

B:
key	value
k1	111
k2	222
k5	246

使用hive的hql实现，结果是B表数据如下:
k1	123
k2	234
k5	246

create table table23_1(
key string,
value string
)
row format delimited fields terminated by "\t"
;
create table table23_2(
key string,
value string
)
row format delimited fields terminated by "\t"
;
load data local inpath '/opt/module/testdata/table23_1.txt' overwrite into table table23_1;
load data local inpath '/opt/module/testdata/table23_2.txt' overwrite into table table23_2;

select
t2.key,
if(t1.value is null, t2.value, t1.value)
from table23_1 t1 right join table23_2 t2
on t1.key = t2.key;

===============================================================================
24.
有用户表user(uid,name)以及黑名单表Banuser(uid)
1、用left join方式查出所有不在黑名单的用户信息
2、用not exists方式出所有不在黑名单的用户信息

select
t1.uid,
t1.name
from user t1 left join Banuser t2
on t1.uid = t2.uid
where t2.id is null;

select
t1.uid,
t1.name
from user t1
where not exists(select 1 from Banuser t2 where t1.id = t2.id);

================================================================================
25.访问日志正则提取
表t1(注:数据时正常的访问日志数据,分隔符全是空格)
8.35.201.160 - - [16/May/2018:17:38:21 +0800] "GET /uc_server/data/avatar/000/01/54/22_avatar_middle.jpg HTTP/1.1" 200 5396

实现结果如下:
ip	dt	url
8.35.201.160	2018-5-16 17:38:21	/uc_server/data/avatar/000/01/54/22_avatar_middle.jpg

create table table25(
log  string
);

load data local inpath '/opt/module/testdata/table25.txt' overwrite into table table25;

select
regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",1),
from_unixtime(unix_timestamp(regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",2),"dd/MMM/yyyy:HH:mm:ss Z"),"yyyy-MM-dd HH:mm:ss"),
regexp_extract(log,"([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)",4)
from table25
where log regexp "([0-9.]+\\d+) - - \\[(.+ \\+\\d+)\\] .+(GET) (.+) (HTTP)\\S+ (\\d+) (\\d+)"
;

=======================================================================================
26.
dpt表
dpt_id dpt_name
1	产品
2	技术

user_dpt表
user_id dpt_id
1	1
2	1
3	2
4	2
5	3

result表
user_id dpt_id dpt_name
1       1       产品
2       1       产品
3       2       技术
4       2       技术
5       3       其他部门

create table table26_1(
dpt_id int,
dpt_name string
)
row format delimited fields terminated by '\t';

create table table26_2(
user_id int,
dpt_id int
)
row format delimited fields terminated by '\t';

load data local inpath '/opt/module/testdata/table26_1.txt' overwrite into table table26_1;
load data local inpath '/opt/module/testdata/table26_2.txt' overwrite into table table26_2;

select
t2.user_id,
t2.dpt_id,
nvl(t1.dpt_name, "其他部门")
from table26_1 t1 right join table26_2 t2
on t1.dpt_id = t2.dpt_id; 

==============================================================================

course_score表:
id,userid,course,score,year
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,lisi,数学,70,2016
4,wangwu,化学,80,2017
5,zhangsan,语文,85,2015
6,zhangsan,化学,80,2015

编写sql完成如下查询，一次查询实现最好，也可以写多次查询实现:
27、查出每个学期每门课程最高分记录(包含全部5个字段)
28、查出单个学期中语文课在70分以上的学生的数学成绩记录(包含全部5个字段)

create table table27(
id string,
name string,
course string,
score int,
year string
)
row format delimited fields terminated by ',';
load data local inpath '/opt/module/testdata/table27.txt' overwrite into table table27;


27.
select
year,
course,
max(score) maxcourse
from table27
group by year, course; t1

select
t2.*
from (select
year,
course,
max(score) maxcourse
from table27
group by year, course)t1 join table27 t2
on t1.year = t2.year and t1.course = t2.course and t1.maxcourse = t2.score;

28.
select
name
from table27
where course = '语文' and score > 70; t2

select
t1.*
from table27 t1 join (select
name
from table27
where course = '语文' and score > 70)t2
on t1.name = t2.name
where t1.course = '数学';

===============================================================================

29.stu_1
id,name,age,addr
1,zs1,22,bj
2,zs2,22,bj
3,zs3,22,bj
4,zs4,22,bj
5,zs5,22,bj

course_1
cid,cname
1,语文
2,数学
3,政治
4,美术
5,历史

course_sc
id,cid,score
1,1,87
1,2,92
1,3,69
2,2,83
2,3,92
2,4,87
2,5,83

查询平均成绩大于85的所有学生
select
id
from table29_3
group by id, cid
having avg(score) > 85;

==================================================================================
30.
1,100,19,a,2019-08-06 19:00:00
2,101,19,b,2019-08-06 19:00:01
3,100,19,a,2019-08-05 19:00:00
4,101,19,b,2019-08-05 19:00:01
5,102,19,a,2019-08-06 19:00:00
6,102,19,a,2019-08-06 19:00:01

create table table30(
order_id bigint,
user_id int,
amount double,
channel string,
ti string comment 'yyyy-MM-dd HH:mm:ss'
)
partitioned by(dt string comment 'yyyy-MM-dd')
row format delimited fields terminated by ',';

load data local inpath '/opt/module/testdata/table30.txt' overwrite into table table30 partition(dt='2019-08-06');
查询出2019-08-06号 每个渠道的下单用户数、订单总金额

select
channel,
count(distinct user_id),
sum(amount)
from table30
where dt = '2019-08-06' and to_date(ti) = '2019-08-06'
group by channel, to_date(ti);

=====================================================================================
31.
表A(登录表):
ds	user_id
2019-08-06	1
2019-08-06	2
2019-08-06	3
2019-08-06	4

表B(阅读表):
ds	user_id	read_num
2019-08-06	1	2
2019-08-06	2	3
2019-08-06	3	6

表C(付费表):
ds	user_id	price
2019-08-06	1	55.6
2019-08-06	2	55.8

基于上述三张表，请使用hive的hql语句实现如下需求：
(1)、用户登录并且当天有个阅读的用户数，已经阅读书籍数量
(2)、用户登录并且阅读，但是没有付费的用户数
(3)、用户登录并且付费，付费用户书籍和金额

select
count(1)
from table31_1 t1 join table31_2 t2
on t1.user_id = t2.user_id and t1.ds = t2.ds
group by t1.ds;

select
*
from table

create table table31_1(
ds string,
user_id string
)
row format delimited 
fields terminated by ' '
;



create table table31_2(
ds string,
user_id string,
read_num  int
)
row format delimited 
fields terminated by ' '
;

(1)
图样_Study
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Hive场景题（持续更新）

开启本地模式set hive.exec.mode.local.auto=true;===========================================================================1. 编写sql实现每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数userid,month,visitsA,2015‐01,5A,2015‐01,15B,2015‐01,5A,2015‐01,8B,2015‐01,25A,2015‐01,5
复制链接

扫一扫