Hive查询练习二

目标

inner join
left join
full join
union all
综合练习

表结构

image.png
image.png
image.png
image.png
image.png
image.png
image.png



inner join

1.在2019年购买后又退款的用户
'''
※【注意去重】去重31条,没有去重61条
尽量在做表连接之前去重,这样效率才高
'''
select a.user_name
from 
(select distinct user_name
from user_trade 
where year(dt)='2019') a
join 
(select distinct user_name
from user_refund
where year(dt)='2019') b
on a.user_name=b.user_name;
2.在2017年和2018年都购买的用户【134条】
select a.user_name
from
(select distinct user_name
from user_trade
where year(dt)='2017') a
join 
(select distinct user_name
from user_trade
where year(dt)='2018') b
on a.user_name=b.user_name;
3.在2017、2018、2019年都有交易的用户
select a.user_name
from
(select distinct user_name
from trade_2017) a
join 
(select distinct user_name
from trade_2018) b
on a.user_name=b.user_name
join 
(select distinct user_name
from trade_2019) c
on b.user_name=c.user_name;


left join

1.在user_list_1表中但是不在user_list_2的用户
select a.user_id,
a.user_name
from user_list_1 a
left join user_list_2 b
on a.user_name=b.user_name
where b.user_name is null;
'''
【hive不可以在in后面接子查询】
select user_id, 
user_name
from user_list_1 
where user_id not in
(select user_id
from user_list_2);
'''

image.png

2.在2019年购买,但是没有退款的用户【32条】
select a.user_name
from
(select distinct user_name
from user_trade
where year(dt)='2019') a
left join 
(select distinct user_name
from user_refund
where year(dt)='2019') b
on a.user_name=b.user_name
where b.user_name is null;
3.在2019年购买和用户的学历分布【 ※※※】

1.在user_trade表中提取19年购买的用户姓名
2.在user_info表中提取每个人的学历信息
3.根据用户姓名进行连接

select 
b.education,
count(a.user_name)
from
(select distinct user_name
from user_trade
where year(dt)='2019') a
left join 
(select user_name,
get_json_object(extra1,'$.education') education
from user_info ) b
on a.user_name=b.user_name
group by b.education;

image.png

4.在2017年和2018年都购买,但是没有在2019年购买的用户
'''
不需要重命名再左连接
'''
select a.user_name
from
(select distinct user_name
from trade_2017) a
join 
(select distinct user_name
from trade_2018) b
on a.user_name=b.user_name
left join 
(select distinct user_name
from trade_2019) c
on b.user_name=c.user_name
where c.user_name is null;


full join【并集】

1.user_list_1和user_list_2的所有用户

user_list_1:【1,2,3,4,5,6】
user_list_2:【1,3,4,7,8,9】
合并:【1,2,3,4,5,6,7,8,9】

'''
--coalesce函数,按照列名依次读取,遇到非null即停止并返回该值,
如果两个参数都是null,最终返回null
最终显示一列
'''
select coalesce(a.user_name,b.user_name)
from user_list_1 a 
full join user_list_2 b
on a.user_id=b.user_id;

image.png



union all【追加】

  • 字段名称必须一致【不同,重命名即可】
  • 字段顺序必须一致
  • 没有连接条件
将user_list_1和user_list_3合并在一起

user_list_1:【1,2,3,4,5,6】
user_list_3:【90,91,92,93,94】
合并:【1,2,3,4,5,6,90,91,92,93,94】

select user_id,
user_name
from user_list_1
union all 
select user_id,
user_name
from user_list_3;

image.png

1.2017-2019年有交易的所有用户数【※※】
  • 推荐:先把每个表要合并的字段distinct再union all,但最后都要distinct;而union时,最后面就不需要再加distinct
  • 常见的错误:
    1.没有重命名
    2.直接union all 表名,而不是select…from形式
select count(distinct a.user_name)
from 
(select user_name
from trade_2017
union all 
select user_name
from trade_2018
union all
select user_name
from trade_2019) a;
2.2019年每个用户的支付和退款金额汇总
  • 业务逻辑:不是每个人在2019年都有支付金额和退款金额
    即:支付金额为0,退款金额不为0 ;支付金额不为0 ,退款金额为0
'''
用 union all 来实现
1.在trade表中,按照用户分组,每个用户的支付金额和退款金额(补0)
2.在refund表中,按照用户分组,每个用户的退款金额和支付金额(补0)
3.用 union all 追加,然后 group by 用户,再各自求和

'''
select a.user_name,
sum(a.pay_amount),
sum(a.refund_amount)
from 
(select user_name,
	sum(pay_amount) pay_amount,
0 refund_amount
from user_trade
where year(dt)='2019'
group by user_name
union all
select user_name,
0 pay_amount,
sum(refund_amount) refund_amount
from user_refund
where year(dt)='2019'
group by user_name) a
group by a.user_name;


'''
用full join 来实现 (比union all慢很多) 合并时4列
关键:将null值变为0
1.各自按照用户分组进行支付和退款汇总;
2.按用户连接,用 coalesce 函数;
3.将null改为0
'''
select coalesce(a.user_name,b.user_name),
if(a.pay_amount is null,0,a.pay_amount),
if(b.refund_amount is null,0,b.refund_amount)
from
(select user_name,
sum(pay_amount) pay_amount
from user_trade
where year(dt)='2019'
group by user_name) a
full join
(select user_name,
sum(refund_amount) refund_amount
from user_refund
where year(dt)='2019'
group by user_name) b
on a.user_name=b.user_name;

image.png
image.png

'''
拓展:2019年每个支付用户的支付金额和退款金额:
'''
select a.user_name,
a.pay_amount,
if(b.refund_amount is null,0,b.refund_amount)
from
(select user_name,
sum(pay_amount) pay_amount
from user_trade
where year(dt)='2019'
group by user_name) a
left join 
(select user_name,
sum(refund_amount) refund_amount
from user_refund
where year(dt)='2019'
group by user_name) b
on a.user_name=b.user_name;

image.png



综合练习

1.首次激活时间在2019年,但是一直没有支付的用户年龄段分布(user_trade、user_info)
'''1.查询首次激活时间在2019年的用户年龄分布'''

select user_name,
case when age<20 then '20岁以下'
            when age>=20 and age<30 then '20-30岁'
            when age>=30 and age<40 then '30-40岁'
            else '40岁以上' end age_level
from user_info
where year(firstactivetime)='2019'

'''2.将上面查询左连接trade,查支付金额为空的部分
(只要在trade这张表中就表示有支付金额)'''

select distinct user_name
from user_trade
where dt>0

'''3.查询首次激活时间在2019年且支付金额为0的用户年龄分布'''

select a.age_level,
count(a.user_name)
from
(select user_name,
case when age<20 then '20岁以下'
            when age>=20 and age<30 then '20-30岁'
            when age>=30 and age<40 then '30-40岁'
            else '40岁以上' end age_level
from user_info
where year(firstactivetime)='2019') a
left join 
(select distinct user_name
from user_trade) b
on a.user_name=b.user_name
where b.user_name is null
group by a.age_level;

image.png

#####方法二

'''1.首次激活时间在2019年且支付金额为0的用户姓名和年龄:'''
select a.user_name,
a.age
from
(select distinct user_name,
age       
from user_info 
where year(firstactivetime)='2019') a
left join 
(select user_name
from user_trade 
where dt>0) b
on a.user_name=b.user_name
where b.user_name is null;

'''2.年龄分布查询:'''
select case when age<20 then '20岁以下'
            when age>=20 and age<30 then '20-30岁'
            when age>=30 and age<40 then '30-40岁'
            else '40岁以上' end,
        count(distinct user_id) user_num
from user_info
group by case when age<20 then '20岁以下'
            when age>=20 and age<30 then '20-30岁'
            when age>=30 and age<40 then '30-40岁'
            else '40岁以上' end;

'''3.将上面的查询作为新表,然后进行年龄分布查询:'''
select case when c.age<20 then '20岁以下'
            when c.age>=20 and c.age<30 then '20-30岁'
            when c.age>=30 and c.age<40 then '30-40岁'
            else '40岁以上' end,
        count(distinct c.user_name) user_num
from
(select a.user_name,
a.age
from
(select distinct user_name,
age       
from user_info 
where year(firstactivetime)='2019') a
left join 
(select user_name
from user_trade ) b
on a.user_name=b.user_name
where b.user_name is null) c
group by case when c.age<20 then '20岁以下'
            when c.age>=20 and c.age<30 then '20-30岁'
            when c.age>=30 and c.age<40 then '30-40岁'
            else '40岁以上' end;

image.png

2.2018、2019年交易的用户,其激活时间段分布(trade_2018、trade_2019、user_info)
''' 用union不需要重复写distinct
1.查出2018、2019年交易用户的并集
2.用户激活时间段分布
3.合并
'''
select substr(firstactivetime,12,2),
count(a.user_name)
from
(select user_name
from trade_2018
union 
select user_name
from trade_2019) a
left join
user_info c
on a.user_name=c.user_name
group by substr(firstactivetime,12,2);
'''
group by substr(firstactivetime,12,2)等价于
group by hour(firstactivetime)
'''

image.png

3.在2019年购买后又退款的用户性别分布(user_trade、user_refund)
'''1.查出2019年购买后又退款的用户及性别
2.性别分布'''

select c.sex,
count(a.user_name)
from
(select distinct user_name
from user_trade 
where year(dt)='2019') a
join 
(select distinct user_name
from user_refund
where year(dt)='2019') b
on a.user_name=b.user_name
join 
(select distinct user_name,
sex
from user_info) c
on b.user_name=c.user_name
group by c.sex;

image.png

【错误写法】:
select c.user_name
from
((select distinct user_name
from user_trade 
where year(dt)='2019') a
join 
(select distinct user_name
from user_refund
where year(dt)='2019') b
on a.user_name=b.user_name)c;

【正确写法】:
select a.user_name
from
(select distinct user_name
from user_trade 
where year(dt)='2019') a
join 
(select distinct user_name
from user_refund
where year(dt)='2019') b
on a.user_name=b.user_name;
4.在2018年购买,但是没有在2019年购买的用户城市分布(user_trade、user_refund)

#重点

select d.city,
count(c.user_name)
from
(select a.user_name
from
(select distinct user_name
from user_trade
where year(dt)='2018') a
left join
(select distinct user_name
from user_trade
where year(dt)='2019') b
on a.user_name=b.user_name
where b.user_name is null) c
left join 
(select distinct user_name,
city
from user_info) d
on c.user_name=d.user_name
group by d.city;

image.png

5.在2017-2019年,有交易但是没有退款的用户的手机品牌分布(trade_2017、trade_2018、trade_2019、user_refund、user_info)
select d.phonebrand,
count(c.user_name)
from
(select a.user_name
from
(select user_name
from trade_2017
union 
select user_name
from trade_2018
union
select user_name
from trade_2019) a
left join 
(select distinct user_name
from user_refund
where year(dt) between '2017' and '2019') b
on a.user_name=b.user_name
where b.user_name is null) c
left join 
(select distinct user_name,
get_json_object(extra1,'$.phonebrand') phonebrand
from user_info) d
on c.user_name=d.user_name
group by phonebrand;

image.png

【错】
select c.user_name
from
((select distinct user_name
from user_trade
where year(dt)='2018') a
left join
(select distinct user_name
from user_trade
where year(dt)='2019') b
on a.user_name=b.user_name
where b.user_name is null) c

【对】
select a.user_name
from
(select user_name
from trade_2017
union 
select user_name
from trade_2018
union
select user_name
from trade_2019) a;


【对】
select a.user_name
from
(select distinct user_name
from user_trade
where year(dt)='2018') a
left join
(select distinct user_name
from user_trade
where year(dt)='2019') b
on a.user_name=b.user_name
where b.user_name is null
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值