Hivesql10题

1.找出所有科目成绩都大于某一学科平均成绩的学生

数据集如下

uid  subject_id  score 
1001	01		 90
1001	02		 90
1001	03		 90
1002	01		 85
1002	02		 85
1002	03		 70
1003	01		 70
1003	02		 70
1003	03		 85
select
	uid
from
	(
	select
		uid
		,if(score>avg_score,0,1) flag
	from
		(
		select
			uid
			,subject_id
			,avg(score) over(partition by subject_id) avg_score
		from
			score) t1) t2
where
group by
	uid
having
	sum(flag)= 0;
2.统计出每个用户的累积访问次数

用户访问数据

userId	visitDate	visitCount
u01		2017/1/21	5
u02		2017/1/23	6
u03		2017/1/22	8
u04		2017/1/20	3
u01		2017/1/23	6
u01		2017/2/21	8
u02		2017/1/23	6
u01		2017/2/22	4

最后结果如下所示

用户id	月份		小计	累积
u01		2017-01	 11	 11
u01		2017-02	 12	 23
u02		2017-01	 12	 12
u03		2017-01	 8	 8
u04		2017-01	 3	 3
select
    userId,
    mn,
    mn_count,
    sum(mn_count) over(partition by userId order by mn)
from 
(   select
        userId,
        mn,
        sum(visitCount) mn_count
    from
         (select
             userId,
             date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
             visitCount
         from
             action)t1
group by userId,mn)t2;
3.有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:

1)每个店铺的UV(访客数)

select shop
		,count(distinct user_id)
from visit
group by shop;

2)每个店铺访问次数top3的访客信息,输出店铺名称、访客id、访问次数。

select shop
	 	,user_id
	 	,ct
from (select shop
		,user_id
		,ct
		,rank() over(partition by shop order by ct) rk
from 
	(select shop
	   		,user_id
	   		,count(*) ct
	from visit
	group by shop,user_id) t1
 ) t2
where rk<=3;		
4.已知一个表STG.ORDER,有如下字段:Date,Order_id,User_id,amount。请给出sql进行统计:数据样例:2017-01-01,10029028,1000003251,33.57。(表名统一用order_tab)
1)给出 2017年每个月的订单数、用户数、总成交金额。
select
   date_format(dt,'yyyy-MM'),
   count(order_id),
   count(distinct user_id),
   sum(amount)
from
   order_tab
where
   date_format(dt,'yyyy')='2017'
group by
   date_format(dt,'yyyy-MM');
2)给出2017年11月的新客数(指在11月才有第一笔订单)
select
   count(user_id)
from
   order_tab
group by
   user_id
having
   date_format(min(dt),'yyyy-MM')='2017-11';
5.有日志如下,请写出代码求得所有用户和活跃用户的总数及平均年龄。(活跃用户指连续两天都有访问记录的用户)日期 用户 年龄

数据如下所示,表名user_age

dt		user_id   age
2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19
-- 求得所有用户和活跃用户的总数及平均年龄。
-- (活跃用户指连续两天都有访问记录的用户)

-- 1.按日期以及用户分组,按日期排序并给出排名
select
	dt,
	user_id,
	min(age) age,
	rank() over(partition by user_id order by dt) rk
from
	user_age
group by
	dt,
	user_id
-- 2.计算日期和排名的差值
select
	user_id,
	age,
	date_sub(dt, rk) flag
from
	(
	select
		dt,
		user_id,
		min(age) age,
		rank() over(partition by user_id order by dt) rk
	from
		user_age
	group by
		dt,
		user_id) t1
-- 3.过滤出差值大于2的,即为连续两天活跃的用户
select
	user_id,
	min(age) age
from
	(
	select
		user_id,
		age,
		date_sub(dt, rk) flag
	from
		(
		select
			dt,
			user_id,
			min(age) age,
			rank() over(partition by user_id order by dt) rk
		from
			user_age
		group by
			dt,
			user_id) t1
		)t2
group by
	user_id,
	flag
having
	count(*)>= 2
-- 4.对数据进行去重处理
select
	user_id,
	min(age) age
from
	(select
	user_id,
	min(age) age
from
	(
	select
		user_id,
		age,
		date_sub(dt, rk) flag
	from
		(
		select
			dt,
			user_id,
			min(age) age,
			rank() over(partition by user_id order by dt) rk
		from
			user_age
		group by
			dt,
			user_id) t1
		)t2
group by
	user_id,
	flag
having
	count(*)>= 2)t3
group by
	user_id
-- 5.计算活跃用户的人数以及平均年龄
select
	count(*) ct,
	cast(sum(age)/ count(*) as decimal(10, 2))
from
	(
	select
		user_id,
		min(age) age
	from
		(
		select
			user_id,
			min(age) age
		from
			(
			select
				user_id,
				age,
				date_sub(dt, rk) flag
			from
				(
				select
					dt,
					user_id,
					min(age) age,
					rank() over(partition by user_id order by dt) rk
				from
					user_age
				group by
					dt,
					user_id) t1
		)t2
		group by
			user_id,
			flag
		having
			count(*)>= 2)t3
	group by
		user_id)t4;
-- 6.对全量数据进行去重(按用户去重)
select
	user_id,
	min(age) age
from
	user_age
group by
	user_id;

-- 7.计算所有用户的数量以及平均年龄
select
	count(*) user_count,
	cast((sum(age)/ count(*)) as decimal(10, 1))
from
	(
	select
		user_id,
		min(age) age
	from
		user_age
	group by
		user_id)t5;
-- 8.把4和7的数据进行union
select
	0 user_total_count,
	0 user_total_avg_age,
	count(*) twice_count,
	cast(sum(age)/ count(*) as decimal(10, 2)) twice_count_avg_age
from
	(
	select
		user_id,
		min(age) age
	from
		(
		select
			user_id,
			min(age) age
		from
			(
			select
				user_id,
				age,
				date_sub(dt, rk) flag
			from
				(
				select
					dt,
					user_id,
					min(age) age,
					 rank() over(partition by user_id order by dt) rk
				from
					user_age
				group by
					dt,
					user_id
    ) t1
    ) t2
		group by
			user_id,
			flag
		having
			count(*)>= 2)t3
	group by
		user_id 
) t4
union all

select
	count(*) user_total_count,
	cast((sum(age)/ count(*)) as decimal(10, 1)),
	0 twice_count,
	0 twice_count_avg_age
from
	(
	select
		user_id,
		min(age) age
	from
		user_age
	group by
		user_id
   ) t5
-- 9.最终整合
select
	sum(user_total_count),
	sum(user_total_avg_age),
	sum(twice_count),
	sum(twice_count_avg_age)
from
	(
	select
		0 user_total_count,
		0 user_total_avg_age,
		count(*) twice_count,
		cast(sum(age)/ count(*) as decimal(10, 2)) twice_count_avg_age
	from
		(
		select
			user_id,
			min(age) age
		from
			(
			select
				user_id,
				min(age) age
			from
				(
				select
					user_id,
					age,
					date_sub(dt, rk) flag
				from
					(
					select
						dt,
						user_id,
						min(age) age,
						rank() over(partition by user_id order by dt) rk
					from
						user_age
					group by
						dt,
						user_id
    				) t1
    			) t2
			group by
				user_id,
				flag
			having count(*)>= 2
			) t3
		group by
			user_id 
		) t4
union all
 select
		count(*) user_total_count,
		cast((sum(age)/ count(*)) as decimal(10, 1)),
		0 twice_count,
		0 twice_count_avg_age
	from
		(
		select
			user_id,
			min(age) age
		from
			user_age
		group by
			user_id
   ) t5
) t6;
6.请用sql写出所有用户中在今年10月份第一次购买商品的金额,表名ordertable,字段(购买用户:userid,金额:money,购买时间:paymenttime(格式:2017-10-01),订单id:orderid)
-- 1.
select 
		userid,
		min(paymenttime) paymenttime
from ordertable
where date_format(paymenttime,'yyyy-MM')='2017-10'
group by userid
-- 2.
select 
		t1.userid,
		t1.paymenttime,
		ot.money
from (select 
		userid,
		min(paymenttime) paymenttime
from ordertable
where date_format(paymenttime,'yyyy-MM')='2017-10'
group by userid) t1
join ordertable ot 
on t1.userid=ot.userid and t1.paymenttime=ot.paymenttime;
7.求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址

有一个线上服务器访问日志格式如下
时间 --------------------- 接口 ------------ ip地址
2016-11-09 11:22:05 /api/user/login 110.23.5.33
2016-11-09 11:23:10 /api/user/detail 57.3.2.16

2016-11-09 23:59:40 /api/user/login 200.6.5.166
求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址

还在完善中…(有时间就弄完…)

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值