SQL高频面试题

第一题

数据

1001	01	90
1001	02	90
1001	03	90
1002	01	85
1002	02	85
1002	03	70
1003	01	70
1003	02	70
1003	03	85

建表语句

create table score
(
    uid string
    ,subject_id string
    ,score int
)
row format delimited fields terminated by '\t'
;

load data local inpath '/home/hive/score.txt' into table score;

select * from score;

需求

找出所有科目成绩都大于某一学科平均成绩的学生

sql

select  uid
from    (
            select  uid
                    ,if(score>avg_score,0,1) flag
            from    (
                        select  uid
                                ,score
                                ,avg(score) over(partition by subject_id) avg_score
                        from    score
                    ) t1
        ) t2
group by uid
having  sum(flag) = 0
;

第二题

数据

u01	2017/01/21	5
u02	2017/01/23	6
u03	2017/01/22	8
u04	2017/01/20	3
u01	2017/01/23	6
u01	2017/02/21	8
u02	2017/01/23	6
u01	2017/02/22	4

建表语句

create table action 
(
    userId string
    ,visitDate string
    ,visitCount int
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hive_test_data/action.txt' into table action;
select * from action;

需求

统计出每个用户的累积访问次数,如下表所示:

用户id

月份

小计

累积

u01

2017-01

11

11

u01

2017-02

12

23

u02

2017-01

12

12

u03

2017-01

8

8

u04

2017-01

3

3

sql

with t1 as (
select  userId
        ,date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') visitDate
        ,visitCount
from    action
)
,t2 as (
select  userId
        ,visitDate
        ,sum(visitCount) visitCount
from    t1
group by userId
         ,visitDate
)
select  userId
        ,visitDate
        ,visitCount
        ,sum(visitCount) over (partition by userId order by visitDate rows between unbounded preceding and current row) visitCount_total
from    t2
;

第三题

数据

u1	a
u2	b
u1	b
u1	a
u3	c
u4	b
u1	a
u2	c
u5	b
u4	b
u6	c
u2	c
u1	b
u2	a
u2	a
u3	a
u5	a
u5	a
u5	a

建表语句

create table visit
(
    user_id string
    ,shop string
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hive_test_data/visit.txt' into table visit;
select * from visit;

需求

有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:

1)每个店铺的UV(访客数)

2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数

sql

select  shop
        ,count(distinct user_id) uv
from    visit
group by shop
;
with temp as (
select  shop
        ,user_id
        ,count(1) visit_count
from    visit
group by user_id,shop
)
,rn as (
select  shop
        ,user_id
        ,visit_count
        ,rank() over(partition by shop order by visit_count desc) as rn 
from    temp
)
select  shop
        ,user_id
        ,visit_count
from    rn
where   rn <= 3
;

第四题

数据

2017-01-01	10029028	1000003251	33.57
2017-01-10	11039016	1100003000	25.50
2017-01-25	11039015	1101003001	95.50
2017-02-01	11029028	1000003251	30.57
2017-02-10	11029016	1100003000	15.50
2017-03-01	10029030	1000003251	33.57
2017-03-10	11039017	1100003000	25.50
2017-03-25	11039080	1101003001	95.50
2017-04-10	21039017	1100003000	75.15
2017-04-25	21039080	1101003001	45.59
2017-05-07	20029028	1000003251	32.57
2017-05-10	41039016	1100003000	20.50
2017-06-22	31039015	1101003001	15.50
2017-07-08	60029028	1000003251	39.57
2017-07-11	51039016	1100003000	28.50
2017-08-01	70029028	1000003251	83.57
2017-09-10	81039016	1100003000	65.50
2017-10-25	91039015	1101003001	25.50
2017-11-10	10039016	1100003000	65.50
2017-11-25	10039015	1201003001	25.50
2017-12-01	11020028	1000003251	33.57
2017-12-10	12039016	1100003000	25.50
2017-12-25	14039015	1101003001	95.50
2017-12-25	10039089	1201003001	25.50

建表语句

create table order_tab
(
    dt string
    ,order_id string
    ,user_id string
    ,amount decimal(10,2)
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hive_test_data/order_tab.txt' into table order_tab;
select * from order_tab;

需求

1)给出 2017年每个月的订单数、用户数、总成交金额。

2)给出2017年11月的新客数(指在11月才有第一笔订单)

sql

select  date_format(dt,'yyyy-MM') dt
        ,count(order_id) orders
        ,count(distinct user_id) users
        ,sum(amount) amount
from    order_tab
where   year(dt) = '2017'
group by date_format(dt,'yyyy-MM')
;
select  count(user_id) new_users
from    order_tab
group by user_id
having  date_format(min(dt),'yyyy-MM') = '2017-11'
;

第五题

数据

2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19

建表语句

create table user_age
(
    dt string
    ,user_id string
    ,age int
)
row format delimited fields terminated by ',';
load data local inpath '/home/hive_test_data/user_age.txt' into table user_age;
select * from user_age;

需求

求所有用户和活跃用户的总数及平均年龄。(活跃用户指连续两天都有访问记录的用户)

sql

with temp as (
select  dt
        ,user_id
        ,age
from    user_age
group by dt
         ,user_id
         ,age
)
,temp2 as (
select  distinct user_id
        ,age
from    temp
)
,res1 as (
select  '所有用户' as user_type
        ,count(1) users
        ,cast(sum(age)/count(user_id) as decimal(10,2)) avg_age
from    temp2
)
,lead_dt as (
select  dt
        ,user_id
        ,age
        ,lead(dt,1,'9999-12-30') over(partition by user_id order by dt) dt_1
from    temp
)
,continue_users as (
select  user_id
        ,age
from    lead_dt
where   datediff(dt_1,dt) = 1
)
,continue_users2 as (
select  distinct user_id
        ,age
from    continue_users
)
,res2 as (
select  '活跃用户' as user_type
        ,count(1) users
        ,cast(sum(age)/count(1) as decimal(10,2)) avg_age
from    continue_users2
)
select  *from    res1
union all
select  *from    res2
;

第六题

数据

1000003251	33	2017-01-01	10029028
1100003000	25	2017-01-10	11039016
1101003001	95	2017-01-25	11039015
1000003251	30	2017-02-01	11029028
1100003000	15	2017-02-10	11029016
1000003251	33	2017-03-01	10029030
1100003000	25	2017-03-10	11039017
1101003001	95	2017-03-25	11039080
1100003000	75	2017-04-10	21039017
1101003001	45	2017-04-25	21039080
1000003251	32	2017-05-07	20029028
1100003000	20	2017-05-10	41039016
1101003001	15	2017-06-22	31039015
1000003251	39	2017-07-08	60029028
1100003000	28	2017-07-11	51039016
1000003251	83	2017-08-01	70029028
1100003000	65	2017-09-10	81039016
1101003001	25	2017-10-25	91039015
1010013251	50	2017-10-26	10129028
1100003000	65	2017-11-10	10039016
1201003001	25	2017-11-25	10039015
1000003251	33	2017-12-01	11020028
1100003000	25	2017-12-10	12039016
1101003001	95	2017-12-25	14039015
1201003001	25	2017-12-25	10039089
1010013251	30	2017-12-26	12129028

建表语句

create table ordertable
(
    userid string
    ,money int
    ,paymenttime string
    ,orderid string
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hive_test_data/ordertable.txt' into table ordertable;
select * from ordertable;

需求

表ordertable字段(购买用户:userid,金额:money,购买时间:paymenttime(格式:2017-10-01),订单id:orderid)

请用sql写出所有用户中在今年10月份第一次购买商品的金额

sql

with temp as (
select  userid
        ,min(paymenttime) paymenttime
from    ordertable
where   date_format(paymenttime,'yyyy-MM') = '2017-10'
group by userid
)
select  temp.userid
        ,temp.paymenttime
        ,ordertable.money
from    temp
        ,ordertable
where   temp.userid = ordertable.userid
and     temp.paymenttime = ordertable.paymenttime
;

第七题

数据

2016-11-09 14:22:05	/api/user/login	110.23.5.33
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 14:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.35
2016-11-09 14:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:59:40	/api/user/login	200.6.5.166
2016-11-09 14:59:40	/api/user/login	200.6.5.166

建表语句

create table ip
(
    time string
    ,interface string
    ,ip string
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hive_test_data/ip.txt' into table ip;
select * from ip;

需求

求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址

sql

select  ip
        ,count(1) visit_count
from    ip
where   date_format(time,'yyyy-MM-dd HH') >= '2016-11-09 14'
and     date_format(time,'yyyy-MM-dd HH') <= '2016-11-09 15'
and     interface = 'api/user/login'
group by ip
order by visit_count desc
limit   10
;

 

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值