美团+B站+腾讯微保+小鹏汽车数分笔试答案
在大数据领域或者是数据分析领域,SQL成为了一门重要的语言,在离线分析场景中起到了重大的作用;比如连续登录,高峰期、等等场景更是常用。
练习的地址:http://sqlintern.com/gamora2
青春版练习界面:
尊享版练习界面:
做题界面:
1. 巩固基础-有趣的大数据扫黄
- 聚合函数、去重计算和简单的过滤筛选
- 分组计算和排序
- 通配符,或与非逻辑运算
- 日期时间处理、字符串类型和左连接
- 窗口函数和综合应用
2. 场景练习-热门出行目的地探索分析
- 用车人数
- 订单高峰期
- 热门目的地
- 车型单量
- 热门路线
3. 举一反三-高频考点之连续登录专题
- 美团连续登录
- 小鹏连续快充
- 微保连续点击
4. 挑战大厂真题-哔哩哔哩,如何计算大会员收入?
------------------------以下为答案--------------------------------
--1.2021年6-9月份,共有多少个商户有动账、涉及多少笔交易、交易规模为?
select
count(distinct mch_nm) as mch_cnt
, count(1) trx_cnt
, sum(trx_amt) trx_amt
from
ghs_tb
where
trx_tm between '2021-06-01' and '2021-09-30' --同学们对比下,改成'2021-10-01'结果会变成什么,正确写法又是什么?
--2.2021年9月,足疗按摩分类下,人均交易金额TOP3商家是:
select
mch_nm
, sum(trx_amt) as trx_amt
, count(distinct usr_id) as usr_cnt
, sum(trx_amt)/count(distinct usr_id) as avg_amt
from ghs_tb where substr(trx_tm,1,7)= '2021-09'
and mch_typ='足疗按摩'
group by mch_nm
order by sum(trx_amt)/count(distinct usr_id) desc
--3.2021年9月,商户名称中包含了“保健”,“养生”,“会所”,“按摩”等关键词,且笔均交易金额大于等于500元的商家有多少个?
select mch_nm, count(1), sum(trx_amt)
from ghs_tb
where mch_nm rlike '.*(保健|养生|会所|按摩).*'
and substr(trx_tm,1,7)='2021-09'
group by mch_nm
having sum(trx_amt)/count(1)>=500
--4.定义:
--1)发生在22:00-次日3:00;
--2)消费金额为整数, 尾号为88、98、99(如388元,1298元);
--3)金额在[288, 2000]的交易为涉黄交易。
--设某商户某天交易量为M,当天涉黄交易量为m,若任意一天m/M超过(包含)50%, 则标记该商户为可能潜在涉黄商户。
--以下哪个商户不属于潜在涉黄商户?(请找出所有潜在涉黄商户
select
a.mch_nm
, a.trx_dt
, a.ttl_cnt
, case when ghs_cnt is null then 0 else ghs_cnt/ttl_cnt end as pct /* null值的处理也可以用ifnull函数,同学们自行尝试 */
from
(select
mch_nm
, substr(trx_tm,1,10) as trx_dt /* 还有至少两种方式提取日期,date/datetimefomat,同学们自己去试试吧!*/
, count(1) as ttl_cnt
from
ghs_tb
group by
mch_nm, substr(trx_tm,1,10)
)a
left join
(
select
mch_nm
, substr(trx_tm,1,10) as trx_dt
, count(1) ghs_cnt
from
ghs_tb
where
(time(trx_tm) between '22:00:00' and '24:00:00' or time(trx_tm) between '00:00:00' and '03:00:00') /* 又是经典的时间提取函数!*/
and (trx_amt like '%99.00'
or trx_amt like '%98.00' or trx_amt like '%88.00')
and trx_amt between 288 and 2000
group by
mch_nm
, substr(trx_tm,1,10)
)b
on
a.mch_nm = b.mch_nm and a.trx_dt = b.trx_dt /* 两个关联条件 */
having
pct>=0.5
--5. 若用户在3天内有过至少2次【足疗按摩】消费,则定义该用户为【杉徒】, 以下哪个用户不是杉徒?(请找出所有杉徒)
select
distinct usr_id
from
(
select
usr_id
, trx_tm
, lag(trx_tm,1, '1900-12-31') over(partition by usr_id order by trx_tm) as lag_tm /* 偏移函数 */
from
ghs_tb
where mch_typ='足疗按摩'
)t
where datediff(trx_tm, lag_tm)<=3 /* 两次消费日期差小于等于3天 */
--6、以下哪个地址的用车人数最多?
select
start_loc,count(distinct cust_uid)
from
didi_sht_rcd
group by
start_loc
order by 2 desc
--7、以前海湾休闲会所为目的地的订单高峰期是几点?
select
hour(start_tm),
count(1)
from didi_sht_rcd
where end_loc like '%前海湾休闲%'
group by hour(start_tm) order by 2 desc;
--8、用车人次最高的住宅、用车人次第一的酒吧分别是?
select b.loc_ctg, a.start_loc, c1, dense_rank()over(partition by loc_ctg order by c1 desc)rnk
from
(select start_loc, count(1) c1
from didi_sht_rcd group by start_loc) a
inner join
( select loc_nm, loc_ctg from loc_nm_ctg group by loc_nm, loc_ctg)b
on a.start_loc = b.loc_nm
where loc_ctg in ('住宅', '酒吧');
--9、从机场到酒店,单量最高的车型为?
select
r.car_cls,
count(distinct cust_uid) 'cnt'
from didi_sht_rcd r
inner join loc_nm_ctg s on r.start_loc=s.loc_nm
inner join loc_nm_ctg e on r.end_loc=e.loc_nm
where s.loc_ctg = '机场'
and e.loc_ctg = '酒店'
group by r.car_cls
order by cnt desc
;
--10、以下哪种说法错误?
select * from
(
select b.loc_ctg as start_ctg,
a.start_loc,c.loc_ctg as end_ctg,
a.end_loc, count(1) c1,
dense_rank()over(partition by b.loc_ctg,a.end_loc order by count(1) desc ) rnk
from didi_sht_rcd a
inner join loc_nm_ctg b
on a.start_loc = b.loc_nm
inner join loc_nm_ctg c
on a.end_loc = c.loc_nm
group by b.loc_ctg , a.start_loc,c.loc_ctg, a.end_loc
order by b.loc_ctg)t
where start_ctg in ('酒店', '住宅', '写字楼') and rnk=1 and end_ctg
--11、连续登录
select count(distinct usr_id)
from
(
select usr_id, load_dt2, count(1) load_days
from
(
select usr_id, load_dt, rnk, date_sub(load_dt, interval rnk day) as load_dt2
from
(
select
a.usr_id
, a.load_dt
, row_number()
over(partition by a.usr_id order by a.load_dt) rnk
from
(
select
usr_id
, load_date load_dt
from mt_t1
)a
)b
)c
group by usr_id,load_dt2
having load_days >= 2 --大于等于2
)t
;
--12、连续点击
select distinct usr_id
from
(
select *, rank_1- rank_2 as diff
from
(
select *,
row_number() over(order by click_time) as rank_1,
row_number() over(partition by usr_id order by click_time) as rank_2
from wb_t1
) b
) c
group by diff,usr_id
having count(diff) >=2
--13、B站大会员摊销收入
select
y_m
, sum(avg_day_amt)
from
(select
*
, pay_amount/(datediff(end_date, begin_date)+1) as avg_day_amt
from
(select * from bilibili_m2 where m_date between '2021-01-01' and '2021-05-31')a
join
bilibili_m1 b
on m_date >= begin_date and m_date <= end_date and user_id='B1004'
)t
group by y_m;