题1:编写SQL拉取存在连续4天的用户
数据源表:active_info
字段:userid(用户id),ftime(活跃日期)
字段样例:
user_id | ftime |
---|---|
123 | 2022-07-10 |
234 | 2022-07-12 |
SOL返回字段:userid
答案
select distinct userid
from
( -- 添加排序序号
select userid
,row_number over(partition by userid order by ftime asc) as rnk
,ftime
from (select userid,ftime from active_info group by userid,ftime) t -- 去重
) t1
group by user_id,date_sub(ftime,rnk)
having count(1) >= 4
题2:如下几段SQL分别返回什么
数仓,数分相关工作经验,最近在面试招聘,3-8年工作经验,这三条返回全写对,10人不超过2人;
表:ta
id |
---|
1 |
2 |
4 |
null |
表:tb
id |
---|
2 |
2 |
4 |
3 |
- 语句1:
select ta.id, tb.id as id2 from ta left join tb on ta.id = tb.id and tb.id > 2
答案
id | id2 |
---|---|
4 | 4 |
1 | null |
2 | null |
null | null |
- 语句2:
select ta.id, tb.id as id2 from ta left join tb on ta.id = tb.id and ta.id < 3
答案
id | id2 |
---|---|
2 | 2 |
2 | 2 |
1 | null |
4 | null |
null | null |
- 语句3:
select ta.id,tb.id as id2 from ta full join tb on ta.id = tb.id where ta.id is null
答案
id | id2 |
---|---|
null | 3 |
null | null |
题3:编写SQL拉收6月专业公司为A、B、C的(只要这三个公司)
比较简单的一个行转列问题
小R用户数(月付费:[0.100)),
中R用户数 (月付费:[100,200)),
大R用户数 (月付费:[200,+∞))
表名:pay_info
字段
专业公司:business_cd
付费日期:statis_date(样式:2022-07-01)
付费金额:Pay
用户id:user_id
SQL返回字段(4个字段):
business_cd专公司,
large_pay_uv大R用户数,
mid_pay_uv中R付费用户数,
small_pay_uv小R付费用户数
答案
select business_cd
,count(case when pay < 100 then 1 end) as small_pay_uv -- 小R付费用户
,count(case when pay >= 100 and pay < 200 then 1 end) as mid_pay_uv
,count(case when pay >= 200 then 1 end) as large_pay_uv
from
( -- 先按专业公司,用户id对付费求和加总
select business_cd
,user_id
,sum(pay) as pay
from pay_info
where month(statis_date)= '6' and business_cd in ('A','B','C')
group by business_cd,user_id
) t
group by business_cd
题4:日期交叉问题
拉取当前平台活动天数,如果中间有重叠,重叠天数不计,比如xiaomi第一次活动是从2021-7-9到2021-7-15,第二次活动2021-7-14到2021-7-19,14号和15号重复,这两天只计一次,总共活动11天。
select brand -- 品牌
,sum(val) as act_days -- 活动天数
from
(
select brand
,start_date
,end_date
,start_date_up -- 开始日期上偏移1位
,end_date_up -- 结束日期上偏移1位
,rnk
,case when rnk=1 and start_date_up is null then DATEDIFF(end_date,start_date) + 1
when rnk=1 and start_date_up<=end_date then DATEDIFF(end_date_up,start_date) + 1
when rnk=1 and start_date_up>end_date then DATEDIFF(end_date,start_date) + DATEDIFF(end_date_up,start_date_up) + 2
when rnk>1 and start_date_up is null then 0
when rnk>1 and end_date_up<=end_date then 0
when rnk>1 and end_date_up>end_date then DATEDIFF(end_date_up,start_date_up)
end as val
from
(
SELECT *
,lead(start_date,1) over(partition by brand order by start_date asc) as start_date_up
,lead(end_date,1) over(partition by brand order by start_date asc) as end_date_up
,ROW_NUMBER() over(partition by brand order by start_date asc) as rnk
FROM `ods_act_mobile_info`
) t1
) t2
group by brand
order by act_days desc
该种方法,拉取截止上一条记录的最大值,计算
select brand
-- 表示只有一行数据
,sum(case when max_dt is null then DATEDIFF(end_date,start_date) +1
-- 表示上面一行的范围覆盖当前行,所以计0
when max_dt >= end_date then 0
-- 表示当前行活动范围与之前活动范围没有交叉,直接计天数
when max_dt < start_date then datediff(end_date,start_date)+1
-- 当前行活动范围与之前活动日期有交叉,取在之前最后活动后的活动天数
when max_dt < end_date and start_date<max_dt then DATEDIFF(end_date,max_dt) end) as act_days
from
( -- 该段子查询返回截图如下
SELECT *
,max(end_date) over(partition by brand order by start_date asc rows between unbounded preceding and 1 preceding) as max_dt
FROM `ods_act_mobile_info`
) t
group by brand
题5:分组问题
尚硅谷B站视频连接
返回如图示,间隔时间小于60秒划分到一个组,表:ods_user_login_time
select id -- 用户id
,ts -- 时间戳
,sum(case when last_ts is null then 1 -- 第一条记录,分组为1
when ts - last_ts < 60 then 0 -- 表示间隔小于60秒,分组不变,累加0
when ts - last_ts >= 60 then 1 end) -- 间隔大于60秒,分组值累加1
over(partition by id order by ts asc) as group -- 分组
from
(
select id,ts
,lag(ts,1) over(partition by id order by ts asc) as last_ts
from ods_user_login_time
) t
题6:间隔连续问题
表:ods_user_login_info
这种方法可以同样适用于间隔n天
select id
,max(consum_active_days) as max_consum_active_days -- 间隔1天,最大连续登录天数
from
(
select id,group
,sum(active_days) as consum_active_days
from
(
select id -- 用户id
,dt -- 时间戳
,sum(case when last_dt is null then 1 -- 第一条记录,分组为1
when datediff(dt,last_dt) <= 2 then 0 -- 表示间隔1天内,分组不变,累加0
datediff(dt,last_dt) > 2 then 1 end) -- 间隔大于1天,分组值累加1
over(partition by id order by ts asc) as group -- 分组
,case when last_dt is null then 1 -- 第一行数据
when datediff(last_dt,dt) > 2 then 0 -- 下一行数据与上一行相差大于1天,重新计算天枢
else datediff(last_dt,dt) end as active_days
from
(
select id,ts
,lag(ts,1) over(partition by id order by ts asc) as last_dt
from ods_user_login_time
) t
) t1
group by id,group
) t2
group by id