记录一下sql笔试面试中遇到的疑难杂症
导航
1.连续登录问题
现在有个登录日志表log,字段有id,time,尝试找出连续登录超过7天的用户id
select id
from
(
select id,date_sub(login_date,interval login_rank day) as first_day_per,
count(login_date) as continue_days
from
(
select id,login_date,row_number(partition by id order by login_date) as login_rank
from
(select id,distinct date(time) as login_date
from log) t1
) t2
group by id,date_sub(login_date,interval login_rank day)
) t3
group by id
having max(continue_days)>7
2.留存率/复购率计算
- 计算每天的次日留存,7日留存,30日留存,90日留存率
select register_day,
sum(case when remain_days=1 then 1 else 0 end)/count(distinct id) as remain_1,
sum(case when remain_days=7 then 1 else 0 end)/count(distinct id) as remain_7,
sum(case when remain_days=30 then 1 else 0 end)/count(distinct id) as remain_30,
sum(case when remain_days=90 then 1 else 0 end)/count(distinct id) as remain_90,
from(
select id,register_day,login_day,datediff(login_day,register_day) as remain_days
from(
(select id,distinct date(time) as login_day
from log) t1
left join
(select id,min(date(time)) as register_day
from log
group by id) t2
on t1.id=t2.id
) t3
) t4
group by register_day
- 计算在 2021 年 7 月 1 日下单的用户在其后的 15 天内复购率
select count(distinct b.user_id)/count(distinct a.user_id) as rebuy_rate
from
(select distinct date,user_id
from order_log
where date='2021-07-01') a
left join
(select distinct date,user_id
from order_log) b
on a.user_id=b.user_id and a.date<b.date
where b.date<=date_sub(cast('2021-07-01') as date),interval -15 day)
3.行列互换
4.一行变多行,多行变一行
mysql根据逗号将一行数据拆分成多行数据
SELECT
a.id,a. NAME,
substring_index(
substring_index(
a.shareholder,
',',
b.help_topic_id + 1
),
',' ,- 1
) AS shareholder
FROM
company a
JOIN mysql.help_topic b ON b.help_topic_id < (
length(a.shareholder) - length(
REPLACE (a.shareholder, ',', '')
) + 1
)
解释详见here
mysql根据逗号将多行数据合并成一行数据
SELECT USER_NAME,GROUP_CONCAT(COURSE) COURSE FROM GRADE
GROUP BY USER_NAME;
详见here
5.二度好友
已知好友表table,id1,id2代表两列中的用户为好友关系,如1 2代表1和2为好友关系(不会出现 2 1,需要自己构造),而
二度好友的定义为:自己朋友的朋友中自己不认识的人
求表中每个用户的二度好友个数。
SELECT
t3.id1,
COUNT(DISTINCT t3.id2)
FROM
(
SELECT
t1.id1,
t2.id2
FROM
(
SELECT uid1 AS id1, uid2 AS id2 FROM TABLE
UNION ALL
SELECT uid2 AS id1, uid1 AS id2 FROM TABLE
)
t1
LEFT JOIN
(
SELECT uid1 AS id1, uid2 AS id2 FROM TABLE
UNION ALL
SELECT uid2 AS id1, uid1 AS id2 FROM TABLE
)
t2
ON
t1.id2 = t2.id1
WHERE
t1.id1 != t2.id2 --去除自己
)
t3
LEFT JOIN
(
SELECT uid1 AS id1, uid2 AS id2 FROM TABLE
UNION ALL
SELECT uid2 AS id1, uid1 AS id2 FROM TABLE
)
t4
ON
t3.id1 = t4.id1
AND t3.id2 = t4.id2
WHERE
t4.id2 IS NULL --去除自己的好友
GROUP BY
t3.id1
6.累加累乘累减累除
主要是通过变现的累加来实现
累乘变形思路:a*b=exp(lna+lnb)
SELECT
T.*,
ROUND(EXP(SUM(LN(T.QUALITY)) OVER(PARTITION BY T.PARENT_ID ORDER BY T.PART_ID)),0) RUNNING_PROD
FROM
TEST T;
累减变形思路:a-b=a+(-b)
SELECT
T.PARENT_ID,
T.PART_ID,
T.QUALITY,
SUM(if(T.RN=1,T.QUALITY,-T.QUALITY)) OVER(PARTITION BY T.PARENT_ID ORDER BY T.PART_ID) RUNNING_PROD
FROM
(
SELECT
T.*,
ROW_NUMBER() OVER(PARTITION BY T.PARENT_ID ORDER BY T.PART_ID) RN
FROM
TEST T) T;
累除变形思路:a/b=exp(lna-lnb)
SELECT
T.PARENT_ID,
T.PART_ID,
T.QUALITY,
EXP(SUM(if(RN=1,LN(T.QUALITY),-LN(T.QUALITY))) OVER(PARTITION BY T.PARENT_ID ORDER BY T.PART_ID)) RUNNING_PROD
FROM
(
SELECT
T.*,
ROW_NUMBER() OVER(PARTITION BY T.PARENT_ID ORDER BY T.PART_ID) RN
FROM
TEST T)T;
7.直播间最大在线人数
-- 直播间同时在线人数
select max((t3.enter_users-t4.out_users)) as max_online_users
from
(
select t1.login_time,count(t2.uid) as enter_users
from
(
select login_time
from live_show_flow
group by login_time
) t1
left join live_show_flow t2
on t1.login_time>=t2.login_time
group by t1.login_time
) t3
left join
(
select t1.login_time,count(t2.uid) as out_users
from
(
select login_time
from live_show_flow
group by login_time
) t1
left join live_show_flow t2
on t1.login_time>=t2.logout_time
group by t1.login_time
) t4
on t3.login_time=t4.login_time