一、商品订单数据
数据表:订单表orders,字段:
order_id,user_id,order_pay,order_time,commodity_level_1,commodity_level_2;
题目1:求最近七天内每一个一级类目下成交额排名前三的二级类目;
-- Top three level-2 categories by total paid amount inside each level-1
-- category, restricted to the last seven calendar days (today plus the six
-- days before it).
SELECT
    b.commodity_level_1,
    b.commodity_level_2,
    b.total_pay
FROM (
    SELECT
        a.commodity_level_1,
        a.commodity_level_2,
        a.total_pay,
        -- RANK() keeps ties, so a level-1 category may return more than
        -- three rows when totals are equal.
        RANK() OVER (
            PARTITION BY a.commodity_level_1
            ORDER BY a.total_pay DESC
        ) AS rank_desc
    FROM (
        SELECT
            commodity_level_1,
            commodity_level_2,
            SUM(order_pay) AS total_pay
        FROM orders
        -- Half-open range on the raw column: exactly seven days, no
        -- future-dated rows, and no function wrapped around order_time.
        WHERE order_time >= CURRENT_DATE - INTERVAL 6 DAY
          AND order_time < CURRENT_DATE + INTERVAL 1 DAY
        GROUP BY
            commodity_level_1,
            commodity_level_2
    ) AS a
) AS b
WHERE b.rank_desc <= 3;
题目2:提取2020.12.27-2020.12.31每一天消费金额排名在1-3的user_id
-- For each day from 2020-12-27 through 2020-12-31, list the users whose
-- total spend that day ranks 1-3.
SELECT
    b.order_date,
    b.user_id,
    b.total_pay
FROM (
    SELECT
        a.order_date,
        a.user_id,
        a.total_pay,
        -- RANK() keeps ties, so a day may return more than three users.
        RANK() OVER (
            PARTITION BY a.order_date
            ORDER BY a.total_pay DESC
        ) AS rank_desc
    FROM (
        SELECT
            CAST(order_time AS DATE) AS order_date,
            user_id,
            SUM(order_pay) AS total_pay
        FROM orders
        -- Half-open range covers all of 2020-12-31 without applying a
        -- function to order_time in the predicate.
        WHERE order_time >= '2020-12-27'
          AND order_time < '2021-01-01'
        GROUP BY
            CAST(order_time AS DATE),
            user_id
    ) AS a
) AS b
WHERE b.rank_desc <= 3;
题目3:计算除去部门最高工资和最低工资后的平均工资(字节跳动面试)--窗口函数
二、用户行为路径分析
-- Average salary per department after dropping every row that carries the
-- department's highest or lowest salary.
select
    a.deptno,
    avg(a.salary) as avg_salary
from (
    select
        deptno,
        salary,
        -- rank_1 = 1 marks the lowest salary (ties included),
        -- rank_2 = 1 marks the highest salary (ties included).
        rank() over (partition by deptno order by salary) as rank_1,
        rank() over (partition by deptno order by salary desc) as rank_2
    from emp
) a
where a.rank_1 > 1
  and a.rank_2 > 1
-- One row per department; a department with no salary strictly between its
-- minimum and maximum has no remaining rows and is absent from the result.
group by a.deptno;
用户行为表tracking_log,字段:user_id,opr_id,log_time
题目1:统计每天符合以下条件的用户数:A操作之后是B操作,AB操作必相邻
-- Per day, the number of users who performed operation A immediately
-- followed by operation B (no other operation in between, same day).
select
    a.log_date,
    -- distinct: a user with several A->B pairs on one day counts once
    count(distinct a.user_id) as A_B_user
from (
    select
        user_id,
        convert(log_time, date) as log_date,
        opr_id,
        -- lead() looks at the user's next operation on that day;
        -- it is NULL for the last operation of the day.
        lead(opr_id) over (
            partition by user_id, convert(log_time, date)
            order by log_time
        ) as next_opr_id
    from tracking_log
) a
where a.opr_id = 'A'
  and a.next_opr_id = 'B'
group by a.log_date;
题目2:统计每天符合用户行为序列为A-B-D的用户数,其中:A-B之间可以有其他任何浏览记录,而B-D之间可以有除C以外的任何浏览记录
-- Per day, the number of users whose ordered operation path contains
-- A ... B ... D, with no C appearing between that B and D.
select
    a.log_date,
    count(a.user_id) as user_num
from (
    -- One row per user per day holding the comma-separated operation path
    -- in time order.
    -- NOTE(review): group_concat output is capped by group_concat_max_len;
    -- very long daily paths may be truncated - confirm the server setting.
    select
        user_id,
        convert(log_time, date) as log_date,
        group_concat(opr_id order by log_time) as user_behavior_path
    from tracking_log
    group by
        user_id,
        convert(log_time, date)
) a
-- Leading/trailing % let the sequence appear anywhere in the day's path
-- instead of requiring the path to start with A and end with D.
-- NOTE(review): the NOT LIKE also drops a user who has one A..B..C..D
-- subsequence and a separate clean A..B..D; LIKE also assumes single-letter
-- opr_id values - confirm both are acceptable.
where a.user_behavior_path like '%A%B%D%'
  and a.user_behavior_path not like '%A%B%C%D%'
group by
    a.log_date;
三、用户留存分析
用户登录表user_log,字段:user_id,log_date
题目1:求每天新增用户数,以及他们的次日留存率
-- Per first-login date: the number of new users and the share of them who
-- logged in again on the following day (day-1 retention).
select
    a.first_date,
    -- distinct guards against duplicate (user_id, log_date) rows in
    -- user_log multiplying the join output
    count(distinct a.user_id) as new_num,
    round(
        count(distinct b.user_id) / count(distinct a.user_id),
        2
    ) as day1_retention
from (
    -- first login date of every user
    select
        user_id,
        min(log_date) as first_date
    from user_log
    group by user_id
) a
-- left join keeps users who did not return; their b columns are NULL and
-- are ignored by count()
left join user_log as b
    on b.user_id = a.user_id
   and datediff(b.log_date, a.first_date) = 1
group by a.first_date;
题目2:留存的计算,每日新增用户数,次日,第3日,第7日留存率--窗口函数,自联结(pdd面试)
select
a.first_date,
count(a.user_id) as new_num,
round(count(distinct case when date_diff('day',cast(a.first_date as date),cast(b.log_date as date))=1 then user_id else null