--Basic facts gathering
--1.
--按每辆单车的使用用户数排序,没有被使用的次数none排在最后
SELECT b.id, t.u_num
FROM bikes b
LEFT JOIN (
SELECT bike_id, COUNT(DISTINCT user_id) AS u_num
FROM trips
WHERE started_at >= '2017-06-01'
AND completed_at <= '2017-06-30'
AND status = 'completed'
GROUP BY bike_id
) t
ON b.id = t.bike_id
ORDER BY t.u_num DESC
--按每辆单车的trip次数排序,没有trip的none排在最后
SELECT b.id, t.num
FROM bikes b
LEFT JOIN (
SELECT bike_id, COUNT(1) AS num
FROM trips
WHERE started_at >= '2017-06-01'
AND completed_at <= '2017-06-30'
AND status = 'completed'
GROUP BY bike_id
) t
ON b.id = t.bike_id
ORDER BY t.num DESC
--2.
/*假设1)TestPromo这个促销活动的名字是唯一的
2)trip里跟促销活动有关的只有coupon_id,所以假设用了TestPromo的券才算参加了活动
3)只算status='completed'的,且第一天定义为trips.completed_at=promotions.start_at
表格表头:地区id/旅程数/用户数/第一天旅程百分比/第一天用户百分比
*/
SELECT total_table.re_id,
total_table.旅程数, total_table.用户数,
CAST(CAST(first_day.第一天旅程数 * 1.0 * 100 / total_table.旅程数 AS decimal(10, 2)) AS varchar(50)) + '%' AS '第一天旅程百分比',
CAST(CAST(first_day.第一天用户数 * 1.0 * 100 / total_table.用户数 AS decimal(10, 2)) AS varchar(50)) + '%' AS '第一天用户百分比'
FROM (
SELECT t.region_id AS re_id, COUNT(1) AS '第一天旅程数', COUNT(DISTINCT t.uer_id) AS '第一天用户数'
FROM trips t
JOIN coupons c ON t.coupon_id = c.id
JOIN promotions p ON c.promotion_id = p.id
WHERE p.promotion_name = 'TestPromo'
AND t.status = 'completed'
AND t.completed_at = p.start_at
GROUP BY t.region_id
) first_day
INNER JOIN (
SELECT t.region_id AS re_id, COUNT(1) AS '旅程数', COUNT(DISTINCT t.uer_id) AS '用户数'
FROM trips t
JOIN coupons c ON t.coupon_id = c.id
JOIN promotions p ON c.promotion_id = p.id
WHERE p.promotion_name = 'TestPromo'
AND t.status = 'completed'
GROUP BY t.region_id
) total_table
ON first_day.re_id = total_table.re_id
--Data transformation
--3.
/*传入参数为用户id,返回一个表变量@bac
这一问假设1)status can be: `started` `failed` `completed`三种均可
2)判定最后使用的依据是不为空的创建订单时间created_at
3)不一定每笔订单会使用优惠券,所以要查找不为空的值
表头:id/最后使用过的自行车id/最后使用过的优惠券id
*/
create function bikeandcoupon(@uid int)
returns @bac TABLE (id int,lu_bike int,lu_coupon int )
as
begin
insert into @bac(id,lu_bike)
select @uid as id,bike_id
from trips
where user_id=@uid and created_at=
(select max(created_at) from trips where user_id=@uid )
update @bac set
lu_coupon=
(select coupon_id from trips
where user_id=@uid and coupon_id is not NULL
and created_at=(select max(created_at) from trips where user_id=@uid and coupon_id is not NULL))
where id=@uid
return
end
go
--4.
/*只用了单表查询,如果没有出现在trip表里的用户则从来没骑过单车不统计在表里;
begin_balance题目没有给出解释,我在这里解释为某一天刚开始的余额
比如开始骑单车的第一天begin_balance=0,当天spent_amount_cents=5
那么第二天begin_balance=-5,当天spent_amount_cents=2
第二天begin_balance=-7依次类推
虽然它反映不了用户目前的总金额,但是变量名为"begin"我考虑再三选择了这个
*/
SELECT t.completed_at as 'date',
t.user_id as user_id,
t.num_trips as num_trips,
t.spent_amount_cents as spent_amount_cents,
-SUM(t.spent_amount_cents) OVER
(PARTITION BY user_id ORDER BY completed_at
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)+t.spent_amount_cents
as begin_balance
FROM
(SELECT DISTINCT completed_at ,user_id,
SUM(cost_amount_cents) OVER (PARTITION BY completed_at,user_id) as spent_amount_cents,
COUNT(1) OVER (PARTITION BY completed_at) as num_trips
FROM trips)t
--Integrated problem solving
--5.
/*每个地区2017.6的报告
假设活跃用户为在6月出现在trips表里的用户
活跃的单车为在6月出现在trips表里(被骑过)的车
*/
SELECT region_id, SUM(cost_amount_cents) AS gross_revenue
, SUM(cost_amount_cents - refunded_amount_cents) AS net_revenue
, COUNT(DISTINCT user_id) AS number_of_users, COUNT(1) AS number_of_trips
, COUNT(DISTINCT bike_id) AS number_of_bikes
FROM trips
WHERE started_at >= '2017-06-01'
AND completed_at <= '2017-06-30'
AND status = 'completed'
GROUP BY region_id
--6.
/*
选择的指标如下
参与的用户数:参加活动的用户
trip数:活动期间的旅程数
收入:参与活动产生的利润
这三个指标分别对应活动参与人数、成交量、收入,是衡量一个活动很常用的指标
sql 语句假设跟第2题类似
1)trips里跟促销活动有关的只有coupon_id,所以参加了活动就用了该活动的券
3)只算status='completed'的
*/
SELECT p.id, COUNT(DISTINCT t.uer_id) AS '用户数', COUNT(1) AS '旅程数'
, SUM(t.cost_amount_cents - t.refunded_amount_cents) AS '净收入'
FROM trips t
JOIN coupons c ON t.coupon_id = c.id
JOIN promotions p ON c.promotion_id = p.id
GROUP BY p.id
--7.假设流失率间隔为7天
/*创建new_user表
(id,created_at)*/
CREATE TABLE new_user
SELECT id, created_at AS 'day'
FROM users
/*创建activity表
假设在当天创建了订单就为活跃,不论旅程是否完成,
所以不选completed_at,选created_at*/
CREATE TABLE activity
SELECT user_id AS id, created_at AS 'day'
FROM trips
--每天的新用户总数表
CREATE TABLE cohort_size
SELECT day, COUNT(id) AS size
FROM new_user
GROUP BY day
SELECT day, period, new_users, retained_users, retention_rate
, churn_rate
FROM (
SELECT new_user.day, future_ac.day - new_usery.day AS period, cohort_size.size AS new_users
, COUNT(DISTINCT future_ac.id) AS retained_users
, COUNT(DISTINCT future_ac.id) / MAX(cohort_size.size) AS retention_rate
, 1 - COUNT(DISTINCT future_ac.id) / MAX(cohort_size.size) AS churn_rate
FROM new_user
LEFT JOIN activity future_ac
ON new_user.id = future_ac.id
AND new_user.day < future_ac.day
AND future_ac.day <= dateadd(day, 7, new_user.day)
LEFT JOIN cohort_size ON new_user.day = cohort_size.day
GROUP BY 1, 2
) t
WHERE period IS NOT NULL
ORDER BY day, period
SQL笔试
最新推荐文章于 2024-06-30 22:09:00 发布