1.每个用户最大连续登录天数
SELECT user_id,max(days) as '最大登录天数'
FROM
(SELECT distinct user_id
,count(1) days
FROM
(SELECT user_id
,active_date
,ROW_NUMBER() over(partition by user_id ORDER BY active_date) as rk
,active_date-ROW_NUMBER() over(partition by user_id ORDER BY active_date) as diff
FROM
(SELECT DISTINCT user_id,
DATE(problem_create_time) active_date
FROM
pbm_time where problem_create_time is not null
) t
) t1
GROUP BY user_id,diff
) t2 GROUP BY user_id
ORDER BY 最大登录天数 desc ;
2.次日/三日留存率
SELECT
first_day
,count(distinct user_id) uv
,sum(case when diff_day=1 then 1 else 0 end)/count(DISTINCT user_id) day_1
,sum(case when diff_day=2 then 1 else 0 end)/count(DISTINCT user_id) day_2
,sum(case when diff_day=3 then 1 else 0 end)/count(DISTINCT user_id) day_3
,sum(case when diff_day=7 then 1 else 0 end)/count(DISTINCT user_id) day_7
,sum(case when diff_day=15 then 1 else 0 end)/count(DISTINCT user_id) day_15
,sum(case when diff_day=30 then 1 else 0 end)/count(DISTINCT user_id) day_30
,sum(case when diff_day=60 then 1 else 0 end)/count(DISTINCT user_id) day_60
FROM
(SELECT user_id
,active_date
,first_value(active_date) over(partition by user_id ORDER BY active_date) as first_day
,DATEDIFF(active_date,first_value(active_date) over(partition by user_id ORDER BY active_date)) as diff_day
FROM
(SELECT user_id,
DATE(problem_create_time) active_date
FROM
pbm_time where problem_create_time is not null
GROUP BY user_id,active_date
) t
) t1
where first_day>=('2021-01-01')
GROUP BY first_day;
三日/七日内的留存率
SELECT
active_date
,count(distinct user_id) uv
,sum(case when diff_day=1 then 1 else 0 end)/count(DISTINCT user_id) day_1
,sum(case when diff_day in (1,2) then 1 else 0 end)/count(DISTINCT user_id) day_2
,sum(case when diff_day<=7 then 1 else 0 end)/count(DISTINCT user_id) day_7
,sum(case when (diff_day!=0 and diff_day<=30) then 1 else 0 end)/count(DISTINCT user_id) day_30
,sum(case when (diff_day!=0 and diff_day<=60) then 1 else 0 end)/count(DISTINCT user_id) day_60
FROM
(SELECT user_id
,active_date
,first_value(active_date) over(partition by user_id ORDER BY active_date) as first_day
,DATEDIFF(active_date,first_value(active_date) over(partition by user_id ORDER BY active_date)) as diff_day
FROM
(SELECT user_id,
DATE(problem_create_time) active_date
FROM
pbm_time where problem_create_time is not null
GROUP BY user_id,active_date
) t
) t1
where active_date>=('2021-01-01')
GROUP BY active_date;
3.中位数
数据源
奇数行
SELECT
round(avg(income)) AS 中位数
FROM (SELECT
id
,income
,count(1) over() AS cnt
,row_number() over(order by income) AS rownum
FROM employee
GROUP BY id,income
) t
WHERE rownum in ((cnt+1)div 2,(cnt+2)div 2) ;
偶数行
wITH temp_rk AS
(SELECT
id
,income
,count(1) over() AS cnt
,row_number() over(order by income) AS rownum
FROM employee
WHERE id<5 #偶数行
GROUP BY id,income
)
SELECT
round(avg(income)) AS 中位数
FROM temp_rk
WHERE rownum in
((cnt+1)div 2,(cnt+2)div 2)
4.四分位
SELECT per_rank,avg(age) age
FROM
(select
CONVERT(insure_age,SIGNED) age
,PERCENT_RANK() OVER(ORDER BY CONVERT(insure_age,SIGNED) )
,round(PERCENT_RANK() OVER(ORDER BY CONVERT(insure_age,SIGNED) ),2) AS "per_rank"
FROM pbm_time
WHERE insure_age is not null
) t
WHERE t.per_rank in (0,0.23,0.50,0.75,1)
GROUP BY per_rank