红包参与领取、活跃用户等相关9题

用户活跃模型表. 表名:tmp_dau_based
字段解释:

  1. imp_date 日期 int格式.
  2. qimei 用户唯一标识(相当于用户id) int格式 无空值.
  3. is_new 新用户标识 int格式. (1标识新用户 0标识老用户)

备注:一个用户1天只出现1次,出现即表示当日登陆。
在这里插入图片描述
红包参与模型表. 表名:tmp_packet_based

字段解释:

  1. imp_date 日期 int格式.
  2. qimei 用户唯一标识(相当于用户id) int格式 无空值.
  3. report_time 领取时间戳 int格式.
  4. add_money 领取金额 int格式.(单位为分 无空值或0值)

备注:日志流水表,每一行为领取1次红包。无特殊说明,一般不考虑领取红包但当日未登录的情况。
(为什么会出现“已领取红包,但未登录的情况”,因为在现实的数据中,数据上报无法做到十分准确,这种其实是BUG数据。需要开发完善上报。)

在这里插入图片描述



-- 36、计算2019年6月1日至6月20日每日DAU

SELECT  imp_date
			 ,COUNT(DISTINCT qimei) as dau
FROM tmp_dau_based
WHERE imp_date BETWEEN '20190601'and '20190620'
GROUP BY imp_date;
	
	
-- 37、按新老用户区分,计算2019年3月1日至3月10日,每天领取红包的用户数,人均领取金额,人均领取次数

SELECT
			   t1.imp_date 
			  ,t2.is_new  
			  ,COUNT( DISTINCT t1.qimei )  AS  user_cnt
			  ,AVG(t1.packet_amount )  AS avg_packet_amount
			  ,AVG(t1.packet_times)  AS avg_packet_times
FROM
( -- 计算每位用户每天的领取金额及领取次数
		SELECT   imp_date
	          ,qimei
	          ,SUM( add_money )  AS  packet_amount
	          ,COUNT(1) AS packet_times
		FROM  tmp_packet_based 
		WHERE imp_date BETWEEN '20190301'  AND '20190310'
    GROUP  BY  imp_date
	            ,qimei
  ) t1
LEFT JOIN 
(
SELECT     imp_date
          ,qimei
          ,is_new
FROM  tmp_dau_based
WHERE imp_date BETWEEN '20190301'  AND '20190310' 
)  t2
ON t1.imp_date = t2.imp_date AND t1.qimei = t2.qimei
GROUP BY    t1.imp_date
	         ,t2.is_new 
ORDER BY    t1.imp_date
	         ,t2.is_new;
	
	
	
	
# 38. 计算2019年3月至6月,每个月领取过红包用户和未领取过红包用户的数量、平均月活跃天数(既本月平均活跃多少天)

-- 恭喜您,答对了 !!!

/*
-- 逻辑梳理:
维度:每个月(时间标签)、是否领取红包(属性标签)
指标:活跃用户数、平均活跃天数( 1. 先计算每个自然月、每个用户的活跃天数; 2. 只保留自然月维度,计算平均天数)

-- 解题思路:
1.按天看人均领取金额和人均领取次数,所以要算出每天每人的领取金额和领取次数,然后再做平均。
2.还需按每天的新老用户进行区分,所以在a表里面取出每个用户是否是新老用户的字段,然后a表和b表关联,最后再聚合。

*/

SELECT     imp_month
         , is_received
			   , COUNT(DISTINCT qimei)  as user_cnt
			   , AVG(per_user_active_days)  as avg_active_days
FROM 
(
      -- 每月每位活跃用户是否领取红包、以及活跃天数标签
	  SELECT    t1.imp_month
					  , t1.qimei
            , t1.per_user_active_days
					  , IF(t2.imp_month IS NULL , 0, 1)   AS   is_received         
	  FROM 
	  ( -- 计算活跃用户每月每位用户的活跃天数
			SELECT    DATE_FORMAT( imp_date,'%Y-%m') as imp_month
							 ,qimei
		           ,COUNT(DISTINCT imp_date)  AS   per_user_active_days
			FROM   tmp_dau_based
			WHERE  imp_date >= '20190301' AND   imp_date <  '20190701' 
			GROUP BY   DATE_FORMAT( imp_date,'%Y-%m')
							  ,qimei
	  )  t1
	  LEFT JOIN
	  (
		  -- 红包领取日志表中的按每月、每位用户需进行去重操作
		  SELECT    DATE_FORMAT( imp_date,'%Y-%m') as imp_month
						   ,qimei
		  FROM   tmp_packet_based
		  WHERE  imp_date >= '20190301' AND   imp_date <  '20190701' 
		  GROUP BY  DATE_FORMAT( imp_date,'%Y-%m')
							  ,qimei
	  ) t2
	  ON t1.imp_month =   t2.imp_month  AND  t1.qimei =   t2.qimei
) t 
GROUP BY  imp_month
         ,is_received
ORDER BY  imp_month
         ,is_received;

				 
-- 39.计算2019年3月6日所有老用户领取的第一个红包金额,若用户没有领取红包则金额为0	
	

/*
所有老用户: is_new  = 0
领取的第一个红包金额: ①第一个红包   ②金额
若用户没有领取红包则金额为0

*/

SELECT     t1.imp_date
				  ,t1.qimei
				  ,IFNULL(t2.add_money,0)  AS  packet_amount
FROM
(
-- 2019年3月6日所有老用户
SELECT  qimei, imp_date
FROM  tmp_dau_based
WHERE  imp_date = '20190306'
				AND is_new = 0
)  t1
LEFT JOIN
(
-- 领取的第一个红包金额
SELECT  qimei , add_money
FROM
  (
	  SELECT   qimei
					 , add_money
					 , row_number()over( PARTITION BY  qimei  ORDER BY report_time asc ) as  rn
	  FROM  tmp_packet_based
	  WHERE  imp_date = '20190306'
  ) t	
WHERE  rn = 1
)  t2
ON  t1.qimei  = t2.qimei ;
	
-- 40.计算2019年6月1日至6月10日,每日领取红包的新用户数、老用户数及其人均领取金额,人均领取次数,要考虑【领取红包但当日未登录】的情况

SELECT
			   t1.imp_date 
				 -- 2 代表领取红包但当日未登录
			  ,ifnull(t2.is_new,2) AS is_new   
			  ,COUNT( DISTINCT t1.qimei )  AS  user_cnt
			  ,AVG(t1.packet_amount )  AS avg_packet_amount
			  ,AVG(t1.packet_times)  AS avg_packet_times
FROM
( -- 计算每位用户每天的领取金额及领取次数
		SELECT   imp_date
	          ,qimei
	          ,SUM( add_money )  AS  packet_amount
	          ,COUNT(1) AS packet_times
		FROM  tmp_packet_based 
		WHERE imp_date BETWEEN '20190601'  AND '20190610'
    GROUP  BY  imp_date
	            ,qimei
  ) t1
LEFT JOIN 
(
SELECT     imp_date
          ,qimei
          ,is_new
FROM  tmp_dau_based
WHERE imp_date BETWEEN '20190601'  AND '20190610'
)  t2
ON t1.imp_date = t2.imp_date AND t1.qimei = t2.qimei
GROUP BY    t1.imp_date
	         ,t2.is_new 
ORDER BY    t1.imp_date
	         ,t2.is_new;
					 
-- 41.计算2019年5月1日,每个新用户领取的第一个红包和第二个红包的时间差(只计算注册当日有领取红包的用户,注册当日以DAU表中新用户为1的用户) 				 

-- UNIX时间戳转换为日期用函数: FROM_UNIXTIME()				 

/*
-- 每个新用户领取情况

注意:每个表的详细级别

难点:怎么筛出来每位用户,第一个红包,第二个红包的数据

*/

-- 解法1:简洁,通过
SELECT    t1.imp_date
 				 ,t1.qimei
				 ,min(report_time)  as  first_action_time
				 ,max(report_time)  as  second_action_time
				 ,TIMESTAMPDIFF(second , min(report_time), max(report_time))  AS time_interval
FROM 
(
  SELECT  imp_date
				 ,qimei
  FROM  tmp_dau_based
  WHERE  imp_date = '20190501'  AND  is_new = 1
)  t1
INNER JOIN 
(
  SELECT  qimei, report_time
  FROM  
   ( 
	  SELECT  imp_date
					 ,qimei
					 ,FROM_UNIXTIME(report_time) report_time
					 ,row_number()over(PARTITION BY qimei ORDER BY report_time ASC)  AS rn
	  FROM  tmp_packet_based
	  WHERE  imp_date = '20190501' 
   ) t
  WHERE  rn < 3
)   t2
ON  t1.qimei = t2.qimei
GROUP BY  t1.imp_date
 				 ,t1.qimei;


-- 解法2:略显冗余, 不通过(实际是对的)
SELECT *
FROM 
(
SELECT    t1.imp_date
 				 ,t1.qimei
				 ,report_time  rn1
				 ,lead(report_time,1)over(PARTITION BY t1.qimei ORDER BY report_time ASC)    rn2
				 ,TIMESTAMPDIFF(second ,report_time,lead(report_time,1)over(PARTITION BY t1.qimei ORDER BY report_time ASC) )  AS time_interval
FROM 
(
SELECT  imp_date
               ,qimei
FROM  tmp_dau_based
WHERE  imp_date = '20190501'  AND  is_new = 1
)   t1
INNER JOIN 
(
SELECT  qimei, report_time
FROM  
 ( 
	SELECT  imp_date
				   ,qimei
				   ,FROM_UNIXTIME(report_time) report_time
				   ,row_number()over(PARTITION BY qimei ORDER BY report_time ASC) rn
	FROM  tmp_packet_based
	WHERE  imp_date = '20190501' 
 ) t
WHERE  rn < 3
)   t2
ON  t1.qimei = t2.qimei
 ) t
WHERE t. rn2 is not null;
					 

-- 42. 计算2019年6月1日至6月20日,每日的用户次日留存率、领取红包用户的次日留存率、未领取红包用户的次日留存率

# 难点:对于未领取、领取红包用户如何筛选计算,以及如何减少不必要的sql代码冗余



SELECT  t1.imp_date
       ,COUNT(DISTINCT t2.qimei) / COUNT(DISTINCT t1.qimei) AS  retention
       ,COUNT(DISTINCT CASE WHEN is_packet_user = 1 THEN t2.qimei END) / COUNT(DISTINCT CASE WHEN is_packet_user = 1 THEN t1.qimei END) AS  packet_retention
       ,COUNT(DISTINCT CASE WHEN is_packet_user = 0 THEN t2.qimei END) / COUNT(DISTINCT CASE WHEN is_packet_user = 0 THEN t1.qimei END) AS  non_packet_retention
FROM
(
    -- 对每位活跃用户在每天是否领取红包,打上标签,0 表示未领取,1表示领取
    SELECT  a.imp_date
					 ,a.qimei
					 ,IF(b.qimei IS NULL, 0, 1) AS  is_packet_user
    FROM( -- 建议优化:缩小数据范围
		SELECT DISTINCT imp_date,qimei
		FROM tmp_dau_based
		WHERE imp_date BETWEEN '20190601' AND '20190620'
		) a
    LEFT JOIN (
    -- 对红包参与模型表进行每用户按日去重
		SELECT  imp_date
					 ,qimei
		FROM tmp_packet_based
		WHERE imp_date BETWEEN '20190601' AND '20190620'
		GROUP BY imp_date
						,qimei
    ) b ON a.imp_date = b.imp_date AND a.qimei = b.qimei
) t1
LEFT JOIN
(
    SELECT  imp_date
					 ,qimei
    FROM tmp_dau_based
    WHERE imp_date BETWEEN '20190602' AND '20190621'
) t2 
ON t1.imp_date = (t2.imp_date - 1) AND t1.qimei = t2.qimei
GROUP BY t1.imp_date;


-- dathon参考答案:相对更简洁

with a as (
		select  distinct imp_date,qimei  
		from tmp_dau_based
		where  imp_date between '20190601' and '20190621'
),b as (  select distinct  imp_date, qimei
					from tmp_packet_based
					where imp_date between '20190601' and '20190620'
)
select
			t1.imp_date,
			count( distinct t2.qimei) / count(distinct t1.qimei)   retention,
			count( distinct if(t1.is_packet_user = 1, t2.qimei, null)) / count(distinct if(t1.is_packet_user = 1, t1.qimei, null))   packet_retention,
			count( distinct if(t1.is_packet_user = 0, t2.qimei, null)) / count( distinct if(t1.is_packet_user = 0, t1.qimei, null))  non_packet_retention
from
  ( -- t1 表找出每日的登录用户并标记用户是否领取红包,作为留存率的分母
    select    a.imp_date,
							a.qimei,
							if(b.qimei is null, 0, 1) is_packet_user
    from a
    left join b 
		on a.imp_date = b.imp_date and a.qimei = b.qimei
    where a.imp_date between '20190601' and '20190620'
    group by   a.imp_date,
               a.qimei
  ) t1
-- 当日登录的用户有多少在次日仍然登录,需要对登录表进行连接操作,即t1 left join t2 ( t2 即临时表a )
left join a t2 
on t1.qimei = t2.qimei and datediff(t2.imp_date, t1.imp_date) = 1 -- 在dathon的基础上进行了代码更正
group by  t1.imp_date;


-- 43. 计算2019年6月1日至6月20日,每日DAU中,近3天连续登陆用户数,近3天有领取红包用户数,近3天连续登陆用户的占比,近3天有领取红包用户的占比

/*

近3天连续登陆用户数
近3天有领取红包用户数

选定时间范围: 20190601~ 20190620

难点:连续登录问题通用解法思路

*/


-- dathon:

SELECT     a.imp_date
          ,COUNT(DISTINCT a.qimei) AS dau
					,COUNT(DISTINCT b.qimei) AS 3d_user_cnt
					,COUNT(DISTINCT c.qimei) AS 3d_packet_cnt
					,COUNT(DISTINCT b.qimei) / COUNT(DISTINCT a.qimei) AS 3d_user_rate
					,COUNT(DISTINCT c.qimei) / COUNT(DISTINCT a.qimei) AS 3d_packet_rate
FROM ( 
			-- a表求出每天登录的用户
		  SELECT   imp_date
			        ,qimei
		  FROM tmp_dau_based
		  WHERE imp_date BETWEEN '20190601' AND '20190620'
) a
LEFT JOIN (
		  -- b表中,将用户的登录日减去其登录日的排序即可得到date_base字段,相同的date_base出现几次即意味着连续登陆了几天
			SELECT   qimei
							,date_add(date_base, INTERVAL 3 DAY) AS dt
							,COUNT(1) AS cnt
			FROM(
						SELECT     qimei
						          ,imp_date
											,rank() OVER (PARTITION BY qimei ORDER BY imp_date) AS date_rank
											,date_sub(imp_date, INTERVAL (rank() OVER (PARTITION BY qimei ORDER BY imp_date) ) DAY) AS date_base
						FROM tmp_dau_based
						WHERE imp_date BETWEEN '20190530' AND '20190620'
			) t
			GROUP BY  qimei
							 ,date_add(date_base, INTERVAL 3 DAY)
			HAVING COUNT(1) >= 3
	) b
ON a.qimei = b.qimei AND date(a.imp_date) = b.dt
LEFT JOIN (
			-- c表求取出1-20日领取红包的用户,在表连接的时候限制在近3天即可表示3天内有领取红包的用户
			SELECT qimei, imp_date
			FROM tmp_packet_based
			WHERE imp_date BETWEEN '20190530' AND '20190620'
			GROUP BY  qimei
							 ,imp_date
) c
ON  a.qimei = c.qimei AND (( 0 < datediff(a.imp_date, c.imp_date) ) AND ( datediff(a.imp_date, c.imp_date) <=2))
GROUP BY a.imp_date;



-- 草稿:
SELECT 0 < datediff('20190601', '20190610') <= 2;  # 大在前, 小在后;  



-- 44. 计算2019年6月1日,领取红包用户领取金额的中位数(用户粒度的中位数,而非红包粒度的中位数)最高的20个用户及排名

SELECT
				 imp_date
				,qimei
				,median_money
				,rn 
FROM
(
			SELECT    a.imp_date
							 ,a.qimei
               ,CAST(FORMAT(add_money,4)  AS DECIMAL(18, 4))  AS median_money
							 ,rank() over ( PARTITION BY a.imp_date ORDER BY  add_money  DESC ) rn 
			FROM(
			-- 计算每个用户领取红包的排名
						SELECT
										 imp_date
										,qimei
										,add_money
										,row_number() over ( PARTITION BY imp_date, qimei ORDER BY add_money ) AS rk 
						FROM tmp_packet_based 
						WHERE imp_date = '20190601' 
			) a
			LEFT JOIN ( 
			-- 计算每个用户领取红包的个数
						 SELECT    imp_date
											,qimei
											,count(*) AS num 
							FROM tmp_packet_based 
							WHERE imp_date = '20190601' 
							GROUP BY  imp_date
											 ,qimei
			) b 
			ON a.qimei = b.qimei AND a.imp_date = b.imp_date 
			-- 根据中位数的定义,其rk排名会落在总数+1再除以二的正负0.5之间
			WHERE rk >= ( num + 1 ) / 2 - 0.5 AND rk <= ( num + 1 ) / 2 + 0.5 
) t 
WHERE  rn <= 20;



-- 以字符串形式返回的,然后使用 CAST 函数将其转换回数值类型
SELECT CAST(FORMAT(20,4)  AS DECIMAL(18, 4));

	
	

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值