-- 数据预览:
select * from fct_sales limit 10;
-- 每月的新客户占比:
select first_month as m_month
,count(dimMemberID) AS user_cnt
,sum(if(is_new = 1,1,0)) as new_user_num
,sum(if(is_new = 1,1,0)) / count(dimMemberID) as new_rate
,sum(if(is_new = 0,1,0)) as old_user_num
,sum(if(is_new = 0,1,0))/ count(dimMemberID) as old_rate
from
(
SELECT dimMemberID
,date_format(min(dimDateID),'%Y%m') AS first_month
,date_format(max(dimDateID),'%Y%m') AS last_month
,if(date_format(min(dimDateID),'%Y%m') = date_format(max(dimDateID),'%Y%m'),1,0 ) as is_new
from fct_sales
group by dimMemberID -- 已去重
) t
group by first_month -- 以首月作为每月标识分组月份
order by m_month;
-- 验证:2017年8月的新客
select *
from
(
select dimMemberID
,min(dimDateID) as min_date
,max(dimDateID) as max_date
from fct_sales
group by dimMemberID
) t
where DATE_FORMAT(min_date,'%Y%m') != DATE_FORMAT(max_date,'%Y%m') -- 第一次及最后一次购买都是在同一个月份
and min_date regexp '201708'; -- 验证完毕:2017年08月确实没有新客户
计算逻辑更正:
with temp1 as (
select
distinct -- 添加distinct去重,主要是考虑到同一用户在激活当天可能可能下过两笔及以上订单
t2.user_id
,stimu_date
,active_date
,if (stimu_date = active_date,1,0) as flag
from
(
-- 计算得到每个用户的第一购买日期
SELECT dimMemberID as user_id
,min(dimDateID) as stimu_date
FROM fct_sales
where dimMemberID <> 0 -- 排除非会员
group by dimMemberID
) t1
right join
(
-- 计算得到每个用户每月的第一次购买日期
SELECT dimMemberID as user_id
,dimDateID as active_date
FROM fct_sales
where dimMemberID <> 0 -- 排除非会员
) t2
on t1.user_id = t2.user_id
)
select month(active_date) as mmonth
,sum(if (flag = 1,1,0 )) as new_cnt
,count(distinct user_id) as user_cnt # 此处不能用count(1)
,sum(if (flag = 1,1,0 ))/ count(distinct user_id) as rate
from temp1
group by month(active_date) with rollup;
最后统计结果应该是: