电商用户行为数据分析
用到的sql知识点:
增:alter table add(增加列)
删:drop table (删除临时表)
改:update name set 列=replace(列名,n,m)
查:分组group by、连接left join、判断case when then end、日期字符串转换str_to_date、窗口函数sum()over(order by )、连续排序row_number、临时视图creat table as select、日期差datediff(n,m)、条件查询<=> !=
1、tableau故事
2、sql代码
一、数据探索
use jd2;
desc clea_data
select * from clea_data limit 3
delete from clea_data where action_date='2018-03-27' or action_date='2018-03-28'
二、数据分析
1 查询uv和pv趋势变化
select action_date 日期,count(user_id) pv,count(distinct user_id) uv
from clea_data where type='浏览' group by action_date;
2查询订单趋势
select action_date 日期,count(user_id) 订单
from clea_data where type='下单' group by action_date
3查询首购用户数趋势
select s1.日期,count(s1.user_id) 首购
from (select user_id,min(action_date) 日期
from clea_data where type='下单' and reg_date>'2018-02-01' group by user_id) s1 group by 日期 order by 日期
4.新老用户uv数据比较
select s1.日期,新客uv,老客uv from
(select action_date 日期,count(distinct user_id) 新客uv
from clea_data where type='浏览' and reg_date>='2018-02-01' group by action_date) s1 inner join
(select action_date 日期,count(distinct user_id) 老客uv
from clea_data where type='浏览' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期
5.新老用户订单数据比较
select s1.日期,新客订单,老客订单 from
(select action_date 日期,count(distinct user_id) 新客订单
from clea_data where type='下单' and reg_date>='2018-02-01' group by action_date) s1 inner join
(select action_date 日期,count(distinct user_id) 老客订单
from clea_data where type='下单' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期
6.查询新增注册人数年趋势变化
select reg_date 注册日期,count(user_id) 注册人数 from clea_data group by reg_date
7、查询2018年新增注册人数变化
select reg_date 注册日期,count(user_id) 注册人数 from clea_data where reg_date>='2018-01-01' group by reg_date
8.查询注册时间与首购时间间隔天数分布
###增加日期差列
alter table clea_data add diff int
###新建日期差
update clea_data set diff=DATEDIFF(action_date,reg_date)
###绘制分桶视图
create table 首购分桶 as
select user_id,(
case when 首购时间=0 then '0'
when 首购时间 between 1 and 3 then '[1,3]'
when 首购时间 between 4 and 7 then '[4,7]'
when 首购时间 between 8 and 15 then '[8,15]'
when 首购时间 between 16 and 30 then '[16,30]'
when 首购时间 between 31 and 60 then '[31,60]'
else '大于60' end) 首购间隔
from
(select user_id,min(diff) 首购时间
from clea_data
where type='下单' and reg_date >='2018-02-01'
group by user_id) s1
select 首购间隔,count(user_id) 数量 from 首购分桶 group by 首购间隔
9.查询每周用户浏览活跃度情况
###绘制分桶视图
create table 浏览活跃度 as
select week_number 周数,(
case when 产品数=0 then '未访问'
when 产品数=1 then '不活跃'
when 产品数 between 2 and 5 then '活跃'
else '非常活跃' end) 活跃度
from
(select week_number,user_id,count(distinct sku_id) 产品数
from clea_data
where type='浏览'
group by user_id,week_number) s1
select 周数,count(活跃度) 活跃度 from 浏览活跃度 group by 周数
select user_id,(when 周数='5' then '5' end) as 5 from 浏览活跃度
10.查询下单转化率
select
(select count(user_id) from clea_data where type='浏览') as 浏览,
(select count(user_id) from clea_data where type='下单') as 下单,
(select count(user_id) from clea_data where type='下单')/(select count(user_id) from clea_data where type='浏览') as 转化率
11.查询复购率
select
(select count(distinct user_id) from clea_data where type='下单') 购买人数,
(select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1) 二次购买人数,
((select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1)/(select count(distinct user_id) from clea_data where type='下单')) as 复购率
12.查询用户复购时间间隔分布
create table min_date as
select user_id,min(action_date) 首购 from clea_data where type='下单' group by user_id
create table diff_table as
(select s1.user_id,s1.action_date,首购 from (select user_id,action_date
from clea_data
where type='下单') s1 left join min_date s2 on s1.user_id=s2.user_id)
###增加日期差列 ###新建日期差###
alter table diff_table add diff_date int
update diff_table set diff_date=DATEDIFF(action_date,首购)
select * from diff_table
###绘制分桶视图
create table 复购分桶 as
select user_id,(
case when 0
when diff_date between 4 and 7 then '[4,7]'
when diff_date between 8 and 15 then '[8,15]'
when diff_date between 16 and 30 then '[16,30]'
when diff_date between 31 and 60 then '[31,60]'
when diff_date between 61 and 90 then '[61,90]'
end) 购物间隔
from diff_table
select 购物间隔,count(user_id) 数量 from 复购分桶 group by 购物间隔
select * from 复购分桶
13.查询新注册用户复购时间间隔分布
create table newmin_date as
select user_id,min(action_date) 首购 from clea_data where type='下单' and reg_date>='2018-02-01' group by user_id;
create table newdiff_table as
(select s1.user_id,s1.action_date,首购 from (select user_id,action_date
from clea_data
where type='下单' and reg_date>='2018-02-01') s1 right join newmin_date s2 on s1.user_id=s2.user_id)
###增加日期差列 ###新建日期差###
alter table newdiff_table add newdiff_date int
update newdiff_table set newdiff_date=DATEDIFF(action_date,首购)
select * from newdiff_table
drop table newdiff_table
drop table 新复购分桶
###绘制分桶视图
create table 新复购分桶 as
select user_id,(
case when 0
when newdiff_date between 4 and 7 then '[4,7]'
when newdiff_date between 8 and 15 then '[8,15]'
when newdiff_date between 16 and 30 then '[16,30]'
when newdiff_date between 31 and 60 then '[31,60]'
when newdiff_date between 61 and 90 then '[61,90]'
end) 购物间隔
from newdiff_table
select 购物间隔,count(distinct user_id) 数量 from 新复购分桶 group by 购物间隔
select * from 新复购分桶
14.查询付费用户的性别构成
select sex,count(user_id) 人数 from clea_data where type='下单' group by sex
15.查询付费用户的年龄构成
select age,count(user_id) 人数 from clea_data where type='下单' group by age
16.查询省份的销量分布
select province,count(user_id) 销量 from clea_data where type='下单' group by province order by 销量 desc
17.查询销量的城市级别分布
select city_level,count(user_id) 销量 from clea_data where type='下单' group by city_level order by 销量 desc
18.查询时段uv分布
select action_hour,count(distinct user_id) 销量 from clea_data where type='下单' group by action_hour
19.产品类型销量top20
select cate,count(user_id) 销量 from clea_data where group by cate order by 销量 desc limit 20
20.产品类型访问量top20
select cate,count(user_id) 浏览量 from clea_data where type='浏览' group by cate order by 浏览量 desc limit 20
21.品牌销量top20
select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc limit 20
22.查询品牌累计百分比
select 销量,row_number()over(order by 销量 desc) 排名,(sum(销量)over(order by 销量 desc)/(select sum(销量) from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc)s1)) 累加百分比,sum(销量)over(order by 销量 desc) as 累加和
from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc) s1
22.制作RF模型
###创建RF表
create table rfm AS
select user_id,F,datediff((select max(action_date) from clea_data where type='下单'),R) R
from
(select user_id,count(type) F,max(action_date) R from clea_data where type='下单' group by user_id) s1
###给RF表赋值
create table rfm_level as
(select user_id,R,F,CONCAT(R,F) lev FROM
(select user_id,(
case when (SELECT avg(R) FROM rfm)<=R then '1'
when (SELECT avg(R) FROM rfm)>R then '0'
end) R,(
case when (SELECT avg(F) FROM rfm)<=F then '1'
when (SELECT avg(F) FROM rfm)>F then '0'
end) F
from rfm) S2)
#替换
update rfm_level set lev=REPLACE(lev,'11','保持') ;
update rfm_level set lev=REPLACE(lev,'10','新手') ;
update rfm_level set lev=REPLACE(lev,'01','唤醒') ;
update rfm_level set lev=REPLACE(lev,'00','流失') ;
select lev 用户类别,count(user_id) 数量 from rfm_level group by lev