财务数据分析sql python_数据分析sql+tableau+python项目(tableau+sql篇)

电商用户行为数据分析

用到的sql知识点:

增:alter table add(增加列)

删:drop table (删除临时表)

改:update name set 列=replace(列名,n,m)

查:分组group by、连接left join、判断case when then end、日期字符串转换str_to_date、窗口函数sum()over(order by )、连续排序row_number、临时视图creat table as select、日期差datediff(n,m)、条件查询<=> !=

1、tableau故事

2、sql代码

一、数据探索

use jd2;

desc clea_data

select * from clea_data limit 3

delete from clea_data where action_date='2018-03-27' or action_date='2018-03-28'

二、数据分析

1 查询uv和pv趋势变化

select action_date 日期,count(user_id) pv,count(distinct user_id) uv

from clea_data where type='浏览' group by action_date;

2查询订单趋势

select action_date 日期,count(user_id) 订单

from clea_data where type='下单' group by action_date

3查询首购用户数趋势

select s1.日期,count(s1.user_id) 首购

from (select user_id,min(action_date) 日期

from clea_data where type='下单' and reg_date>'2018-02-01' group by user_id) s1 group by 日期 order by 日期

4.新老用户uv数据比较

select s1.日期,新客uv,老客uv from

(select action_date 日期,count(distinct user_id) 新客uv

from clea_data where type='浏览' and reg_date>='2018-02-01' group by action_date) s1 inner join

(select action_date 日期,count(distinct user_id) 老客uv

from clea_data where type='浏览' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期

5.新老用户订单数据比较

select s1.日期,新客订单,老客订单 from

(select action_date 日期,count(distinct user_id) 新客订单

from clea_data where type='下单' and reg_date>='2018-02-01' group by action_date) s1 inner join

(select action_date 日期,count(distinct user_id) 老客订单

from clea_data where type='下单' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期

6.查询新增注册人数年趋势变化

select reg_date 注册日期,count(user_id) 注册人数 from clea_data group by reg_date

7、查询2018年新增注册人数变化

select reg_date 注册日期,count(user_id) 注册人数 from clea_data where reg_date>='2018-01-01' group by reg_date

8.查询注册时间与首购时间间隔天数分布

###增加日期差列

alter table clea_data add diff int

###新建日期差

update clea_data set diff=DATEDIFF(action_date,reg_date)

###绘制分桶视图

create table 首购分桶 as

select user_id,(

case when 首购时间=0 then '0'

when 首购时间 between 1 and 3 then '[1,3]'

when 首购时间 between 4 and 7 then '[4,7]'

when 首购时间 between 8 and 15 then '[8,15]'

when 首购时间 between 16 and 30 then '[16,30]'

when 首购时间 between 31 and 60 then '[31,60]'

else '大于60' end) 首购间隔

from

(select user_id,min(diff) 首购时间

from clea_data

where type='下单' and reg_date >='2018-02-01'

group by user_id) s1

select 首购间隔,count(user_id) 数量 from 首购分桶 group by 首购间隔

9.查询每周用户浏览活跃度情况

###绘制分桶视图

create table 浏览活跃度 as

select week_number 周数,(

case when 产品数=0 then '未访问'

when 产品数=1 then '不活跃'

when 产品数 between 2 and 5 then '活跃'

else '非常活跃' end) 活跃度

from

(select week_number,user_id,count(distinct sku_id) 产品数

from clea_data

where type='浏览'

group by user_id,week_number) s1

select 周数,count(活跃度) 活跃度 from 浏览活跃度 group by 周数

select user_id,(when 周数='5' then '5' end) as 5 from 浏览活跃度

10.查询下单转化率

select

(select count(user_id) from clea_data where type='浏览') as 浏览,

(select count(user_id) from clea_data where type='下单') as 下单,

(select count(user_id) from clea_data where type='下单')/(select count(user_id) from clea_data where type='浏览') as 转化率

11.查询复购率

select

(select count(distinct user_id) from clea_data where type='下单') 购买人数,

(select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1) 二次购买人数,

((select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1)/(select count(distinct user_id) from clea_data where type='下单')) as 复购率

12.查询用户复购时间间隔分布

create table min_date as

select user_id,min(action_date) 首购 from clea_data where type='下单' group by user_id

create table diff_table as

(select s1.user_id,s1.action_date,首购 from (select user_id,action_date

from clea_data

where type='下单') s1 left join min_date s2 on s1.user_id=s2.user_id)

###增加日期差列 ###新建日期差###

alter table diff_table add diff_date int

update diff_table set diff_date=DATEDIFF(action_date,首购)

select * from diff_table

###绘制分桶视图

create table 复购分桶 as

select user_id,(

case when 0

when diff_date between 4 and 7 then '[4,7]'

when diff_date between 8 and 15 then '[8,15]'

when diff_date between 16 and 30 then '[16,30]'

when diff_date between 31 and 60 then '[31,60]'

when diff_date between 61 and 90 then '[61,90]'

end) 购物间隔

from diff_table

select 购物间隔,count(user_id) 数量 from 复购分桶 group by 购物间隔

select * from 复购分桶

13.查询新注册用户复购时间间隔分布

create table newmin_date as

select user_id,min(action_date) 首购 from clea_data where type='下单' and reg_date>='2018-02-01' group by user_id;

create table newdiff_table as

(select s1.user_id,s1.action_date,首购 from (select user_id,action_date

from clea_data

where type='下单' and reg_date>='2018-02-01') s1 right join newmin_date s2 on s1.user_id=s2.user_id)

###增加日期差列 ###新建日期差###

alter table newdiff_table add newdiff_date int

update newdiff_table set newdiff_date=DATEDIFF(action_date,首购)

select * from newdiff_table

drop table newdiff_table

drop table 新复购分桶

###绘制分桶视图

create table 新复购分桶 as

select user_id,(

case when 0

when newdiff_date between 4 and 7 then '[4,7]'

when newdiff_date between 8 and 15 then '[8,15]'

when newdiff_date between 16 and 30 then '[16,30]'

when newdiff_date between 31 and 60 then '[31,60]'

when newdiff_date between 61 and 90 then '[61,90]'

end) 购物间隔

from newdiff_table

select 购物间隔,count(distinct user_id) 数量 from 新复购分桶 group by 购物间隔

select * from 新复购分桶

14.查询付费用户的性别构成

select sex,count(user_id) 人数 from clea_data where type='下单' group by sex

15.查询付费用户的年龄构成

select age,count(user_id) 人数 from clea_data where type='下单' group by age

16.查询省份的销量分布

select province,count(user_id) 销量 from clea_data where type='下单' group by province order by 销量 desc

17.查询销量的城市级别分布

select city_level,count(user_id) 销量 from clea_data where type='下单' group by city_level order by 销量 desc

18.查询时段uv分布

select action_hour,count(distinct user_id) 销量 from clea_data where type='下单' group by action_hour

19.产品类型销量top20

select cate,count(user_id) 销量 from clea_data where group by cate order by 销量 desc limit 20

20.产品类型访问量top20

select cate,count(user_id) 浏览量 from clea_data where type='浏览' group by cate order by 浏览量 desc limit 20

21.品牌销量top20

select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc limit 20

22.查询品牌累计百分比

select 销量,row_number()over(order by 销量 desc) 排名,(sum(销量)over(order by 销量 desc)/(select sum(销量) from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc)s1)) 累加百分比,sum(销量)over(order by 销量 desc) as 累加和

from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc) s1

22.制作RF模型

###创建RF表

create table rfm AS

select user_id,F,datediff((select max(action_date) from clea_data where type='下单'),R) R

from

(select user_id,count(type) F,max(action_date) R from clea_data where type='下单' group by user_id) s1

###给RF表赋值

create table rfm_level as

(select user_id,R,F,CONCAT(R,F) lev FROM

(select user_id,(

case when (SELECT avg(R) FROM rfm)<=R then '1'

when (SELECT avg(R) FROM rfm)>R then '0'

end) R,(

case when (SELECT avg(F) FROM rfm)<=F then '1'

when (SELECT avg(F) FROM rfm)>F then '0'

end) F

from rfm) S2)

#替换

update rfm_level set lev=REPLACE(lev,'11','保持') ;

update rfm_level set lev=REPLACE(lev,'10','新手') ;

update rfm_level set lev=REPLACE(lev,'01','唤醒') ;

update rfm_level set lev=REPLACE(lev,'00','流失') ;

select lev 用户类别,count(user_id) 数量 from rfm_level group by lev

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值