sql查询连续大于0的次数_数据分析sql+tableau+python项目(tableau+sql篇)

58e63a2d6ba311365dc77c1dbeb5849c.png

电商用户行为数据分析

用到的sql知识点:

增:alter table add(增加列)

删:drop table (删除临时表)

改:update name set 列=replace(列名,n,m)

查:分组group by、连接left join、判断case when then end、日期字符串转换str_to_date窗口函数sum()over(order by )连续排序row_number、临时视图creat table as select、日期差datediff(n,m)、条件查询<=> !=

1、tableau故事

ea93a6419f10322b8ce013ab95a266e8.png

4b605d063f626d625952575fc4c585ac.png

1e7ccba727d9dfe164ce4e7311d48fef.png

e516f32a13750920d41ebb075dc0096e.png

ab38bb63a3cd49dafab92406f4fff2b1.png

2、sql代码

一、数据探索

use jd2;
desc clea_data
select * from clea_data limit 3
delete from clea_data where action_date='2018-03-27' or action_date='2018-03-28'

二、数据分析

1 查询uv和pv趋势变化

select action_date 日期,count(user_id) pv,count(distinct user_id) uv
from clea_data where type='浏览' group by action_date;

2查询订单趋势

select action_date 日期,count(user_id) 订单
from clea_data where type='下单' group by action_date

3查询首购用户数趋势

select s1.日期,count(s1.user_id) 首购
from (select user_id,min(action_date) 日期
from clea_data where type='下单' and reg_date>'2018-02-01' group by user_id) s1 group by 日期 order by 日期

4.新老用户uv数据比较

select s1.日期,新客uv,老客uv from 
(select action_date 日期,count(distinct user_id) 新客uv
from clea_data where type='浏览' and reg_date>='2018-02-01' group by action_date) s1 inner join
(select action_date 日期,count(distinct user_id) 老客uv
from clea_data where type='浏览' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期

5.新老用户订单数据比较

select s1.日期,新客订单,老客订单 from 
(select action_date 日期,count(distinct user_id) 新客订单
from clea_data where type='下单' and reg_date>='2018-02-01' group by action_date) s1 inner join
(select action_date 日期,count(distinct user_id) 老客订单
from clea_data where type='下单' and reg_date <'2018-02-01' group by action_date) s2 on s1.日期=s2.日期

6.查询新增注册人数年趋势变化

select reg_date 注册日期,count(user_id) 注册人数 from  clea_data group by reg_date

7、查询2018年新增注册人数变化

select reg_date 注册日期,count(user_id) 注册人数 from  clea_data where reg_date>='2018-01-01' group by reg_date

8.查询注册时间与首购时间间隔天数分布

###增加日期差列

alter table clea_data add diff int

###新建日期差

update clea_data set diff=DATEDIFF(action_date,reg_date)

###绘制分桶视图

create table 首购分桶 as
select user_id,(
	case when 首购时间=0 then '0'
				when 首购时间 between 1 and 3 then '[1,3]'
				when 首购时间 between 4 and 7 then '[4,7]'
				when 首购时间 between 8 and 15 then '[8,15]'
				when 首购时间 between 16 and 30 then '[16,30]'
				when 首购时间 between 31 and 60 then '[31,60]'
				else '大于60' end)  首购间隔 
from
		(select user_id,min(diff) 首购时间
		 from clea_data 
		 where type='下单' and reg_date >='2018-02-01' 
		 group by user_id) s1

select 首购间隔,count(user_id) 数量 from 首购分桶 group by 首购间隔

9.查询每周用户浏览活跃度情况

###绘制分桶视图

create table 浏览活跃度 as 
select week_number 周数,(
	 case when 产品数=0 then '未访问'
				when 产品数=1 then '不活跃'
				when 产品数 between 2 and 5 then '活跃'
				else '非常活跃' end) 活跃度 
from
		(select week_number,user_id,count(distinct sku_id) 产品数
		 from clea_data 
		 where type='浏览'
		 group by user_id,week_number) s1
 select 周数,count(活跃度) 活跃度 from 浏览活跃度 group by 周数
 select user_id,(when 周数='5' then '5' end) as 5 from 浏览活跃度

10.查询下单转化率

select 
 (select count(user_id) from clea_data where type='浏览') as 浏览,
  (select count(user_id) from clea_data where type='下单') as 下单,
	(select count(user_id) from clea_data where type='下单')/(select count(user_id) from clea_data where type='浏览') as 转化率

11.查询复购率

select
(select count(distinct user_id) from clea_data where type='下单') 购买人数,
(select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1) 二次购买人数,
((select count(*) from (select user_id,count(type) 下单次数 from clea_data where type='下单' group by user_id having 下单次数>=2) s1)/(select count(distinct user_id) from clea_data where type='下单')) as 复购率

12.查询用户复购时间间隔分布

create table min_date as 
select user_id,min(action_date) 首购 from clea_data where type='下单' group by user_id

create table diff_table as 
(select s1.user_id,s1.action_date,首购 from (select user_id,action_date
		 from clea_data 
		 where type='下单') s1 left join min_date s2 on s1.user_id=s2.user_id)

###增加日期差列 ###新建日期差###

alter table diff_table add diff_date int
update diff_table set diff_date=DATEDIFF(action_date,首购)
select * from diff_table

###绘制分桶视图

create table 复购分桶 as 
select user_id,(
	case  when 0<diff_date and diff_date<=3 then '(0,3]'
				when diff_date between 4 and 7 then '[4,7]'
				when diff_date between 8 and 15 then '[8,15]'
				when diff_date between 16 and 30 then '[16,30]'
				when diff_date between 31 and 60 then '[31,60]'
				when diff_date between 61 and 90 then '[61,90]'
				end)  购物间隔 
from diff_table 
select 购物间隔,count(user_id) 数量 from 复购分桶 group by 购物间隔
select * from 复购分桶

13.查询新注册用户复购时间间隔分布

create table newmin_date as 
select user_id,min(action_date) 首购 from clea_data where type='下单' and reg_date>='2018-02-01' group by user_id;

create table newdiff_table as 
(select s1.user_id,s1.action_date,首购 from (select user_id,action_date
		 from clea_data 
		 where type='下单' and reg_date>='2018-02-01') s1 right join newmin_date s2 on s1.user_id=s2.user_id)

###增加日期差列 ###新建日期差###

alter table newdiff_table add newdiff_date int
update newdiff_table set newdiff_date=DATEDIFF(action_date,首购)
select * from newdiff_table
drop table newdiff_table
drop table 新复购分桶

###绘制分桶视图

create table 新复购分桶 as
select user_id,(
	case  when 0<newdiff_date and newdiff_date<=3 then '(0,3]'
				when newdiff_date between 4 and 7 then '[4,7]'
				when newdiff_date between 8 and 15 then '[8,15]'
				when newdiff_date between 16 and 30 then '[16,30]'
				when newdiff_date between 31 and 60 then '[31,60]'
				when newdiff_date between 61 and 90 then '[61,90]'
				end)  购物间隔 
from newdiff_table 
select 购物间隔,count(distinct user_id) 数量 from 新复购分桶 group by 购物间隔
select * from 新复购分桶

14.查询付费用户的性别构成

select sex,count(user_id) 人数 from clea_data where type='下单' group by sex

15.查询付费用户的年龄构成

select age,count(user_id) 人数 from clea_data where type='下单' group by age

16.查询省份的销量分布

select province,count(user_id) 销量 from clea_data where type='下单' group by province order by 销量 desc

17.查询销量的城市级别分布

select city_level,count(user_id) 销量 from clea_data where type='下单' group by city_level order by 销量 desc

18.查询时段uv分布

select action_hour,count(distinct user_id) 销量 from clea_data where type='下单' group by action_hour

19.产品类型销量top20

select cate,count(user_id) 销量 from clea_data where  group by cate order by 销量 desc limit 20

20.产品类型访问量top20

select cate,count(user_id) 浏览量 from clea_data where type='浏览' group by cate order by 浏览量 desc limit 20

21.品牌销量top20

select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc limit 20

22.查询品牌累计百分比

select 销量,row_number()over(order by 销量 desc) 排名,(sum(销量)over(order by 销量 desc)/(select sum(销量) from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc)s1)) 累加百分比,sum(销量)over(order by 销量 desc) as 累加和
from (select brand,count(user_id) 销量 from clea_data where type='下单' group by brand order by 销量 desc) s1 

22.制作RF模型

###创建RF表

create table rfm AS
select user_id,F,datediff((select max(action_date) from clea_data where type='下单'),R) R 
from
(select user_id,count(type) F,max(action_date) R from clea_data where type='下单' group by user_id) s1

###给RF表赋值

create table rfm_level as 
(select user_id,R,F,CONCAT(R,F) lev FROM
(select user_id,(
	case  when (SELECT avg(R) FROM rfm)<=R then '1'
				when (SELECT avg(R) FROM rfm)>R then '0'
				end)  R,(
	case  when (SELECT avg(F) FROM rfm)<=F then '1'
				when (SELECT avg(F) FROM rfm)>F then '0'
				end)  F
from rfm) S2)

#替换

update rfm_level set lev=REPLACE(lev,'11','保持') ;
update rfm_level set lev=REPLACE(lev,'10','新手') ;
update rfm_level set lev=REPLACE(lev,'01','唤醒') ;
update rfm_level set lev=REPLACE(lev,'00','流失') ;
select lev 用户类别,count(user_id) 数量 from rfm_level group by lev
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值