SQL淘宝数据分析

淘宝数据分析

--更改字段名

--更改字段名
alter table tb1 change timestamp timestamps int(14);
--检查空值
select * from tb1 where user_id is null;
select * from tb1 where item_id is null;
select * from tb1 where category_id is null;
select * from tb1 where behavior_type is null;
select * from tb1 where timestamps is null;
--检查重复值
select user_id,item_id,timestamps from tbs 
group by user_id,item_id,timestamps
having count(*) >1 ;
有53条重复记录
--去重(建立辅助字段辅助去重)
alter table tb1 add id int first;
alter table tb1 modify id int primary key auto_increment;
delete tb1 from tb1,
(
select user_id,item_id,timestamps,min(id) id from tbs 
group by user_id,item_id,timestamps
having count(*) >1 ) t2
where tb1.user_id = t2.user_id
and tb1.item_id=t2.item_id
and tb1.id>t2.id;
--新增日期:date time hour
--更改buffer值,更新缓冲值 
show variables like '%_buffer%';
set global innodb_buffer_size=10700000000;
--重启数据库

--修改数据库
alter table tb1 add datetimes timstamp(0);  #去掉毫秒
update tb1 set datetimes=from_unixtime(timestamps) ; datetimes更改成功
--新增小时段等
alter table tb1 add dates char(10);
alter table tb1 add times char(8);
alter table tb1 add hours char(10);
update tb1 set dates=substring(datetime,1,10); 
update tb1 set times=substring(datetime,12,8); 
update tb1 set hours=substring(datetime,12,2); 
--或者可以一次性插入
--去异常
select max(datetimes),min(datetimes) from tb1;
delete from tb1 where datetimes <'2017-11-25 00:00:00'
or datetimes>'2017-12-03 23:59:59';
-- 数据概览
select count(1) from tb1;

--创建临时表
create table temp1 like tb1;
--截取十万条数据
insert into temp1
select * from tb1 limit 100000;
--pv 页面浏览量
select dates,count(*) pv
from temp1 where behavior_type='pv'
group by dates;
--独立访客数 uv
select dates,count(distinct user_id) uv
from temp1 where behavior_type='pv'
group by dates;
--浏览深度pv/uv
--一条语句
select dates,count(*) 'pv',count(distinct user_id) 'uv',
round(count(*)/count(distinct user_id),1) 'pv/uv'
from temp1 where behavior_type='pv'
group by dates;
--处理真实数据 
create table pv_uv_puv(
dates char(10),
pv int(9).
uv int(9),
puv decimal(10,1)
);
insert into pv_uv_puv
select dates,count(*) 'pv',count(distinct user_id) 'uv',
round(count(*)/count(distinct user_id),1) 'pv/uv'
from tb1 where behavior_type='pv'
group by dates;

发现有null值 
delete from pv_uv_puv where dates is null;











--还有异常值
delete from tb1 where dates is null;
--留存率

select user_id,dates from tb1 group by user_id,dates;
--自关联的方式
select * from
(
select user_id,dates from tb1 group by user_id,dates) a
,(select user_id,dates from tb1 group by user_id,dates) b
where a.user_id = b.user_id 
--筛选
select * from
(
select user_id,dates from tb1 group by user_id,dates) a
,(select user_id,dates from tb1 group by user_id,dates) b
where a.user_id = b.user_id and a.dates < b.dates;

--留存数
select a.dates,count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_0,
count(if(datediff(b.dates,a.dates)=1,b.user_id,null)) retention_1,
count(if(datediff(b.dates,a.dates)=3,b.user_id,null)) retention_3
from
(
select user_id,dates from tb1 group by user_id,dates) a
,(select user_id,dates from tb1 group by user_id,dates) b
where a.user_id = b.user_id and a.dates <= b.dates
group by a.dates;
--留存率
select a.dates,
count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_1
from
(
select user_id,dates from tb1 group by user_id,dates) a
,(select user_id,dates from tb1 group by user_id,dates) b
where a.user_id = b.user_id and a.dates <= b.dates
group by a.dates;
--保存结果
create table retention_rate(
dates char(10),
retention_1 float);

insert into retention_rate
select a.dates,
count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_1
from
(
select user_id,dates from tb1 group by user_id,dates) a
,(select user_id,dates from tb1 group by user_id,dates) b
where a.user_id = b.user_id and a.dates <= b.dates
group by a.dates;

--跳失率 88
select count(*) from(
select user_id from tb1 group by user_id
having count(behavior_type)=1) a

select sum(pv) from pv_uv_puv 8966070




--统计日期小时的行为
select dates,hours,
count(if(behavior_type='pv',behavior_type,null)) 'pv',
count(if(behavior_type='cart',behavior_type,null)) 'cart',
count(if(behavior_type='pv',behavior_type,null)) 'fav',
count(if(behavior_type='pv',behavior_type,null)) 'buy',
from temp_behavior
group by dates,hours
order by dates,hours;
--存储
create table date_hour_behavior(
dates char(10),
hours char(2),
pv int,
cart int,
fav int,
buy int);
--结果插入
insert into date_hour_behavior
select dates,hours,
count(if(behavior_type='pv',behavior_type,null)) 'pv',
count(if(behavior_type='cart',behavior_type,null)) 'cart',
count(if(behavior_type='pv',behavior_type,null)) 'fav',
count(if(behavior_type='pv',behavior_type,null)) 'buy',
from tb1
group by dates,hours
order by dates,hours;
--用户转化率分析
--统计各类行为用户数
select  behavior_type,count(distinct user_id) user_num
from temp
group by behavior_type
order by behavior_type desc;
--存储
create table behavior_user_num(
behavior_type varchar(5),
user_num int);
insert into behavior_user_num
select  behavior_type,count(distinct user_id) user_num
from tb1
group by behavior_type
order by behavior_type desc;
--购买/浏览
select 672404/984105

--统计各类行为的数量
select  behavior_type,count(*) user_num
from tb1
group by behavior_type
order by behavior_type desc;
--存储
create table behavior_num(
behavior_type varchar(5),
user_num int);
insert into behavior_user_num
select  behavior_type,count(*) user_num
from tb1
group by behavior_type
order by behavior_type desc;
--行为路径分析 
select '难度增加'
create view user_behavior_view as 
select user_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from tb1
group by user_id,item_id

--用户行为标准化
create view user_behavior_standard as
select user_id
,item_id
,(case when pv>0 then 1 else 0 end) 浏览了
,(case when fav>0 then 1 else 0 end) 收藏了
,(case when cart>0 then 1 else 0 end) 加购了
,(case when buy>0 then 1 else 0 end) 购买了
from user_behavior_view
--路径类型
create view user_behavior_path as 
select * , 
concat(浏览了,收藏了,加购了,购买了) 购买路径类型
from user_behavior_standard  a
where a.购买了>0
--统计各购买行为数量
create view path_count as
select 购买路径类型
,count(*) 数量
from user_behavior_path
group by 购买路径类型
order by 数量

-- 
create table a(
path_type char(4),
description varchar(40));
insert into a
values('0001','直接购买了'),
('1001','浏览后购买了'),
('0011','加购后购买了'),
('1011','浏览加购后购买了'),
('0101','收藏后购买了'),
('1101','浏览收藏后购买了'),
('0111','收藏加购后购买了'),
('1111','浏览收藏加购后购买了')

select * from path_count p
join a
on p.购买路径类型=a.path_type
order by 数量 desc
--存储
create table path_result(
path_tyoe char(4),
description varchar(40),
num(int);
insert into path_result
select path_type , description ,数量 from 
path_count p
join a
on p.购买路径类型=a.path_type
order by 数量 desc

--RFM模型
--没有金额 M 利用 FR分类
--最近购买时间
select user_id,max(dates) '最近购买时间'
from tb1
where behavior_type='buy'
group by user_id
order by 2 desc
-- 购买次数
select user_id,count(user_id) '购买次数'
from tb1
where behavior_type='buy'
group by user_id 
order by 2 desc
--统一
select user_id,count(user_id) '购买次数',
max(dates) '最近购买时间'
from tb1
where behavior_type='buy'
group by user_id 
order by 2 desc,3 desc
--存储 
drop table if exists rf_model;
create table rf_model(
user_id int,
frequency int
recent char(10)
);
insert into rf_model
select user_id,count(user_id) '购买次数',
max(dates) '最近购买时间'
from tb1
where behavior_type='buy'
group by user_id 
order by 2 desc,3 desc
--根据购买次数对用户进行分层
alter table rf_model add column fscore int;
update rf_model
set fscore=case
when frequency between 100 and 262 then 5
when frequency between 50 and 99 then 4
when frequency between 20 and 49 then 3
when frequency between 5 and 20 then 2
else 1
end


--根据最近购买对用户进行分层
alter table rf_model add column rscore int;
update rf_model
set rscore=case
when recent ='2017-12-03' then 5
when recent in ('2017-12-01','2017-12-02') then 4
when recent in ('2017-11-29','2017-11-30') then 3
when recent in ('2017-11-27','2017-11-28')then 2
else 1
end

--分层
set @f_avg=null;
set @r_avg=null;
select avg(fscore) into @f_avg from rf_model;
select avg(rscore) into @r_avg from rf_model;

select *
,(case
when fscore>@f_avg and rscore>@r_avg then '价值用户'
when fscore>@f_avg and rscore>@r_avg then '保持用户'
when fscore>@f_avg and rscore>@r_avg then '发展用户'
when fscore>@f_avg and rscore>@r_avg then '挽留用户'
end) class
from rf_model

--插入
alter table rf_model add column class varchar(40);
update rf_model
set class = case 
when fscore>@f_avg and rscore>@r_avg then '价值用户'
when fscore>@f_avg and rscore>@r_avg then '保持用户'
when fscore>@f_avg and rscore>@r_avg then '发展用户'
when fscore>@f_avg and rscore>@r_avg then '挽留用户'
end

--统计各分区的用户数
select class ,count(user_id) from rf_model
group by class
--只有9天对r作用不是很大





--热门品类
select category_id
,count(if(behavior_type='pv',behavior_type,null)) '品类浏览量'
from tb1
group by category_id 
order by 2 desc
limit 10;

--热门商品
select item_id
,count(if(behavior_type='pv',behavior_type,null)) '商品浏览量'
from tb1
group by item_id
order by 2 desc
limit 10
--热门品类中最受欢迎的商品
select category_id,item_id,
品类商品浏览量 from 
(
select category_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) '品类商品浏览量'
,rank()over(partition by category_id order by '品类商品浏览量' desc) r
from tb1
group by category_id,item_id
order by 3 desc
) a
where a.r=1
order by a.品类商品浏览量 desc
limit 10

create table popular_categories(
category_id int,
pv int);

create table popular_items(
item_id int,
pv int);

create table popular_cateitems(
category_id int,
item_id int
pv int);

insert into popular_categories
select category_id
,count(if(behavior_type='pv',behavior_type,null)) '品类浏览量'
from tb1
group by category_id 
order by 2 desc
limit 10;

insert into popular_items
select item_id
,count(if(behavior_type='pv',behavior_type,null)) '商品浏览量'
from tb1
group by item_id
order by 2 desc
limit 10

insert into popular_cateitems
select category_id,item_id,
品类商品浏览量 from 
(
select category_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) '品类商品浏览量'
,rank()over(partition by category_id order by '品类商品浏览量' desc) r
from tb1
group by category_id,item_id
order by 3 desc
) a
where a.r=1
order by a.品类商品浏览量 desc
limit 10






-- 特定商品转化率
select item_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
,count(distinct if(behavior_type='buy',user_id,null))/count(distinct user_id) 商品转化率
from tb1
group by item_id
order by 商品转化率 desc

--保存
create table item_detail(
item_id int
,pv int
,fav int
,cart int
,buy int
,user_buy_rate float);
insert into item_detail
select item_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
,count(distinct if(behavior_type='buy',user_id,null))/count(distinct user_id) 商品转化率
from tb1
group by item_id
order by 商品转化率 desc;
-- 品类转化率

create table category_detail(
category_id int
,pv int
,fav int
,cart int
,buy int
,user_buy_rate float);
insert into category_detail
select category_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
,count(distinct if(behavior_type='buy',user_id,null))/count(distinct user_id) 品类转化率
from tb1
group by category_id
order by 品类转化率 desc;




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值