实战整理-阿里天池淘宝用户购物行为数据集实战(MySQL数据分析+Navicat)

数据集链接:淘宝用户购物行为数据集_数据集-阿里云天池

因原数据集过大,电脑和MySQL跑不动,故截取前一百万条数据进行实战演练、

1、导入数据

利用navicat软件直接导入下载好的Excel文件(较大数据集可用kettle导入)

 2、数据预处理

use aori;
desc userbehavior;
select * from userbehavior limit 5;

--更改字段名

alter table userbehavior change f1 user_id int;
alter table userbehavior change f2 item_id int;
alter table userbehavior change f3 category_id int;
alter table userbehavior change f4 behavior_type varchar(5);
alter table userbehavior change f5 timestamps int;


--检查空值

select * from userbehavior where user_id is null;
select * from userbehavior where item_id is null;
select * from userbehavior where category_id is null;
select * from userbehavior where behavior_type is null;
select * from userbehavior where timestamps is null;

--检查重复值
select user_id,item_id,timestamps from userbehavior
group by user_id,item_id,timestamps
having count(*) > 1;

--去重

-设置主键

alter table userbehavior add id int first;
alter table userbehavior modify id int primary key auto_increment;
select * from userbehavior limit 5;

-去除重复值

delete userbehavior from 
userbehavior,
(
select user_id,item_id,timestamps,min(id) id from userbehavior
group by user_id,item_id,timestamps
having count(*) > 1
) t2
where userbehavior.user_id = t2.user_id
and userbehavior.item_id = t2.item_id
and userbehavior.timestamps  = t2.timestamps
and userbehavior.id > t2.id

--新增日期
- datetime

alter table userbehavior add datetimes TIMESTAMP(0);
update userbehavior set datetimes=FROM_UNIXTIME(timestamps);
select * from userbehavior limit 5;

-time
alter table userbehavior add dates char(10);
alter table userbehavior add times char(8);
alter table userbehavior add hours char(2);

update userbehavior set dates=substring(datetimes,1,10);
update userbehavior set times=substring(datetimes,12,8);
update userbehavior set hours=substring(datetimes,12,2);
select * from userbehavior limit 5;

-- 去异常
select max(datetimes),min(datetimes) from userbehavior;

delete from userbehavior
where datetimes < '2017-11-25 00:00:00'
or datetimes > '2017-12-03 23:59:59';


--数据概览
desc userbehavior;
select * from userbehavior limit 5;
select count(1) from userbehavior; # 999530条记录



3、时间序列分析

-- 统计日期-小时行为
select dates,hours
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by dates,hours
order by dates,hours


create table date_hour_behavior(
dates char(10),
hours char(2),
pv int,
cart int,
fav int,
buy int
);


insert into date_hour_behavior
select dates,hours
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by dates,hours
order by dates,hours

select * from date_hour_behavior 

 4、获客情况

--创建临时表
create table temp_behaviors like userbehavior


--截取

insert into temp_behaviors
select * from userbehavior limit 100000;

select * from temp_behaviors;


-- pv(Page View页面浏览量)

select dates
,count(behavior_type) 'pv'
from temp_behaviors
where behavior_type = 'pv'
group by dates;


-- UV(Uniqque Visitor独立访客数)

select dates
,count(distinct user_id) 'uv'
from temp_behaviors
where behavior_type = 'pv'
group by dates;


-- 一条语句

select dates
,count(behavior_type) 'pv'
,count(distinct user_id) 'uv'
,round(count(behavior_type)/count(distinct user_id),1) 'pv/uv' 
from temp_behaviors
where behavior_type = 'pv'
group by dates;


-- 处理真实数据
create table pv_uv_puv(
dates char(10),
pv int(9),
uv int(9),
puv decimal(10,1)
);


insert into pv_uv_puv
select dates
,count(behavior_type) 'pv'
,count(distinct user_id) 'uv'
,round(count(behavior_type)/count(distinct user_id),1) 'pv/uv' 
from userbehavior
where behavior_type = 'pv'
group by dates;

select * from pv_uv_puv;



 5、留存情况

  ①留存率计算

select user_id,dates
from userbehavior 
group by user_id,dates;


-- 自关联

select * from 
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates


-- 留存数
select a.dates
,count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_0
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null)) retention_1
,count(if(datediff(b.dates,a.dates)=3,b.user_id,null)) retention_3
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_rate_1
from
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates
group by a.dates


-- 保存结果
create table retention_rate(
dates char(10),
retention_1 float
);

insert into retention_rate
select a.dates
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_rate_1
from
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates
group by a.dates


select * from retention_rate;

②跳失用户计算

-- 跳失率

-- 跳失用户

select count(*)
from
(
select user_id from userbehavior
group by user_id
having count(behavior_type)=1
) a;


select sum(pv) from pv_uv_puv;

 6、行为路径分析


create view user_behavior_view as 
select user_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by user_id,item_id


-- 用户行为标准化

create view user_behavior_standard as 
select user_id,item_id
,(case when pv>0 then 1 else 0 end) '浏览'
,(case when fav>0 then 1 else 0 end) '收藏'
,(case when cart>0 then 1 else 0 end) '加购'
,(case when buy>0 then 1 else 0 end) '购买'
from user_behavior_view


-- 路径类型

create view user_behavior_path as 
select *,
concat(浏览,收藏,加购,购买) as path_type
from user_behavior_standard as a 
where a.购买 > 0;


-- 统计各类型购买数量

create view path_count as 
select path_type
,count(*) path_type_num
from user_behavior_path
group by path_type
order by path_type_num desc;


select * from path_count;

-- 改名表

create table change_name(
path_type char(4),
description varchar(40));


insert into change_name
values('0001','购买'),
('1001','浏览购买'),
('0011','加购购买'),
('1011','浏览加购购买'),
('0101','收藏购买'),
('1101','浏览收藏购买'),
('0111','收藏加购购买'),
('1111','浏览收藏加购购买');

select * from change_name;

create table path_result
(description varchar(40)
,path_type_num int);



insert into path_result
select description,
path_type_num
from path_count
join change_name
on path_count.path_type = change_name.path_type;

select * from path_result;

 7、用户转化率分析


-- 统计各类行为用户数

select behavior_type
,count(DISTINCT user_id) user_num
from userbehavior
group by behavior_type
order by behavior_type desc


create table behavior_user_num(
behavior_type varchar(5),
user_num int
);


insert into behavior_user_num
select behavior_type
,count(DISTINCT user_id) user_num
from userbehavior
group by behavior_type
order by behavior_type desc;


select * from behavior_user_num;

-- 统计各类行为数量


select behavior_type
,count(*) behavior_count_num
from userbehavior
group by behavior_type
order by behavior_type desc

create table behavior_num(
behavior_type varchar(5),
behavior_count_num int
);

insert into behavior_num
select behavior_type
,count(*) behavior_count_num
from userbehavior
group by behavior_type
order by behavior_type desc;

select * from behavior_num;

 8、TOP商品

-- 品类浏览量TOP10

create table popular_categories(
category_id int,
pv int
);


insert into popular_categories
select category_id
,count(if(behavior_type='pv',behavior_type,null))  '品类浏览量'
from userbehavior
group by category_id
order by 2 desc
limit 10;

select * from popular_categories;

-- 商品浏览量TOP10

create table popular_items(
item_id int,
pv int
);

insert into popular_items
select item_id
,count(if(behavior_type='pv',behavior_type,null))  '商品浏览量'
from userbehavior
group by item_id
order by 2 desc
limit 10;

select * from popular_items;

-- 各类别商品浏览量TOP10

create table popular_cateitems(
category_id int,
item_id int,
pv int
);

insert into popular_cateitems
select category_id,item_id,品类商品浏览量 from
(
select category_id
,item_id
,count(if(behavior_type='pv',behavior_type,null))  '品类商品浏览量'
,rank() over(partition by category_id order by count(if(behavior_type='pv',behavior_type,null)) desc) r
from userbehavior
group by category_id,item_id
order by 3 desc
) a 
where a.r = 1
order by a.品类商品浏览量 desc
limit 10;

select * from popular_cateitems;

9、RFM模型


-- 最近购买时间
select user_id
,max(dates) '最近购买时间'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc;


-- 购买次数
select user_id
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc;


-- 统一
select user_id
,max(dates) '最近购买时间'
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc,3 desc;

-- 存储

drop table if exists rfm_model;
create table rfm_model(
user_id int,
recently char(10),
frequency int
);


insert into rfm_model
select user_id
,max(dates) '最近购买时间'
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc,3 desc;

-- 根据购买次数对用户进行分层

alter table rfm_model add column fscore int;

update rfm_model
set fscore = 
case when frequency >= 20 then 5
when frequency between 15 and 19 then 4
when frequency between 10 and 15 then 3
when frequency between 5 and 10 then 2
else 1
end;

-- 根据最近购买时间对用户进行分层

alter table rfm_model add column rscore int;

update rfm_model
set rscore = 
case when recently = '2017-12-03' then 5
when recently in ('2017-12-01','2017-12-02') then 4
when recently in ('2017-11-30','2017-11-29') then 3
when recently in ('2017-11-28','2017-11-27') then 2
else 1
end;

-- 分层

- 设置变量
set @f_avg = null;
set @r_avg = null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;

select *
,(case
when fscore > @f_avg and rscore > @r_avg then '价值用户'
when fscore > @f_avg and rscore < @r_avg then '保持用户'
when fscore < @f_avg and rscore > @r_avg then '发展用户'
when fscore < @f_avg and rscore < @r_avg then '挽留用户'
end) class
from rfm_model;

-- 插入

alter table rfm_model add column class varchar(40);

update rfm_model
set class=
case
when fscore > @f_avg and rscore > @r_avg then '价值用户'
when fscore > @f_avg and rscore < @r_avg then '保持用户'
when fscore < @f_avg and rscore > @r_avg then '发展用户'
when fscore < @f_avg and rscore < @r_avg then '挽留用户'
end;

select * from rfm_model;



-- 统计各分区用户数
select class
,count(user_id) class_num
from rfm_model
group by class;

 

  • 3
    点赞
  • 54
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
背景描述 客户购物偏好数据集提供了有关消费者行为和购买模式的宝贵见解。了解客户偏好和趋势对于企业定制产品、营销策略和整体客户体验至关重要。 本数据集捕捉了广泛的客户属性,包括年龄、性别、购买历史、首选支付方式、购买频率等。分析这些数据可以帮助企业做出明智的决策、优化产品和提高客户满意度。 本数据集包含与客户购物偏好相关的各种特征的 3900 条记录,为企业收集了必要的信息,以加强对客户群的了解。 数据说明 字段 说明 Customer ID 客户唯一标识符 Age 客户年龄 Gender 客户性别(男/女) Item Purchased 客户购买的商品 Category 购买商品的类别 Purchase Amount (USD) 购买金额(美元) Location 购买地点 Size 购买商品的尺码 Color 购买商品的颜色 Season 购买商品的季节 Review Rating 客户对购买商品的评分 Subscription Status 客户是否拥有订阅(是/否) Shipping Type 客户选择的配送方式 Discount Applied 是否应用了折扣(是/否) Promo Code Used 是否使用了优惠码(是/否) Previous Purchases 客户在该商店的历史购买总数,不包括当前交易 Payment Method 客户最常用的支付方式 Frequency of Purchases 客户购买频率(每周、每两周、每月等) 问题描述 分析不同客户群体的消费行为差异(按年龄段、性别、地区等划分客户群体) 分析不同类别商品的销售情况,找出畅销商品 分析各季节的销售趋势,确定高峰销售季节 分析优惠活动的效果,如折扣、优惠码的使用情况 分析客户忠诚度,如回购率、评分、购买频率等指标 分析付款方式偏好,优化支付流程 利用历史数据建立商品推荐系统 预测未来销量,进行库存管理和供应链规划

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值