本篇文章对sql的应用进行总结,使用sql从不同角度对销售数据进行分析
项目简介
本篇文章使用的是某产品3、4、5月份的消费信息和用户信息,对这两份数据来分析该产品这三个月的销售情况。
数据介绍
userinfo 客户信息表
userId 客户id
sex 性别
brith 出生日期
orderinfo 订单信息表
orderId 订单序号(虚拟主键)
userId 客户id
isPaid 是否支付
price 商品价格
paidTime 支付时间
以上两个表格即为本次分析的数据,匹配字段是userid
分析角度
统计不同月份的下单人数
统计用户三月份的复购率和回购率
统计男女用户的消费频次是否有差异
统计多次消费的用户,第一次和最后一次消费的间隔是多少?
统计不同年龄段,用户的消费金额是否有差异?
统计消费的二八法则,消费top 20%的用户,贡献了多少的额度?
MySQL分析
1.导入数据和清理数据
导入数据可以选择:
从workbench导入,效率较慢
使用命令行将数据导入,对数据量大的数据操作起来效率高
#创建数据库,表格
create database data;
create table orderinfo(
orderid int primary key ,
userid int,
isPaid varchar(10),
price float,
paidTime varchar(30));
create table userinfo(
userid int primary key,
sex varchar(10),
birth date);
查看所有表格
#导入数据,csv文件以逗号为分隔符
load data local infile 'C:/Users/Administrator/Desktop/order_info_utf.csv' into table orderinfo fields terminated by ',';
load data local infile 'C:/Users/Administrator/Desktop/user_info_utf.csv' into table userinfo fields terminated by ',';
#将时间格式标准化为2000-01-01
update orderinfo set paidtime=replace(paidtime,'/','-') where paidtime is not null;
#更新字符串为日期格式,方便接下来使用日期函数操作
update orderinfo set paidtime=str_to_date(paidtime,'%Y-%m-%d %H:%i') where paidtime is not null;
处理后的paidtime
2.业务分析
2.1统计不同月份的下单人数
select month(paidtime) as month,count(distinct userid) from orderinfo as order_number
where ispaid='已支付'
group by month(paidtime);
不同月份下单人数
2.2统计用户三月份的复购率
select count(if(se>1,1,null))/count(1) as '复购率' from (
select userid,count(userid) as se from orderinfo
where ispaid='已支付'
and month(paidtime) = 3
group by userid) t;
3月份复购率
2.3统计三月份的回购率
#反复利用子查询(每月消费的userid,并利用分组进行去重)
select t1.m,count(t2.m)/count(t1.m) as '回购率' from (
select userid,date_format(paidtime,'%Y-%m-01') as m
from orderinfo
where ispaid='已支付'
group by userid,date_format(paidtime,'%Y-%m-01'))t1
left join (
select userid,date_format(paidtime,'%Y-%m-01') as m
from orderinfo
where ispaid='已支付'
group by userid,date_format(paidtime,'%Y-%m-01'))t2
on t1.userid = t2.userid and t1.m = date_sub(t2.m,interval 1 month)
group by t1.m
每月回购率
2.4统计男女用户的消费频次是否有差异
select sex,avg(ct) as frequency from (
select t1.userid,t2.sex,count(*) as ct
from orderinfo t1
inner join (
select * from userinfo
where sex != '')t2
on t1.userid = t2.userid
group by userid)t3
group by sex;
男女用户消费频次差异
2.5统计多次消费的用户,第一次和最后一次消费间隔是多少?(生命周期,用户消费时间)
select
userid,
datediff(max(paidtime),min(paidtime)) "消费间隔"
from orderinfo
where ispaid = '已支付'
group by userid
having count(paidtime) > 1;
用户消费间隔
2.6统计不同年龄段,用户的消费金额是否有差异(用户消费基本画像)
#各个年龄组的消费频次、消费金额
select age,avg(ct),round(sum(amount),2) from (
#每个用户的消费频次、所属年龄组、消费金额
select o.userid,age,sum(price) as amount,count(*) as ct
from orderinfo o
#内连接表userinfo获取年龄信息
inner join
#将userinfo表按年龄分段
(select userid,round((year(now())-year(birth))/10) as age
from userinfo
where birth > '1919-00-00')t1
on o.userid = t1.userid
WHERE ispaid = '已支付'
group by o.userid,age)t2
group by t2.age
不同年龄段用户消费频次、金额
2.7统计消费的二八法则,消费的top20%的用户,贡献多少额度(top客户)
#先计算出总的用户数与消费总金额
SELECT count(userid),sum(total) from (
SELECT userid,sum(price) as total
FROM orderinfo
where ispaid = '已支付'
GROUP BY userid
ORDER BY total DESC)t
总用户数与消费总金额
SELECT count(userid),sum(total),sum(total)/318503081 from (
SELECT userid,sum(price) as total
FROM orderinfo
where ispaid = '已支付'
GROUP BY userid
ORDER BY total DESC
LIMIT 17000)t
前20%的用户消费金额及在总消费金额中占比