一、准备好订单数据和用户数据
涉及表:
orderinfo 订单详情表| orderid 订单id
| userid 用户id
| isPaid 是否支付
| price 付款价格
| paidTime 付款时间
userinfo 用户信息表| userid 用户id
| sex 用户性别
| birth 用户出生日期
二、业务要求
1、统计不同月份的下单人数
2、统计男女用户消费频次是否有差异
3、统计用户三月份的回购率和复购率
4、统计多次消费的用户,第一次和最后一次消费间隔是多少天
5、统计不同年龄段,用户的消费金额是否有差异
6、统计消费的二八法则,消费排名前20%用户,贡献了多少消费额
三、练习
1、统计不同月份的下单人数
SELECT YEAR(paidtime),MONTH(paidtime), count(DISTINCT userid) as 当月下单人数
from orderinfo
WHERE ispaid = "已支付" and paidtime <> "0000-00-00 00:00:00"
GROUP BY YEAR(paidtime),MONTH(paidtime);
#YEAR()函数返回日期的年份。
#MONTH()函数返回1到12之间的整数。运行结果展示
2、统计男女用户消费频次是否有差异
思路:首先找出每个用户的消费次数,带性别。然后按性别计算平均数。
#首先找出每个用户的消费次数,带性别
SELECT a.userid, sex, count(1) as cons FROM orderinfo a
INNER JOIN (select * from userinfo where sex <>" ") b #有些性别空缺
ON a.userid=b.userid
WHERE ispaid = "已支付"
GROUP BY a.userid, sex
#按性别计算平均数
SELECT sex, avg(cons) FROM
(SELECT a.userid, sex, count(1) as cons FROM orderinfo a
INNER JOIN (select * from userinfo where sex <>" ") b #有些性别空缺
ON a.userid=b.userid
WHERE ispaid = "已支付"
GROUP BY a.userid, sex) c
GROUP BY sex运行结果展示
3、统计用户三月份的回购率和复购率
首先定义:复购率:当月购买了多次的用户占当月用户的比例
回购率:上月购买用户中有多少用户本月又再次购买= 3月用户中4月又再次购买的人数 / 3月的用户总数
计算复购率:思路是先找出3月份的orderinfo,统计每个用户的消费次数,找到消费次数大于1的用户数,计算复购率。
#找出3月份的orderinfo
SELECT * FROM orderinfo
WHERE MONTH(paidtime) = 03 AND ispaid = "已支付"
#统计每个用户的消费次数
SELECT userid , count(1) as cons FROM orderinfo
WHERE MONTH(paidtime) = 03 AND ispaid = "已支付"
GROUP BY userid
#找到消费次数大于1的用户数,再计算复购率
SELECT count(1), SUM( IF(cons>1,1,0)) as 复购人数, SUM( IF(cons>1,1,0))/count(1) as 复购率
FROM (
SELECT userid,COUNT(orderid) as cons FROM orderinfo
where MONTH(paidtime) = 03 and ispaid ="已支付"
GROUP BY userid ) a
***特别注意复购人数的算法SUM( IF(cons>1,1,0))运行结果展示
计算回购率:思路是先统计每个用户在每年每月的消费情况,相邻月份进行关联,能关联上的用户说明就是回购。
#统计每个用户在每年每月的消费情况
SELECT userid,
DATE_FORMAT(paidTime,"%Y-%m-01") as paymonth,
count(1) as cons
from orderinfo
where isPaid="已支付"
group by userid, paymonth;
***
'%Y-%m-01':Y表示4位,y是2位;M是分钟,m是月份;
更多时间格式:https://www.w3school.com.cn/sql/func_date_format.asp
这里统计一个月内的数据,所以day全部设置为01
#相邻月份进行关联,能关联上的用户说明就是回购
SELECT * FROM (
SELECT userid,
DATE_FORMAT(paidTime,"%Y-%m-01") as paymonth,
count(1) as cons
from orderinfo
where isPaid="已支付"
group by userid, paymonth; ) a
LEFT JOIN (
SELECT userid,
DATE_FORMAT(paidTime,"%Y-%m-01") as paymonth,
count(1) as cons
from orderinfo
where isPaid="已支付"
group by userid, paymonth; ) b
ON a.userid = b.userid
AND DATE_SUB(b.paymonth , 1 month ) = a.paymonth
***注意不能使用inner join,要保留没有匹配上的用户数
函数:DATE_SUB(b.paymonth,INTERVAL 1 month)=a.paymonth
b减去一个月,回退一个月是a。a是3月的话,b就是4月。
#统计每个月份的消费人数情况及回购率
SELECT a.paymonth, count(b.userid) as 下月继续购买用户
,count(a.userid) as 本月购买用户,
count(b.userid)/count(a.userid) as 回购率
FROM (select
userid,
DATE_FORMAT(paidTime,"%Y-%m-01") as paymonth,
count(1) as cons
from orderinfo
where isPaid="已支付"
group by userid, paymonth) a
LEFT JOIN
(select
userid,
DATE_FORMAT(paidTime,"%Y-%m-01") as paymonth,
count(1) as cons
from orderinfo
where isPaid="已支付"
group by userid, paymonth) b
on a.userid=b.userid
and DATE_SUB(b.paymonth,INTERVAL 1 month)=a.paymonth
GROUP BY a.paymonth
***统计每个月,直接GROUP BY运行结果展示
4、统计多次消费的用户,第一次和最后一次消费间隔是多少天
思路:首先找到多次消费的用户及消费次数,取出第一次消费和最后一次消费的时间
#首先找到多次消费的用户及消费次数
SELECT userid, count(1) as cons
FROM orderinfo
WHERE ispaid ="已支付"
GROUP BY userid
HAVING count(1)>1
#取出第一次消费和最后一次消费的时间
SELECT userid,
MAX(paidTime) as 最近一次消费,
MIN(paidTime) as 第一次消费,
DATEDIFF(MAX(paidTime),MIN(paidTime)) as 时间差 #单位是日
FROM orderinfo
where isPaid="已支付"
GROUP BY userid
HAVING count(1)>1运行结果展示
5、统计不同年龄段,用户的消费金额是否有差异
思路:计算每个用户的年龄,并对年龄进行分层。关联订单信息,获取每个用户,年龄段,消费频次和消费金额。
#计算每个用户的年龄
SELECT userid , birth, NOW(),
TIMESTAMPDIFF(year,birth,NOW()) as age
FROM userinfo
#确定分层规则,对用户按年龄分层:0-10:1, 11-20:2, 21-30:3
SELECT userid, birth , now() ,
ceil(TIMESTAMPDIFF(year,birth,now()) /10) as age
FROM userinfo
***
ceil(n) 取大于等于数值n的最小整数;
floor(n) 取小于等于数值n的最小整数;
#关联订单信息,获取每个用户,年龄段,消费频次和消费金额
SELECT a.userid, age, sum(price) as prices ,count(1) as cons
FROM orderinfo a
LEFT JOIN
(
SELECT userid, birth , now() ,
ceil(TIMESTAMPDIFF(year,birth,now()) /10) as age
FROM userinfo
) b
ON a.userid = b.userid
WHERE isPaid="已支付" and age<10 and age is not null #数据清洗
GROUP BY a.userid, age
#按年龄段聚合
SELECT age, avg(prices) , avg(cons)
FROM (
SELECT a.userid, age, sum(price) as prices ,count(1) as cons
FROM orderinfo a
LEFT JOIN
(
SELECT userid, birth , now() ,
ceil(TIMESTAMPDIFF(year,birth,now()) /10) as age
FROM userinfo
) b
ON a.userid = b.userid
WHERE isPaid="已支付" and age<10 and age is not null #数据清洗
GROUP BY a.userid, age
) a
GROUP BY age
ORDER BY age运行结果展示
6、统计消费的二八法则,消费排名前20%用户,贡献了多少消费额
思路:统计每个用户的消费金额,并进行一个降序排序。
#统计每个用户的消费金额,并进行一个降序排序
SELECT userid, SUM(price) AS prices from orderinfo
WHERE ispaid = "已支付"
GROUP BY userid
ORDER BY sum(price) DESC
#统计一下一共有多少用户,以及总消费金额是多少
SELECT COUNT(1), SUM(prices) FROM
(
SELECT userid, SUM(price) AS prices from orderinfo
WHERE ispaid = "已支付"
GROUP BY userid
ORDER BY sum(price) DESC
) a
#取出前20%的用户进行金额统计,根据b得到用户数是85649,前20%就是大约17000
SELECT COUNT(1),sum(prices) FROM
(
SELECT userid, sum(price) AS prices from orderinfo
WHERE ispaid = "已支付"
GROUP BY userid
ORDER BY sum(price) DESC
LIMIT 17000
) a运行结果展示