谷歌商店购物表简介
数据库包含用户表、用户交易表、2017年交易表、2018年交易表、2019年交易表以及退货表
1.表内容
user_info
列名 | 数据类型 | 示例 |
---|---|---|
user_id | string | 101(唯一值) |
user_name | string | Nick(唯一值) |
sex | string | [male,female] |
age | int | 20 |
city | string | beijing |
firstactivetime | string | (2017-05-03 12:40:28) |
level | int | [1,10] |
extra1 | string | {“systemtype”:“ios”,“education”:“bachelor”,“marriage_status”:“1”,“phonebrand”:“iphone X”} |
extra2 | map<string,string> | {“systemtype”:“ios”,“education”:“bachelor”,“marriage_status”:“1”,“phonebrand”:“iphone X”} |
user_trade,以dt分区
列名 | 数据类型 | 示例 |
---|---|---|
user_name | string | Nick |
piece | int | 4 |
price | double | 486.3 |
pay_amount | double | 5464.7 |
goods_category | string | game |
pay_time | bigint | 1483676540(时间戳) |
trade_2017
列名 | 数据类型 | 示例 |
---|---|---|
user_name | string | Nick |
amount | double | 425 |
trade_time | string | (2017-07-08 21:30:59) |
trade_2018
列名 | 数据类型 | 示例 |
---|---|---|
user_name | string | Nick |
amount | double | 487 |
trade_time | string | (2018-11-08 22:52:42) |
trade_2019
列名 | 数据类型 | 示例 |
---|---|---|
user_name | string | Nick |
amount | double | 555 |
trade_time | string | (2019-01-24 22:46:23) |
user_refund,以dt分区
列名 | 数据类型 | 示例 |
---|---|---|
user_name | string | Nick |
amount | double | 555 |
trade_time | string | (2019-01-24 22:46:23) |
2.案例分析
2017-2019年有交易但是没退款的用户的手机品牌分布
SELECT d.phonebrand
count(c.user_name)
FROM
(SELECT a.user_name
FROM
(SELECT user_name
FROM trade_2017
union
SELECT user_name
FROM trade_2018
union
SELECT user_name
FROM trade_2019)a
LEFT JOIN
(SELECT distinct user_name
FROM user_refund
WHERE dt>'0')b
on a.user_name=b.user_name
WHERE b.user_name is null) c
LEFT JOIN
(SELECT user_name
extra2['phonebrand'] as phonebrand
FROM user_info) d
on c.user_name-d.user_name
GROUP BY d.phonebrand;
对2018年每三个月的近三个月进行移动求平均金额
SELECT a.month
a.pay_amount
avg(a.pay)amount) over(order by a.month rows between 2 preceding and current row)
FROM
(SELECT month(dt) month
sum(a.pay_amount) pay_amount
FROM user_trade
WHERE year(dt)=2018
GROUP BY month(dt))a;
2019每个用户支付与退款汇总
SELECT a.user_name.
sum(a.pay_amount)
sum(a.refund_amount)
FROM
(SELECT user_name,
sum(pay_amount) pay_amount,
0 as refund_amount
FROM user_trade
WHERE year(dt)=2019
GROUP BY user_name
UNION ALL
SELECT user_name,
0 as pay_amount,
sum(refund_amount) refund_amount
FROM user_refund
WHERE year(dt)=2019
GROUP BY user_name)a
GROUP BY a.user_name;
2019年每个城市,不同性别支付金额top3的用户
SELECT c.user_name,
c.city,
c.sex,
c.pay_amount
c.rank
FROM
(SELECT a.user_name,
b.city,
b.sex,
a.pay_amount,
row_number() over(partition by b.city,b.sex order by a.pay_amount desc) rank
FROM
(SELECT user_name,
sum(pay_amount) pay_amount
FROM user_trade
WHERE year(dt)=2019
GROUP BY user_name)a
LEFT JOIN user_info b
on a.user_name=b.user_name)c
WHERE c.rank<=3;
分析支付时间间隔超过100天的用户
SELECT count(distinct user_name)
FROM
(SELECT user_name,
dt,
lead(dt) over(partition by user_name order by dt) lead_dt
FROM user_trade
WHERE dt>'0') a
WHERE a.lead_dt is not null
and datediff(a.lead_dt,dt)>100;
对2019年7月份用户的购买爱好分析
用户购买商品品类数量排名
SELECT user_name,
count(distinct goods_category),
row_number() over(order by count(distinct goods_category))
FROM user_trade
WHERE substr(dt,1,7)='2019-07'
GROUP BY user_name;
分析2019年退款金额前10%的用户
SELECT a.user_name,
a.refund_amount,
a.level
FROM
(SELECT user_name,
sum(refund_amount) refund_amount,
ntile(10) over(order by sum(refund_amount))level
FROM user_refund
WHERE year(dt)=2019
GROUP BY user_name)a
WHERE a.level=1;
找出几个资深吃货做推广
找出2019年购买商品品类时food,支付金额前5的客户
SELECT user_name,
sum(pay_amount)
FROM user_trade
WHERE dt between '2019-01-01' and '2019-12-31'
GROUP BY user_name
ORDER BY sum(pay_amount) desc
LIMIT 5;
无退款用户分析
SELECT a.user_name
FROM
(SELECT distinct user_name
FROM user_trade
WHERE year(dt)=2019)a
LEFT JOIN
(SELECT distinct user_name
FROM user_refund
WHERE year(dt)=2019)b
on a.user_name=b.user_name
WHERE b.user_name is null;
对2019年第一季度商品热度与价值度进行分析
2019年1-3月对每个品类的购买数与累计金额
SELECT good_category,
count(distinct user_name) as user_sum,
sum(pay_amount) as total_amount
FROM user_trade
WHERE dt between '2019-01-01' and '2019-03-31'
GROUP BY goods_category
2019年9月给支付金额top5且金额大于5万元用户发放VIP券
SELECT user_name,
sum(pay_amount) as total_amount
FROM user_trade
WHERE month(dt)=9
GROUP BY user_name
HAVING sum(pay_amount)>50000
ORDER BY total_amount
LIMIT 5;
对2019元旦新用户推广作价值分析,即拉新分析。
即用户首次激活时间与元旦的日期间隔
SELECT user_name,
datediff(to_date(firstactivetime),'2019-01-01')
FROM user_info
LIMIT 10;
对用户的年龄段进行分析,观察用户年龄分布。
SELECT case when age<20 then '20岁以下'
when age>=20 and age<30 then '20-30岁'
when age>=30 and age<40 then '30-40岁'
else '40岁以上' end age_type,
count(distinct user_id) user_num
FROM user_info
GROUP BY case when age<20 then '20岁以下'
when age>=20 and age<30 then '20-30岁'
when age>=30 and age<40 then '30-40岁'
else '40岁以上' end;
观察用户等级随性别的分布情况
SELECT sex,
if(level>5,'高','低') level_type
count(distinct user_id) user_num
FROM user_info
GROUP BY sex,
if(level>5,'高','低');
分析每个月的拉新情况
SELECT substr(firstactivetime,1,7) month,
count(distinct user_id) user_num
FROM user_info
GROUP BY substr(firstactivetime,1,7);
不同手机品牌的用户数
SELECT get_json_object(extra1,'$.phonebrand') phone_brand,
count(distinct user_id) user_num
FROM user_info
GROUP BY get_json_object(extra1,'$.phonebrand');
#extra1是json类型,extra2是map类型
#使用extra2则是extra2['phonebrand']
激活天数距今超过300天的男女分布情况
SELECT sex,
count(user_id)
FROM user_info
WHERE datediff(current_date(),to_date(firstactivetime))>300
GROUP BY sex;
不同性别、教育程度的分布情况
SELECT sex,
extra['education'],
count(user_id)
FROM user_info
GROUP BY sex,
extra2['education'];
从2019年1月1日志2019年6月30日,每个时段的不同品类购买金额分布
按每小时分组,from_unixtime()中小时用HH代表24小时制
SELECT substr(from_unixtime(pay_time,'yyyy-MM-dd HH'),12), --24小时制
goods_category,
sum(pay_amount)
FROM user_trade
WHERE dt between '2019-01-01' and '2019-06-30'
GROUP BY substr(from_unixtime(pay_time,'yyyy-MM-dd HH'),12),
goods_category;
调查用户满意度
再2018年购买又退款的用户
SELECT a.user_name
FROM
(SELECT distinct user_name
FROM user_trade
WHERE year(dt)=2019)a
JOIN
(SELECT distinct user_name
FROM user_refund
WHERE year(dt)=2019)b
on a.user_name=b.user_name;
某项目用户忠诚度分析
18与19年都购买的用户
SELECT a.user_name
FROM
(SELECT distinct user_name
FROM user_trade
WHERE year(dt)=2018)a
JOIN
(SELECT distinct user_name
FROM user_trade
WHERE year(dt)=2019)b
on a.user_name=b.user_name;