接hive分析(二)
每个用户有多少个订单?
select t.user_id ,count(1) as order_count
from orders t
group by t.user_id
order by order_count desc
limit 10;
二、每个用户平均每个订单平均是多少商品
因为orders表中只有用户和订单的数据,需要关联priors或者trains表,才能获得到订单的数据。因为trains表中的数据量比较少,但是trains中因为是作为标签的数据,只有一个订单的数据。
可以取部分的priors来作为进行代码调试计算。加limit
select ord.user_id,avg(pri.products_cnt) as avg_prod
from
(select order_id,user_id from orders) ord
join
(select order_id,count(1) as products_cnt from priors group by order_id)pri
on ord.order_id=pri.order_id
group by ord.user_id
limit 10;
select user_id,
sum(prod_cnt)/count(1) as sc_prod,
avg(prod_cnt) as avg_prod
from (select * from orders where eval_set='prior')od --如果不是prior统计为0
join (
select order_id,count(1) as prod_cnt
from priors
group by order_id
limit 10000)pro
on od.order_id=pro.order_id
group by user_id
limit 10;
三、每个用户在一周中的购买订单的分布(列转行)
select
user_id,
sum(case order_dow when '0' then 1 else 0 end) as dow_0, --表示周一
sum(case order_dow when '1' then 1 else 0 end) as dow_1,
sum(case order_dow when '2' then 1 else 0 end) as dow_2,
sum(case order_dow when '3' then 1 else 0 end) as dow_3,
sum(case order_dow when '4' then 1 else 0 end) as dow_4,
sum(case order_dow when '5' then 1 else 0 end) as dow_5,
sum(case order_dow when '6' then 1 else 0 end) as dow_6
from orders
group by user_id
limit 20;