一个稍微复杂点的hive sql

with abc as
(
select 
substring(a.pay_time,1,10) dt,
((if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ (if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))
pay_fee,
a.xxx_id as uid
from order_all a inner join oobi_trade b on a.order_id = b.order_id
where (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')
union all
select substring(a.pay_time,1,10) dt,
(b.pay_fee*1.00/100) pay_fee,
a.uid
from unidied_orders a inner join payments b on a.order_id=b.order_id
where pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')
)
select *, 
round((total_pay_fee/total_pay_users),2) as avg_pay,
round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,
round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30
from (
select 1 as dt, 
count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30
from abc
) a
union all
select *, 
round((total_pay_fee/total_pay_users),2) as avg_pay,
round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,
round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30
from (
select 2 as dt, 
count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30
from abc
) a
union all
select *, 
round((total_pay_fee/total_pay_users),2) as avg_pay,
round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,
round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30
from (
select 3 as dt, 
count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30
from abc
) a
union all
select *, 
round((total_pay_fee/total_pay_users),2) as avg_pay,
round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,
round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30
from (
select 4 as dt, 
count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30
from abc
) a;


以上查询数据量太大,hive跑不起来,优化之后的查询如下:


with abc as
(
select 
substring(a.pay_time,1,10) dt,
(sum(if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ sum(if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))
pay_fee,
a.user_id as uid
from order_all a inner join oobi_trade b on a.order_id = b.order_id
where (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')
group by substring(a.pay_time,1,10), a.user_id
union all
select substring(a.pay_time,1,10) dt,
sum(b.pay_fee*1.00/100) pay_fee,
a.uid
from unidied_orders a inner join payments b on a.order_id=b.order_id
where pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-03-01')
group by substring(a.pay_time,1,10), a.uid
) 
select '2016-03-01' as dd, 
count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30
from abc
union all
select '2016-03-02' as dd, 
count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30
from abc
union all
select '2016-03-03' as dd,
count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30
from abc
union all
select '2016-03-04' as dd, 
count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,
sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,
count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,
sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,
count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,
sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30
from abc;

最后改进:

with t0 as (
select substring(pay_time,1,10) dt,user_id,sum(b.xxbi_consume+b.giftcard_consume) pay_fee
from order_all a inner join xxbi_trade b on a.order_id = b.order_id 
where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'
group by substring(pay_time,1,10),user_id
union all
select substring(a.pay_time,1,10) dt,a.uid user_id,sum(b.pay_fee) pay_fee
from unified_orders a inner join payments b on a.order_id=b.order_id
where pay_type in ('WXAPP','WXWAP')
and substring(b.pay_time,1,10)>='2016-04-17' and substring(b.pay_time,1,10)<='2016-04-30'
group by substring(a.pay_time,1,10),a.uid
),t1 as (
select distinct substring(pay_time,1,10) dt from order_all where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'
),t2 as (
select dt,user_id,sum(pay_fee) pay_fee from t0 group by dt,user_id
),t3 as (
select t2.*,t1.dt zdt from t1 join t2 on 1=1
)
select dt,
sum(case when (datediff(dt,zdt)=0) then pay_fee end) pay,
count(distinct case when (datediff(dt,zdt)=0) then user_id end) user_cnt,
sum(case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then pay_fee end ) pay_seven,
count(distinct case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then user_id end) user_seven
from t3 
where dt >= '2016-04-24'and dt<= '2016-04-30' 
group by dt;






  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值