一、错误实例
--剔除污染之后每一组人数不同
select
camp.group_id as group_id --实验分组
,int((datediff(arrive.arrive_dt,'2018-06-20')-1)/7)+1 as ob_week --观察周
,count(distinct camp.driver_id) as group_cnt --组人数
,count(distinct arrive.order_id) as arrive_cnt --完单数
,sum(beatles_split) as beatles_split --分账
,sum(gmv) as gmv --gmv
from
(
select
camp.driver_id as driver_id
,camp.group_id as group_id
,camp.extractor_name as extractor_name
,camp.user_ltv as user_ltv
,'2018-06-20' as start_date
from
(
select
distinct driver_id as driver_id
,group_id
,extractor_name
, '二期新手任务' as user_ltv
from beatles_strategy.mission_publiser_record_view_intern
where datetime='20180619'
and extractor_name='extractor_continuity_20180619'
and task_id in ('90','88','92')
)camp --活动分组信息
left outer join
(
select
driver_id
from beatles_dwd.dwd_order_arrive_d_view_intern
where (is_test!=1 or is_test is null)
--and status in (2,3,4,5,6,13,31)
and concat_ws('-',year,month,day) between '2018-06-15' and '2018-06-20'
and (to_date(arrive_time)) between '2018-06-15' and '2018-06-20'
group by driver_id
)t --剔除非新手的车主
on camp.driver_id = t.driver_id
where camp.driver_id is not null and t.driver_id is null
)camp --剔除了非新手的活动分组信息
left outer join
(
select
concat_ws('-',year,month,day) as arrive_dt --完单日期
,cast(driver_id as bigint) as driver_id
,cast(order_id as bigint) as order_id
from beatles_dwd.dwd_order_arrive_d_view_intern
where (is_test!=1 or is_test is null)
and concat_ws('-',year,month,day) between '2018-06-20' and '${end_date}'
group by cast(driver_id as bigint),cast(order_id as bigint),concat_ws('-',year,month,day)
)arrive --完单情况
on camp.driver_id=arrive.driver_id
left outer join
(
select
cast(relative_id as bigint) as order_id --订单id
,sum(beatles_split) as beatles_split --单个订单分账
,max(total) as gmv --单个订单gmv
from beatles_ods.payment_view_intern
where concat_ws('-',year,month,day) between '2018-06-20' and '${end_date}'
group by cast(relative_id as bigint)
)beatles_split
on arrive.order_id=beatles_split.order_id
group by group_id,int((datediff(arrive.arrive_dt,'2018-06-20')-1)/7)+1
order by group_id,int((datediff(arrive.arrive_dt,'2018-06-20')-1)/7)+1
这样在hive下是解释不通的,回顾一下SQL的执行顺序https://blog.csdn.net/TOMOCAT/article/details/81586789。order by是在SQL的最后执行的,从而应该改成:
二、修改:
--剔除污染之后每一组人数不同
select
camp.group_id as group_id --实验分组
,int((datediff(arrive.arrive_dt,'2018-06-20')-1)/7)+1 as ob_week --观察周
,count(distinct camp.driver_id) as group_cnt --组人数
,count(distinct arrive.order_id) as arrive_cnt --完单数
,sum(beatles_split) as beatles_split --分账
,sum(gmv) as gmv --gmv
from
--skip--
group by group_id,int((datediff(arrive.arrive_dt,'2018-06-20')-1)/7)+1
order by group_id,ob_week