一个id对应多个城市,按城市排序,使用row_number() over()过滤去重的方式,只取排在第一的一条
insert overwrite table xxxx partition(ds=%YYYYMMDD%)
select
count(distinct id) as total_trace_user --
,sum(xx_time + oo_time) as xx_oo_time --
,sum(total_time) as total_time --
,sum(distance) as total_distance --
,sum(xx_distance) as xx_distance --
,sum(oo_distance) as oo_distance --
,IF(grouping(osv)==1,'all',osv) AS osv
,IF(grouping(city)==1,'all',city) AS city
,IF(grouping(channel)==1,'all', channel ) AS channel
,count(distinct (case when xx_distance > 0 then id end)) as navi_uv --
,count(distinct (case when oo_distance > 0 then id end)) as cruise_uv --
,count(distinct (case when oo_distance > 0 and xx_distance > 0 then id end)) as repeate_uv --
,sum(xx_time) as xx_time
,sum(oo_time) as oo_time
,IF(grouping(province)==1,'all', province ) AS province
,sum(sid_num) as total_trace --
from
(
select
id
,sid_num
,distance
,xx_distance
,oo_distance
,total_time
,xx_time
,oo_time
,channel
,city
,osv
,province
from (
select *
,row_number() over(partition by id order by city) as num --
from
xxxxx
where
ds = %YYYYMMDD%
) t
where t.num= 1
)a
group by
osv
,city
,channel
,province
with cube