啥也不说了,先上代码:
--数据装载
insert overwrite ads_sku_info
select
'2020-06-14' dt,
source,
target,
count(*)
from
(
select
concat('step-', step, ':', source) source,
concat('step-', step+1,':', target) target
from
(
select
page_id,
lag(page_id, 1, null) over(partition by session_id order by ts rows between UNBOUNDED PRECEDING and unbounded following) source,
lead(page_id, 1, null) over(partition by session_id order by ts rows between UNBOUNDED preceding and unbounded following) target,
row_number() over (partition by session_id order by ts) step
from
(
select
last_page_id,
page_id,
ts,
concat(mid_id, '-', last_value(if(last_page_id is null, ts,null),true) over(partition by mid_id order by ts)) session_id
from dwd_page_log
where dt = '2020-06-14'
) t1
)t2
)t3
group by source, target;
在t3表中的select语句有聚合函数count(*)存在,还有source和target两个字段,如果不适用group by 以这两个字段分组,则会出现错误。
结论:
1.聚合函数和非聚合函数字段同时出现,应该使用group by 对非聚合字段进行分组。
2.如果只有聚合函数可以不使用group by
上面两个结论可自行验证。