比如,计算第一次、总量等,正常情况下需要用到所有历史数据进行计算。但有些表数据特别大,用全景历史数据计算比较费力,可能就算不出来。
这时,可以考虑用这种方式。其优点是,数据涉及到的数据量偏小;其缺点也很明显,需要从历史数据起始之日,一天天刷到当前,中间少一天不可。如果中间有一天出现问题,则后面的数据也全部出错。
可以在实践中,权衡利裨,根据实际情况,选择合适的方案。
1、样例表结构
CREATE TABLE dwf_seller_publish_agr_full_1d(
shop_id bigint COMMENT '用户id',
first_publish_time string COMMENT '第一次发拍时间',
last_publish_time string COMMENT '最后一次发拍时间',
publish_cnt_d string COMMENT '当天发拍数',
publish_cnt_30d string COMMENT '近30天发拍数',
publish_cnt_60d string COMMENT '近60天发拍数',
publish_cnt_90d string COMMENT '近90天发拍数',
publish_cnt_all bigint COMMENT '发拍总量',
publish_cnt_live bigint COMMENT '发拍直播拍品量',
publish_cnt_no_live bigint COMMENT '发拍非直播拍品量',
publish_cnt_ykj bigint COMMENT '一口价总发拍量(所有的)',
publish_cnt_ykj_zb bigint COMMENT '一口价直播发拍量',
publish_cnt_yyp bigint COMMENT '一元拍总发拍量',
no_ykj_sales bigint COMMENT '展示拍品数,排除一口价',
ykj_sales bigint COMMENT '展示拍品数,一口价拍品',
yyp_sales bigint COMMENT '展示拍品数,一元拍拍品')
comment '用户宽表-发拍聚合'
partitioned by (dt string)
STORED AS orc tblproperties ("orc.compress"="SNAPPY");
2、计算sql
insert overwrite table ${c.to.dwf}.dwf_seller_publish_agr_full_1d partition(dt='${dt}')
select
nvl(a.shop_id,b.shop_id),
nvl(b.first_publish_time,a.first_publish_time),
nvl(a.last_publish_time,b.last_publish_time),
nvl(a.publish_cnt_d,0),
nvl(a.publish_cnt_30d,0),
nvl(a.publish_cnt_60d,0),
nvl(a.publish_cnt_90d,0),
nvl(a.publish_cnt_d,0) + nvl(b.publish_cnt_all,0),
nvl(a.publish_cnt_live,0) + nvl(b.publish_cnt_live,0),
nvl(a.publish_cnt_d,0) - nvl(a.publish_cnt_live,0) + nvl(b.publish_cnt_no_live,0),
nvl(a.publish_cnt_ykj_day,0) + nvl(b.publish_cnt_ykj,0),
nvl(a.publish_cnt_ykj_zb_day,0) + nvl(b.publish_cnt_ykj_zb,0),
nvl(a.publish_cnt_yyp_day,0) + nvl(b.publish_cnt_yyp,0),
nvl(a.no_ykj_sales,0),
nvl(a.ykj_sales,0),
nvl(a.yyp_sales,0)
FROM
(select shop_id,
min(publish_time) AS first_publish_time,
max(publish_time) AS last_publish_time,
sum(if(dt='${dt}',multi_wins,0)) AS publish_cnt_d,
sum(if(dt>date_sub('${dt}',30),multi_wins,0)) AS publish_cnt_30d,
sum(if(dt>date_sub('${dt}',60),multi_wins,0)) AS publish_cnt_60d,
sum(if(dt>date_sub('${dt}',90),multi_wins,0)) AS publish_cnt_90d,
sum(if(dt='${dt}' and is_live=1,multi_wins,0)) AS publish_cnt_live,
sum(if(dt='${dt}' and (sale_type=7 or sale_type=12),multi_wins,0)) AS publish_cnt_ykj_day,
sum(if(dt='${dt}' and sale_type=12,multi_wins,0)) AS publish_cnt_ykj_zb_day,
sum(if(dt='${dt}' and sale_type=11,multi_wins,0)) AS publish_cnt_yyp_day,
sum(if(sale_type!=7 and sale_type !=12 and end_date>date_sub('${dt}',1),multi_wins,0)) AS no_ykj_sales,
sum(if((sale_type=7 or sale_type =12) and end_date>date_sub('${dt}',1),multi_wins,0)) AS ykj_sales,
sum(if(sale_type=11 and end_date>date_sub('${dt}',1),multi_wins,0)) AS yyp_sales
from ${c.from.dwf}.dwf_seller_publish_incr_1d where dt>date_sub('${dt}',90) and dt <='${dt}' and pid = '0' group by shop_id
) a
full JOIN
(select * from ${c.from.dwf}.dwf_seller_publish_agr_full_1d where dt = date_sub('${dt}',1)) b
on a.shop_id=b.shop_id;