背景
A任务在凌晨1点到3点,平均耗时1h,且是核心公共任务,急需优化。
整体逻辑示意图:
// 从tableA读取一次数据,放到临时表t1
DROP TABLE IF EXISTS temp.tmp_xx_$date_1;
CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_1
as
select
xxx
from tableA
where xxxx;
// 从临时表t1读取和转换数据,得临时表t2
DROP TABLE IF EXISTS temp.tmp_xx_$date_2;
CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_2
as
SELECT
device_id
,c2
FROM
(
SELECT
device_id
,c2
,ROW_NUMBER() OVER (PARTITION BY device_id ORDER BY time DESC) AS num
FROM
(
select
device_id
,c2
from temp.tmp_xx_$date_1
where !isempty(c2)
)t
)t
WHERE t.num=1;
// 从临时表t1读取和转换数据,得临时表t3
DROP TABLE IF EXISTS temp.tmp_xx_$date_3;
CREATE TABLE