数仓资源层增量更新数据
-- Build the refreshed rowset for the resource layer: every row already in
-- 库2.表1 plus the rows of 库1.表1 whose id has no match in 库2.表1.
-- 库1 / 库2 / 表1 are placeholders for the real database and table names.
-- Both branches of the UNION ALL must expose the same column list.
SELECT *
FROM 库2.表1
UNION ALL
SELECT a.*
FROM 库1.表1 AS a          -- 库1: raw data layer
LEFT JOIN 库2.表1 AS b     -- 库2: resource layer
    ON a.id = b.id
WHERE b.id IS NULL;        -- anti-join: keep only rows of a with no match in b
然后根据分区字段,将增量数据加载到新的分区:
-- Overwrite partition l_etl_date = '时间' of the resource table with the rows
-- of the initial table whose innerid is not present in the resource table.
-- '时间' is a placeholder for the actual date value.
-- NOTE(review): with a static partition spec, a.* must match the target's
-- non-partition columns in number and order — confirm against the table DDL.
INSERT OVERWRITE TABLE 资源库.资源表 PARTITION (l_etl_date = '时间')
SELECT a.*                        -- source columns only; a bare * would also emit b's columns
FROM 初始库.初始表 AS a
LEFT JOIN 资源库.资源表 AS b       -- alias b is required: ON and WHERE reference b.innerid
    ON a.innerid = b.innerid
WHERE b.innerid IS NULL           -- anti-join: keep rows absent from the resource table
  AND a.etl_date > '时间';        -- NOTE(review): assumes etl_date belongs to the initial table — confirm
测试:
表stu
表stu2
-- Anti-join: rows of stu2 whose id has no matching row in stu.
SELECT
    extra.*
FROM stu2 AS extra
LEFT JOIN stu AS base
    ON base.id = extra.id
WHERE base.id IS NULL
stu 相当于资源层(被更新的目标表,对应上文的 库2.表1)
stu2 相当于原始数据层(增量数据来源,对应上文的 库1.表1)
-- Rewrite stu as its current rows plus the rows of stu2 whose id is not in stu.
-- Hive accepts INSERT OVERWRITE TABLE or INSERT INTO TABLE; the two keywords
-- OVERWRITE and INTO cannot be combined in one statement.
INSERT OVERWRITE TABLE stu
SELECT *
FROM stu
UNION ALL
-- No parentheses around this operand: the WHERE clause already binds to the
-- second SELECT only, and the bare form is accepted more widely.
SELECT b.*
FROM stu2 AS b
LEFT JOIN stu AS a
    ON a.id = b.id
WHERE a.id IS NULL;               -- anti-join: keep stu2 rows with no match in stu
这种加载方式(INSERT OVERWRITE)MySQL 不支持,Hive 支持。
-- All rows of stu together with the rows of stu2 whose id has no match in stu.
SELECT base.*
FROM stu AS base
UNION ALL
(
    SELECT extra.*
    FROM stu2 AS extra
    LEFT JOIN stu AS present
        ON present.id = extra.id
    WHERE present.id IS NULL
)
在这里插入图片描述