源表:analysis_portrait1 和 analysis_portrait2
代码:
-- 1. Set difference: uids present in analysis_portrait1 but absent from
--    analysis_portrait2.
CREATE TABLE IF NOT EXISTS analysis_except
(
    uid BIGINT
) lifecycle ${lifecycle};

-- Session-level mapper split size setting; value kept as in the original job.
set odps.stage.mapper.split.size= 1;

-- Anti-join: keep only the left-side rows that found no partner on the right
-- (v.uid IS NULL after the LEFT JOIN).
-- DISTINCT collapses repeated uids so the output is a true set, matching the
-- deduplicating behavior of the INTERSECT and UNION results in this script.
-- NOTE(review): rows whose u.uid is NULL never satisfy the join condition and
-- are therefore kept (as a single NULL row) -- confirm this is desired.
INSERT OVERWRITE TABLE analysis_except
SELECT DISTINCT u.uid
FROM analysis_portrait1 u
LEFT JOIN analysis_portrait2 v
    ON u.uid = v.uid
WHERE v.uid IS NULL;
-- 2. Intersection: uids that occur in both portrait tables.
CREATE TABLE IF NOT EXISTS analysis_intersect (
    uid BIGINT
)
LIFECYCLE ${lifecycle};

-- Session-level mapper split size setting; value kept as in the original job.
SET odps.stage.mapper.split.size=1;

-- Overwrite the target with the uids common to both inputs.
INSERT OVERWRITE TABLE analysis_intersect
SELECT p1.uid
FROM analysis_portrait1 p1
INTERSECT
SELECT p2.uid
FROM analysis_portrait2 p2;
-- 3. Union: uids that occur in either portrait table.
CREATE TABLE IF NOT EXISTS analysis_union (
    uid BIGINT
)
LIFECYCLE ${lifecycle};

-- Session-level mapper split size setting; value kept as in the original job.
SET odps.stage.mapper.split.size=1;

-- Plain UNION (not UNION ALL) so that each uid is written only once.
INSERT OVERWRITE TABLE analysis_union
SELECT p1.uid
FROM analysis_portrait1 p1
UNION
SELECT p2.uid
FROM analysis_portrait2 p2;
另外,在支持 EXCEPT 的数据库中(MySQL 8.0.31 之前的版本不支持),也可以直接用 EXCEPT 来求差集。