-- 删除f_1,f_2,f_3完全一致重复记录 保留重复记录中f_mt最大的一个
create table open.t_ttt(
f_1 character varying(128),
f_2 character varying(128),
f_3 smallint,
f_ct timestamp without time zone,
f_mt timestamp without time zone
) WITH (appendonly=true) DISTRIBUTED BY (f_1);
-- 保留重复记录中f_mt最大的一个
delete from open.t_ttt
where (f_1,f_2,f_3) in (select f_1,f_2,f_3 from open.t_ttt group by f_1,f_2,f_3 having count(*)>1)
and (f_1,f_2,f_3,f_mt) not in (select f_1,f_2,f_3,max(f_mt) from open.t_ttt group by f_1,f_2,f_3 having count(*)>1);
-- 删除重复记录 f_id全局唯一
create table open.t_test(
f_id bigint,
f_a character varying(128),
f_b character varying(128),
f_c smallint,
f_ct timestamp without time zone,
f_mt timestamp without time zone
) WITH (appendonly=true) DISTRIBUTED BY (f_id);
-- 重复记录完全不保留
DELETE FROM open.t_test WHERE (f_a,f_b,f_c) in (select f_a,f_b,f_c from open.t_test group by f_a,f_b,f_c having count(*)>1);
-- f_id是全表唯一键的情况下 保留f_id最大的行
delete from open.t_test
where (f_a,f_b,f_c) in (select f_a,f_b,f_c from open.t_test group by f_a,f_b,f_c having count(*)>1)
and f_id not in (select max(f_id) from open.t_test group by f_a,f_b,f_c having count(*)>1);