- 数据
drop table test;
create table test(id int, name varchar, description varchar);
insert into test values (1,'a','a');
insert into test values (1,'a','b');
insert into test values (1,'a','c');
insert into test values (1,'a','c');
insert into test values (2,'a','a');
insert into test values (2,'a','b');
insert into test values (2,'a','c');
insert into test values (2,'a','c');
select * from test;
select DISTINCT * from test;
- 函数
CREATE OR REPLACE FUNCTION func_remove_duplicate_records(table_name varchar)
RETURNS INTEGER
AS
$BODY$
DECLARE
rec record;
column_list varchar;
partition_by_columns varchar;
sql_text varchar;
BEGIN
SELECT array_agg(attname)::varchar into column_list FROM pg_attribute WHERE attrelid = table_name::regclass and attname not in ('tableoid','cmax','cmin','xmin','xmax','ctid');
partition_by_columns = split_part(split_part(column_list,'{',2),'}',1);
sql_text = format('delete from test
where ctid =
any(
array(
select ctid from (
select row_number() over(partition by %1$s order by ctid) as rn, ctid from test
) t
where t.rn<>1
)
);
',partition_by_columns);
execute sql_text;
RETURN 0;
END;
$BODY$
LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
select * from test;
- 实验:
postgres=# select * from test;
id | name | description
----+------+-------------
1 | a | a
1 | a | b
1 | a | c
1 | a | c
2 | a | a
2 | a | b
2 | a | c
2 | a | c
(8 rows)
postgres=# select DISTINCT * from test;
id | name | description
----+------+-------------
2 | a | b
1 | a | b
1 | a | a
1 | a | c
2 | a | c
2 | a | a
(6 rows)
postgres=#
postgres=# select * from func_remove_duplicate_records('test');
func_remove_duplicate_records
-------------------------------
0
(1 row)
postgres=# select * from test;
id | name | description
----+------+-------------
1 | a | a
1 | a | b
1 | a | c
2 | a | a
2 | a | b
2 | a | c
(6 rows)
- 总结:通过上面的方法就可以在没有主键或者唯一键的情况下删除重复记录,如果有多张表,则可以配合shell脚本批量删除表中的重复记录
参考:https://github.com/digoal/blog/blob/master/201706/20170602_01.md