PG中删除没有主键与唯一键的表的重复记录

  • 数据
drop table test;
create table test(id int, name varchar, description varchar);

insert into test values (1,'a','a');
insert into test values (1,'a','b');
insert into test values (1,'a','c');
insert into test values (1,'a','c');


insert into test values (2,'a','a');
insert into test values (2,'a','b');
insert into test values (2,'a','c');
insert into test values (2,'a','c');


select * from test;
select DISTINCT * from test;

  • 函数
CREATE OR REPLACE FUNCTION func_remove_duplicate_records(table_name varchar) 
RETURNS INTEGER 
AS
$BODY$
DECLARE
	rec record;
	column_list  varchar;
	partition_by_columns varchar;
	sql_text varchar;
BEGIN

	SELECT array_agg(attname)::varchar into column_list FROM pg_attribute WHERE attrelid = table_name::regclass and attname not in ('tableoid','cmax','cmin','xmin','xmax','ctid');
	partition_by_columns = split_part(split_part(column_list,'{',2),'}',1);
    sql_text = format('delete from test 
 				where ctid = 
 				any(
 					array( 
 							select ctid from (
 							select row_number() over(partition by %1$s order by ctid) as rn, ctid from test
 							) t
 				 	where t.rn<>1
 					)
 				); 
 			  ',partition_by_columns);
   	execute sql_text;
    RETURN 0;
END;
$BODY$
LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
select * from test;
  • 实验:
postgres=# select * from test;
 id | name | description 
----+------+-------------
  1 | a    | a
  1 | a    | b
  1 | a    | c
  1 | a    | c
  2 | a    | a
  2 | a    | b
  2 | a    | c
  2 | a    | c
(8 rows)

postgres=# select DISTINCT * from test;
 id | name | description 
----+------+-------------
  2 | a    | b
  1 | a    | b
  1 | a    | a
  1 | a    | c
  2 | a    | c
  2 | a    | a
(6 rows)

postgres=# 
postgres=# select * from func_remove_duplicate_records('test');
 func_remove_duplicate_records 
-------------------------------
                             0
(1 row)

postgres=# select * from test;
 id | name | description 
----+------+-------------
  1 | a    | a
  1 | a    | b
  1 | a    | c
  2 | a    | a
  2 | a    | b
  2 | a    | c
(6 rows)
  • 总结:通过上面的方法就可以在没有主键或者唯一键的情况下删除重复记录,如果有多张表,则可以配合shell脚本批量删除表中的重复记录

参考:https://github.com/digoal/blog/blob/master/201706/20170602_01.md

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值