删除重复记录

CREATE TABLE #t1(ID INT NULL, VALUE VARCHAR(2))
INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')
INSERT INTO #t1(ID, VALUE) VALUES (2,'bb')
INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')
INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')
INSERT INTO #t1(ID, VALUE) VALUES (3,'cc')
INSERT INTO #t1(ID, VALUE) VALUES (3,'cc')
GO

-- BINARY_CHECKSUM(<column names>): <column names> are columns that we want to compare duplicates for
--
if you want to compare the full row then change BINARY_CHECKSUM(<column names>) -> BINARY_CHECKSUM(*)
      
-- for SQL Server 2000+ a loop
--
save checksums and rowcounts for duplicates
SELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum, COUNT(*) AS Cnt
INTO #t2
FROM #t1
GROUP BY BINARY_CHECKSUM(ID, VALUE) HAVING COUNT(*)>1

DECLARE @ChkSum BIGINT, @rc INT
-- get the first checksum and set the rowcount to the count - 1
--
because we want to leave one duplicate
SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2

WHILE EXISTS (SELECT * FROM #t2)
BEGIN   
   
-- rowcount is one less than the duplicate rows count
    SET ROWCOUNT @rc
   
DELETE FROM #t1 WHERE BINARY_CHECKSUM(ID, VALUE) = @ChkSum
   
-- remove the processed duplicate from the checksum table
    DELETE #t2 WHERE ChkSum = @ChkSum
   
-- select the next duplicate rows to delete
    SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2   
END
SET ROWCOUNT 0
GO

SELECT * FROM #t1

-- for SQL Server 2005+ a cool CTE
;WITH Numbered
AS
(
   
SELECT ROW_NUMBER() OVER (PARTITION BY ChkSum ORDER BY ChkSum) AS RN, *
   
FROM (
            
SELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum
            
FROM #t1
         ) t
)
DELETE FROM Numbered WHERE RN > 1;
GO

SELECT * FROM #t1

DROP TABLE #t1;
DROP TABLE #t2;

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值