`将表名或字段名括起来:
-- Clear first
DROP TABLE IF EXISTS `tb_score`;
-- Create the table
CREATE TABLE `tb_score`(
`id` mediumint,
`name` varchar(20),
`course` varchar(30),
`score` smallint
);
-- Batch insert, supported by MySQL and DB2
INSERT INTO tb_score
(id, name, course, score)
VALUES
(1, 'John', 'Biology', 90),
(2, 'John', 'Biology', 90),
(3, 'Lisa', 'Chemistry', 80),
(4, 'John', 'Biology', 90);
DB2
数据库中存在重复记录分为两种:
第一种情形、某些字段的值相同
第二种情形、所有字段的值相同
下面是通用SQL语句,对所有关系型数据库的一和二情形都适用:
查询重复记录,比如姓名-课程出现重复:
SELECT
name,
course,
COUNT(*) AS "count"
FROM
tb_score
GROUP BY
name,
course
HAVING
COUNT(*) > 1
结果:
+------+---------+-------+
| name | course | count |
+------+---------+-------+
| John | Biology | 3 |
+------+---------+-------+
在上面基础上,查询所有的重复记录:
SELECT
*
FROM
tb_score a
WHERE
(
a.name, a.course) IN
(
SELECT
b.name,
b.course
FROM
tb_score b
GROUP BY
b.name,
b.course
HAVING
COUNT(1) > 1)
结果:
+------+------+---------+-------+
| id | name | course | score |
+------+------+---------+-------+
| 1 | John | Biology | 90 |
| 2 | John | Biology | 90 |
| 4 | John | Biology | 90 |
+------+------+---------+-------+
第一种情形:
a. 采用普通SQL连接的方式,条件是这些字段的值相同,并且其它字段的值大于或小于对应字段的值(只能取一种,等于表示同一条记录)
查询多余的重复记录:
SELECT
*
FROM
tb_score a
WHERE
EXISTS
(
SELECT
1
FROM
tb_score b
WHERE
a.name=b.name
AND a.course=b.course
AND a.id > b.id)
结果:
+------+------+---------+-------+
| id | name | course | score |
+------+------+---------+-------+
| 2 | John | Biology | 90 |
| 4 | John | Biology | 90 |
+------+------+---------+-------+
删除多余的重复记录:
-- MySQL doesn't support
DELETE
FROM
tb_score a
WHERE
EXISTS
(
SELECT
1
FROM
tb_score b
WHERE
a.name = b.name
AND a.course = b.course
AND a.id > b.id)
b. 采用特定于数据库的SQL语句,和第二种情形的写法类似。
第二种情形
不同的数据库有不同的解决方案,SQL Server是用DISTINCT关键字,Oracle是ROWID,DB2是ROW_NUMBER OVER(PARTITION BY [FIELDS] ORDER BY [FIELDS])
准备数据:
-- Drop and create table are omitted
-- Batch insert
INSERT INTO tb_score
(id, name, course, score)
VALUES
(1, 'John', 'Biology', 90),
(1, 'John', 'Biology', 90),
(2, 'Lisa', 'Chemistry', 80),
(1, 'John', 'Biology', 90);
SQL Server
删除多余的重复记录:
CREATE TABLE
#tmp AS
SELECT DISTINCT
*
FROM
tb_score;
TRUNCATE TABLE
tb_score;
INSERT
INTO
tb_score
SELECT
*
FROM
#tmp;
Oracle
查询多余的重复记录:
SELECT
*
FROM
tb_score a
WHERE
a.ROWID >
(
SELECT
MIN(ROWID)
FROM
tb_score b
WHERE
a.id = b.id
AND a.name = b.name
AND a.course = b.course)
删除多余的重复记录:
DELETE
FROM
tb_score a
WHERE
a.ROWID >
(
SELECT
MIN(ROWID)
FROM
tb_score b
WHERE
a.id = b.id
AND a.name = b.name
AND a.course = b.course)
DB2
查询多余的重复记录:
SELECT
id,
name,
course
FROM
(
SELECT
id,
name,
course,
ROW_NUMBER() OVER(PARTITION BY id, name, course) AS row_seq
FROM
tb_score) t
WHERE
t.row_seq > 1
删除多余的重复记录:
DELETE
FROM
(
SELECT
id,
name,
course,
ROW_NUMBER() OVER(PARTITION BY id, name, course) AS row_seq
FROM
tb_score) t
WHERE
t.row_seq > 1
分享到:
2012-05-04 10:01
浏览 1063
分类:数据库
评论