先创建表格:
mysql> Create table If Not Exists Person (Id int, Email varchar(255));
Query OK, 0 rows affected (0.22 sec)
mysql> insert into Person (id, email) values ('1', 'john@example.com');
Query OK, 1 row affected (0.03 sec)
mysql> insert into Person (id, email) values ('2', 'bob@example.com');
Query OK, 1 row affected (0.01 sec)
mysql> insert into Person (id, email) values ('3', 'john@example.com');
Query OK, 1 row affected (0.01 sec)
mysql> select * from Person;
+------+------------------+
| Id | Email |
+------+------------------+
| 1 | john@example.com |
| 2 | bob@example.com |
| 3 | john@example.com |
+------+------------------+
3 rows in set (0.01 sec)
编写一个 SQL 删除语句来 删除 所有重复的电子邮件,只保留一个id最小的唯一电子邮件。以任意顺序返回结果表。
法一:
第一步:首先找出email相同但id更大的 id和email
mysql> select p1.id,p1.email from person as p1,person as p2 where p1.email = p2.email and p1.id > p2.id;
+------+------------------+
| id | email |
+------+------------------+
| 3 | john@example.com |
+------+------------------+
1 row in set (0.01 sec)
第二步:从p1中删除第一步找到的数据。
mysql> delete p1 from person p1,person p2 where p1.email = p2.email and p1.id > p2.id;
Query OK, 1 row affected (0.01 sec)
mysql> select * from Person;
+------+------------------+
| Id | Email |
+------+------------------+
| 1 | john@example.com |
| 2 | bob@example.com |
+------+------------------+
2 rows in set (0.00 sec)
法二:
第一步:按着email分组,找出id最小的id和email
mysql> SELECT * FROM
-> (
-> SELECT MIN(ID)
-> FROM PERSON
-> GROUP BY EMAIL
-> ) AS P1
-> ;
+---------+
| MIN(ID) |
+---------+
| 1 |
| 2 |
+---------+
2 rows in set (0.00 sec)
第二步:将第一步得到的数据建立临时表p1,删除person表中id不在p1中的数据。
mysql> DELETE FROM PERSON
-> WHERE ID
-> NOT IN
-> (SELECT * FROM
-> (
-> SELECT MIN(ID)
-> FROM PERSON
-> GROUP BY EMAIL
-> ) AS P1);
Query OK, 2 rows affected (0.02 sec)
mysql> select * from Person;
+------+------------------+
| Id | Email |
+------+------------------+
| 1 | john@example.com |
| 2 | bob@example.com |
+------+------------------+
2 rows in set (0.00 sec)
法三:
第一步:
#按照email分组,按照id排序
mysql> select Id,row_number() over(partition by Email order by Id) rn
-> from Person;
+------+----+
| Id | rn |
+------+----+
| 2 | 1 |
| 1 | 1 |
| 3 | 2 |
+------+----+
3 rows in set (0.00 sec)
#按照email分组,按照id排序后,找出排序名次大于1的id
mysql> select Id
->from
-> (
-> select Id,row_number() over(partition by Email order by Id) rn
-> from Person
-> ) t1
-> where rn>1;
+------+
| Id |
+------+
| 3 |
+------+
1 row in set (0.00 sec)
第二步:删除上一步找出的数据
mysql> delete
-> from Person
-> where Id in
-> (
-> select Id
-> from
-> (
-> select Id,
-> row_number() over(partition by Email order by Id) rn
-> from Person
-> ) t1
-> where rn>1
-> );
Query OK, 1 row affected (0.01 sec)
mysql> select * from Person;
+------+------------------+
| Id | Email |
+------+------------------+
| 1 | john@example.com |
| 2 | bob@example.com |
+------+------------------+
2 rows in set (0.00 sec)
法四:
第一步:按照email分组并且找出Email的数量大于1的最小的id数和对应的email
mysql> select min(id) as id, email from person group by email having count(email) > 1;
+------+------------------+
| id | email |
+------+------------------+
| 1 | john@example.com |
+------+------------------+
1 row in set (0.00 sec)
第二步:将上表作为临时表p2,找出person表中的email等于p2中的email但是id号不一样的数据,把这个数据删除
mysql> delete person from person,
-> (select min(id) as id, email from person group by email having count(email) > 1) as p2
-> where person.email = p2.email and person.id != p2.id;
Query OK, 1 row affected (0.02 sec)
mysql> select * from Person;
+------+------------------+
| Id | Email |
+------+------------------+
| 1 | john@example.com |
| 2 | bob@example.com |
+------+------------------+
2 rows in set (0.00 sec)