使用select count查看一下数据库数据量
mysql> select count(*) from zyads_integral ;
+----------+
| count(*) |
+----------+
| 4130473 |
+----------+
1 row in set (0.01 sec)
使用desc查看一下数据表结构
mysql> desc zyads_integral;
+-------+---------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+-------+---------+------+-----+---------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| hash | text | YES | | NULL | |
| sha1 | text | NO | | NULL | |
| name | text | NO | | NULL | |
| index | text | YES | | NULL | |
| size | text | YES | | NULL | |
+-------+---------+------+-----+---------+----------------+
6 rows in set (0.01 sec)
样例数据
mysql> select * from zyads_integral limit 1\G
*************************** 1. row ***************************
id: 6721212
hash: 0FA565EEFA9E688B1F87640815EE090C7326725D
sha1: 8c907b045bb7905cf2a63f0b1208eeb3bca857d6
name: 【无效链接】xxxxxx.html
index: 107
size: 78110108
1 row in set (0.01 sec)
接下来开始去掉重复数据
mysql> select id, sha1, count(*) from zyads_integral group by sha1 limit 10;
+---------+------------------------------------------+----------+
| id | sha1 | count(*) |
+---------+------------------------------------------+----------+
| 7696 | | 1 |
| 5137851 | 0000000000000000000000000000000005325911 | 2 |
| 5363699 | 00000000000000000000000000000000097ecf88 | 5 |
| 4826139 | 000000000000000000000000000000000fd81983 | 1 |
| 6250586 | 000000000000000000000000000000001b41f909 | 1 |
| 5597063 | 000000000000000000000000000000001d385b7c | 2 |
| 5281295 | 000000000000000000000000000000002a91e078 | 2 |
| 6331972 | 000000000000000000000000000000003488380d | 2 |
| 4774906 | 00000000000000000000000000000000397db43d | 1 |
| 4550736 | 00000000000000000000000000000000494ec71f | 1 |
+---------+------------------------------------------+----------+
10 rows in set (24.71 sec)
mysql> select count(*) from zyads_integral where sha1= '0000000000000000000000000000000005325911';
+----------+
| count(*) |
+----------+
| 2 |
+----------+
1 row in set (1.03 sec)
mysql> select id, count(*) from zyads_integral group by sha1 having count(*) > 1;
+---------+----------+
| id | count(*) |
+---------+----------+
| 5137851 | 2 |
| 5363699 | 5 |
| 5597063 | 2 |
| 5281295 | 2 |
...
| 4712249 | 6 |
| 1581236 | 3 |
| 5126827 | 2 |
| 1872277 | 7 |
+---------+----------+
836343 rows in set (33.77 sec)
mysql> select id from zyads_integral group by sha1 having count(*) >= 1;
+---------+
| id |
+---------+
| 7696 |
| 5137851 |
| 5363699 |
| 4826139 |
| 6250586 |
...
| 5126827 |
| 570573 |
| 1872277 |
| 4514446 |
+---------+
2466076 rows in set (3 min 36.80 sec)
删除重复数据
-- Remove duplicates in place: within every sha1 group keep the row with the
-- smallest id and delete all the others.
-- The inner SELECT is wrapped in a derived table (keep_rows) because MySQL does
-- not allow a DELETE to read its own target table directly in a subquery.
-- MIN(id) can never be NULL here (id is the NOT NULL primary key), so NOT IN is safe.
mysql> delete from zyads_integral where id not in (select keep_id from (select min(id) as keep_id from zyads_integral group by sha1) keep_rows);
-- Staging table that receives one row per distinct sha1.
-- The UNIQUE KEY on `sha1` makes the server reject a second row with the same
-- sha1, so this copy cannot contain duplicates.
-- AUTO_INCREMENT=6756155 sets the first id generated for rows inserted without an id.
-- NOTE(review): the source columns are TEXT (see the DESC output); values longer
-- than these VARCHAR limits are truncated or rejected depending on sql_mode.
-- Confirm the limits against the real data before copying.
CREATE TABLE `zyads_integral_tmp` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `hash` varchar(100),
  `sha1` varchar(100) NOT NULL,
  `name` varchar(1000) NOT NULL,
  `index` varchar(10),
  `size` varchar(10),
  PRIMARY KEY (`id`),
  UNIQUE KEY `sha1` (`sha1`)
) ENGINE=MyISAM AUTO_INCREMENT=6756155 DEFAULT CHARSET=gbk;
-- Copy exactly one row per sha1 into the staging table.
-- first_rows picks the smallest id of each sha1 group; joining back on that id
-- takes every column from that single row, so the result is deterministic and
-- valid under ONLY_FULL_GROUP_BY.
-- The id column is not copied; the staging table generates new ids.
INSERT INTO zyads_integral_tmp (`hash`, `sha1`, `name`, `index`, `size`)
SELECT
    src.`hash`,
    src.`sha1`,
    src.`name`,
    src.`index`,
    src.`size`
FROM zyads_integral AS src
INNER JOIN (
    SELECT MIN(id) AS keep_id
    FROM zyads_integral
    GROUP BY sha1
) AS first_rows
    ON first_rows.keep_id = src.id;
-- Move the original table out of the way under a backup name.
-- MySQL requires the form RENAME TABLE <old> TO <new>; the TABLE and TO
-- keywords cannot be omitted.
-- NOTE(review): presumably followed by renaming zyads_integral_tmp to
-- zyads_integral to complete the swap. Confirm against the rest of the notes.
mysql> rename table zyads_integral to zyads_integral_tmp_1;