mysql 处理重复数据_MySQL处理重复数据实例分析

MySQL限制数据重复的方式:表上增加主键(Primary Key)或增加唯一性索引(Unique)

主键对重复资料进行限制,这样资料在导入时就无法重复插入

create table primary_t(

last_name varchar(20),

first_name varchar(20),

sex varchar(8),

primary key(last_name,first_name)

)engine=innodb default charset=utf8;

show table status like 'primary_t'\G

show index from primary_t\G

模拟插入两笔有主键约束的资料

insert into primary_t(last_name,first_name,sex) values('Frank','Li','Man'),('Frank','Li','Man'),('Coco','Li','Woman');

ERROR 1062 (23000): Duplicate entry 'Frank-Li' for key 'PRIMARY'

insert into primary_t(last_name,first_name,sex) values('Frank','Li','Man'),('Frank','Cai','Man'),('Coco','Li','Woman');

mysql> select * from primary_t;

+-----------+------------+-------+

| last_name | first_name | sex |

+-----------+------------+-------+

| Coco | Li | Woman |

| Frank | Cai | Man |

| Frank | Li | Man |

+-----------+------------+-------+

rows in set (0.00 sec)

唯一性索引对复制资料进行限制

create table unique_t(

last_name varchar(20),

first_name varchar(20),

sex varchar(8),

unique index unique_name(last_name,first_name)

)engine=innodb default charset=utf8;

show table status like 'unique_t'\G

show index from unique_t\G

模拟插入两笔有唯一性约束的资料

insert into unique_t(last_name,first_name,sex) values('Frank','Li','Man'),('Frank','Li','Man'),('Coco','Li','Woman');

ERROR 1062 (23000): Duplicate entry 'Frank-Li' for key 'unique_name'

insert into unique_t(last_name,first_name,sex) values('Frank','Li','Man'),('Frank','Cai','Man'),('Coco','Li','Woman');

mysql> select * from unique_t;

+-----------+------------+-------+

| last_name | first_name | sex |

+-----------+------------+-------+

| Frank | Li | Man |

| Frank | Cai | Man |

| Coco | Li | Woman |

+-----------+------------+-------+

rows in set (0.00 sec)

对资料的统计

create table count_t(

last_name varchar(20),

first_name varchar(20),

sex varchar(8)

)engine=innodb default charset=utf8;

insert into count_t values('Frank','A','Man'),

('Frank','B','Man'),

('Frank','C','Woman'),

('Frank','D','Man'),

('Frank','E','Man'),

('Frank','F','Woman'),

('Frank','G','Man'),

('Frank','H','Man'),

('Frank','I','Woman'),

('Coco','A','Woman'),

('Coco','B','Man'),

('Coco','C','Man'),

('Coco','D','Man'),

('Coco','E','Man'),

('Coco','F','Man'),

('Coco','G','Woman'),

('Coco','H','Woman'),

('Alex','B','Man'),

('ALex','C','Man'),

('ALex','D','Man'),

('Alex','E','Man'),

('ALex','F','Man'),

('ALex','G','Woman'),

('ALex','H','Woman')

;

commit;

按各个列进行分组统计

select last_name,count(*) from count_t group by last_name;

select first_name,count(*) from count_t group by first_name;

select sex,count(*) from count_t group by sex;

mysql> select last_name,count(*) from count_t group by last_name;

+-----------+----------+

| last_name | count(*) |

+-----------+----------+

| Alex | 7 |

| Coco | 8 |

| Frank | 9 |

+-----------+----------+

rows in set (0.00 sec)

mysql> select first_name,count(*) from count_t group by first_name;

+------------+----------+

| first_name | count(*) |

+------------+----------+

| A | 2 |

| B | 3 |

| C | 3 |

| D | 3 |

| E | 3 |

| F | 3 |

| G | 3 |

| H | 3 |

| I | 1 |

+------------+----------+

rows in set (0.00 sec)

mysql> select sex,count(*) from count_t group by sex;

+-------+----------+

| sex | count(*) |

+-------+----------+

| Man | 16 |

| Woman | 8 |

+-------+----------+

rows in set (0.00 sec)

对分组的统计在进行限定,having 大于等于8次的记录

select last_name ,count(*) from count_t group by last_name having count(*) >=8;

mysql> select last_name ,count(*) from count_t group by last_name having count(*) >=8;

+-----------+----------+

| last_name | count(*) |

+-----------+----------+

| Coco | 8 |

| Frank | 9 |

+-----------+----------+

rows in set (0.00 sec)

对资料进行去重操作,主要使用distinct函数

select distinct last_name from count_t;

select distinct first_name from count_t;

select distinct sex from count_t;

对已有资料的表进行去重操作

insert into count_t values('Xi','Xi','Man'),('Xi','Xi','Man');

增加主键约束,加入ignore,忽悠重复的资料

alter ignore table count_t

add primary key(last_name,first_name);

select * from count_t where last_name='Xi';

mysql> alter ignore table count_t

-> add primary key(last_name,first_name);

Query OK, 26 rows affected (0.01 sec)

Records: 26 Duplicates: 1 Warnings: 0

mysql> select * from count_t where last_name='Xi';

+-----------+------------+------+

| last_name | first_name | sex |

+-----------+------------+------+

| Xi | Xi | Man |

+-----------+------------+------+

row in set (0.00 sec)

使用group by方式也可去除重复资料

create table count_t0 as select last_name,first_name,sex from count_t group by last_name,first_name,sex;

0b1331709591d260c1c78e86d0c51c18.png

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值