无论创建何种类型的分区,如果表中存在主键或唯一索引的列,则分区列必须是主键或唯一索引的一部分。索引列可以是null值。
在没有主键和唯一索引的表中可以指定任意列为索引列。表中只能最多有一个唯一索引,即primary key 和unique key不能同时存在,primary key包含在unique key中时除外。
如对id分区,1千万一个区,分了100个区,当查id=1时,没分区时原来的索引就会对全表走索引,分区后,会直接查第1个区。
1.新建normal_table - 正常表,1千万数据
新建area_table - 分区表,1千万数据
CREATE TABLE `normal_table` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key_num` int(11) NOT NULL DEFAULT '0' COMMENT '索引数字',
`num` int(11) NOT NULL DEFAULT '0' COMMENT '普通数字',
PRIMARY KEY (`ID`),
KEY `key_num` (`key_num`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='正常表,1千万数据';
CREATE TABLE `area_table` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key_num` int(11) NOT NULL DEFAULT '0' COMMENT '索引数字',
`num` int(11) NOT NULL DEFAULT '0' COMMENT '普通数字',
PRIMARY KEY (`ID`),
KEY `key_num` (`key_num`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='分区表,1千万数据'
PARTITION BY RANGE COLUMNS(id) (
PARTITION p0 VALUES LESS THAN(1000001),
PARTITION p1 VALUES LESS THAN(2000001),
PARTITION p2 VALUES LESS THAN(3000001),
PARTITION p3 VALUES LESS THAN(4000001),
PARTITION p4 VALUES LESS THAN(5000001),
PARTITION p5 VALUES LESS THAN(6000001),
PARTITION p6 VALUES LESS THAN(7000001),
PARTITION p7 VALUES LESS THAN(8000001),
PARTITION p8 VALUES LESS THAN(9000001),
PARTITION p9 VALUES LESS THAN(MAXVALUE)
);
SELECT * from area_table WHERE num BETWEEN 0 and 990000;
8.375s,无索引
SELECT * from normal_table WHERE num BETWEEN 0 and 990000;
8.590s,无索引
SELECT * from area_table WHERE key_num BETWEEN 0 and 990000;
4.029s,能使用索引
SELECT * from normal_table WHERE key_num BETWEEN 0 and 990000;
9.345s,不能使用索引
SELECT * from area_table WHERE key_num BETWEEN 0 and 2500000;
12.000s,能使用索引
SELECT * from normal_table WHERE key_num BETWEEN 0 and 2500000;
16.365s,不能使用索引
SELECT * from area_table WHERE id BETWEEN 0 and 2500000;
18.544s,能使用主键,只扫描p0,p1,p2分片
SELECT * from normal_table WHERE id BETWEEN 0 and 2500000;
19.478s,能使用主键
SELECT * from area_table WHERE id BETWEEN 0 and 990000;
3.964s,能使用主键,只扫描p0分片
SELECT * from normal_table WHERE id BETWEEN 0 and 990000;
3.807s,能使用主键
2.新建big_normal_table - 正常表,8千万数据
新建big_area_table - 分区表,8千万数据
CREATE TABLE `big_normal_table` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key_num` int(11) NOT NULL DEFAULT '0' COMMENT '索引数字',
`num` int(11) NOT NULL DEFAULT '0' COMMENT '普通数字',
PRIMARY KEY (`ID`),
KEY `key_num` (`key_num`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='大正常表,八千万数据';
CREATE TABLE `big_area_table` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`key_num` int(11) NOT NULL DEFAULT '0' COMMENT '索引数字',
`num` int(11) NOT NULL DEFAULT '0' COMMENT '普通数字',
PRIMARY KEY (`ID`),
KEY `key_num` (`key_num`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='大分区表,8千万数据'
PARTITION BY RANGE COLUMNS(id) (
PARTITION p0 VALUES LESS THAN(10000001),
PARTITION p1 VALUES LESS THAN(20000001),
PARTITION p2 VALUES LESS THAN(30000001),
PARTITION p3 VALUES LESS THAN(40000001),
PARTITION p4 VALUES LESS THAN(50000001),
PARTITION p5 VALUES LESS THAN(60000001),
PARTITION p6 VALUES LESS THAN(70000001),
PARTITION p7 VALUES LESS THAN(80000001),
PARTITION p8 VALUES LESS THAN(90000001),
PARTITION p9 VALUES LESS THAN(MAXVALUE)
);
SELECT * from big_area_table WHERE num BETWEEN 0 and 990000;
43.883s,无索引
SELECT * from normal_table WHERE num BETWEEN 0 and 990000;
43.557s,无索引
SELECT * from big_area_table WHERE key_num BETWEEN 0 and 990000;
3.889s,能使用索引
SELECT * from normal_table WHERE key_num BETWEEN 0 and 990000;
3.940s,能使用索引
SELECT * from big_area_table WHERE key_num BETWEEN 0 and 2000000;
15.603s,能使用索引
SELECT * from normal_table WHERE key_num BETWEEN 0 and 2000000;
14.556s,能使用索引
SELECT * from big_area_table WHERE key_num BETWEEN 0 and 8000000;
86.228s,能使用索引
SELECT * from normal_table WHERE key_num BETWEEN 0 and 8000000;
77.976s,能使用索引
EXPLAIN SELECT * from big_area_table WHERE key_num BETWEEN 8000000 and 15000000;
56.523s,能使用索引
EXPLAIN SELECT * from normal_table WHERE key_num BETWEEN 8000000 and 15000000;
88.527s,不能使用索引
SELECT * from big_area_table WHERE id BETWEEN 0 and 8000000;
41.931s,只扫描p0分片,能使用主键
SELECT * from normal_table WHERE id BETWEEN 0 and 8000000;
41.806s,能使用主键
以上实验可得:
用一般字段查询时,分区表和正常表差别不大,因为都用不到索引和主键,都是全表查询,所以速度差别不大。
用索引查询时,对某些查询(常见于数据量较大的查询),分区表速度会快一些,分区表能用到索引,正常表用不到索引,
猜测原因是正常表判断这个索引查询范围数据量太大,走索引效率也不高,所以直接全表扫描了;
而分区表通过索引定位到主键时,发现这些主键只在某几个分区,这样只查这几个分区就效率很快,比全表扫描要好,就可以走索引了。
用主键查询时,分区表和正常表差别不大,因为都是走主键,所以速度差别不大。
总结:
当你的表是千万级的,且需要走某个索引去查数据,但因为查的数据量过大走不了索引时,可以考虑用分区表。
比如一个订单表,有个功能要查1年来的订单明细,就算对订单时间加了索引,但数据量太大还是走不了索引,这时就可考虑用分区表了。
ps:
-- 创建存储过程插入数据
DROP PROCEDURE IF EXISTS add_data;
DELIMITER $$
CREATE PROCEDURE add_data()
BEGIN
DECLARE i INT;
SET i=1;
WHILE(i<=10000000) DO
insert into normal_table(key_num,num) values(i, i);
insert into area_table(key_num,num) values(i, i);
SET i=i+1;
END WHILE;
END;
$$
DELIMITER;
-- 调用存储过程
call add_data();
--查询每个分区的数据量
SELECT
partition_name,
partition_expression,
table_rows
FROM
information_schema.PARTITIONS
WHERE
table_schema = SCHEMA()
AND table_name = 'area_table';