索引分析
单表
建表SQL
CREATE TABLE IF NOT EXISTS article (
`id` INT(10) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
`author_id` INT(10) UNSIGNED NOT NULL,
`category_id` INT(10) UNSIGNED NOT NULL,
`views` INT(10) UNSIGNED NOT NULL,
`comments` INT(10) UNSIGNED NOT NULL,
`title` VARBINARY(255) NOT NULL,
`content` TEXT NOT NULL
);
INSERT INTO article ( author_id , category_id , views , `comments` , title , content) VALUES
(1, 1,1, 1,'1','1'),
(2, 2,2,2, '2', '2'),
(1, 1,3,3, '3', '3');
SELECT * FROM article;
案例
查询category_id为1且comments大于1的情况下,views最多的article_id。
EXPLAIN SELECT id,author_id FROM article WHERE category_id = 1 AND comments > 1 ORDER BY views DESC LIMIT 1;
结论:很显然,type是ALL,即最坏的情况。Extra里还出现了Using filesort,也是最坏的情况。优化是必须的。
开始优化
#新建索引1
create index idx_article_ccv on article(category_id,`comments`,views);
#查看索引
SHOW INDEX FROM article;
#运行查询语句
EXPLAIN SELECT id,author_id FROM article WHERE category_id = 1 AND `comments`>1 ORDER BY views DESC LIMIT 1;
#运行查询语句
EXPLAIN SELECT id,author_id FROM article WHERE category_id = 1 AND `comments`=3 ORDER BY views DESC LIMIT 1;
结论:type变成了range,这是可以忍受的。但是extra里使用Using filesort仍是无法接受的。但是我们已经建立了索引,为啥没用呢? 这是因为按照BTree索引的工作原理, 先排序category_id,如果遇到相同的category_id则再排序comments,如果遇到相同的comments则再排序views。当comments字段在联合索引里处于中间位置时, 因comments>1条件是一个范围值(所谓range),MySQL无法利用索引再对后面的views部分进行检索即range类型查询字段后面的索引无效。
#修改索引(不用)
ALTER TABLE article ADD INDEX idx_article_ccv(category_id ,`comments`, views);
#删除索引
DROP INDEX idx_article_ccv ON article;
#新建索引2
create index idx_article_cv on article(category_id,views);
#运行查询语句
EXPLAIN SELECT id,author_id FROM article WHERE category_id = 1 AND `comments` > 1 ORDER BY views DESC LIMIT 1;
#删除索引
DROP INDEX idx_article_cv ON article;
结论:可以看到,type变为了ref,Extra中的Using filesort也消失了,结果非常理想。
两表
建表SQL
#创建class表
CREATE TABLE IF NOT EXISTS `class` (
id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
card INT(10) UNSIGNED NOT NULL,
PRIMARY KEY (id)
);
#创建book表
CREATE TABLE IF NOT EXISTS `book` (
bookid INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
card INT(10) UNSIGNED NOT NULL,
PRIMARY KEY (bookid)
);
#插入数据(class)
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
SELECT * FROM calss;
#插入数据(book)
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
SELECT * FROM book;
案例
#运行查询语句
EXPLAIN SELECT * FROM class LEFT JOIN book ON class.card = book.card;
#创建索引(book)
ALTER TABLE `book` ADD INDEX Y(`card`);
#查看索引
SHOW INDEX FROM `book`;
#运行查询语句
EXPLAIN SELECT * FROM class LEFT JOIN book ON class.card = book.card;
#删除索引
DROP INDEX Y ON `book`;
#查看索引
SHOW INDEX FROM `book`;
#创建索引(class)
ALTER TABLE `class` ADD INDEX Y(`card`);
#查看索引
SHOW INDEX FROM `class`;
#运行查询语句
EXPLAIN SELECT * FROM class LEFT JOIN book ON class.card = book.card;
可以看到第二行的type变为了ref,rows也变成了优化比较明显。这是由左连接特性决定的。LEFT JOIN条件用于确定如何从右表搜索行,左边一定都有,所以右边是我们的关键点一定需要建立索引。
#删除索引
DROP INDEX Y ON `class`;
#查看索引
SHOW INDEX FROM `class`;
结论:左连,右边建索引;右连,左边创索引。
三表
建表SQL
CREATE TABLE IF NOT EXISTS `phone` (
`phoneid` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
`card` INT(10) UNSIGNED NOT NULL,
PRIMARY KEY (`phoneid`)
) ENGINE = INNODB;
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO phone(card) VALUES(FLOOR(1 + (RAND() * 20)));
案例
#运行查询语句
EXPLAIN SELECT * FROM `class` LEFT JOIN `book` ON class.card = book.card LEFT JOIN `phone` ON book.card = phone.card;
#增加索引(book,phone)
ALTER TABLE `book` ADD INDEX Y(`card`);
ALTER TABLE `phone` ADD INDEX Z(`card`);
#查看索引
SHOW INDEX FROM `book`;
SHOW INDEX FROM `phone`;
#运行查询语句
EXPLAIN SELECT * FROM `class` LEFT JOIN `book` ON class.card = book.card LEFT JOIN `phone` ON book.card = phone.card;
后2行的type都是ref且总rows优化很好,效果不错。因此索引最好设置在需要经常查询的字段中。
细节
- 尽可能减少Join语句中的NestedLoop的循环总次数;“永远用小结果集驱动大的结果集”。
- 优先优化NestedLoop的内层循环;
- 保证Join语句中被驱动表上Join条件字段已经被索引;
- 当无法保证被驱动表的Join条件字段被索引且内存资源充足的前提下,不要太吝惜JoinBuffer的设置;
索引失效(如何避免)
建表SQL
CREATE TABLE staffs (
id INT PRIMARY KEY AUTO_INCREMENT,
NAME VARCHAR (24) NOT NULL DEFAULT '' COMMENT '姓名',
age INT NOT NULL DEFAULT 0 COMMENT '年龄',
pos VARCHAR (20) NOT NULL DEFAULT '' COMMENT '职位',
add_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入职时间'
) CHARSET utf8 COMMENT '员工记录表';
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('z3',22,'manager',NOW());
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('July',23,'dev',NOW());
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('2000',23,'dev',NOW());
SELECT * FROM staffs;
#添加索引(idx_staffs_nameAgePos)
ALTER TABLE staffs ADD INDEX idx_staffs_nameAgePos(`name`,`age`,`pos`);
#查看索引
SHOW INDEX FROM staffs;
案例(索引失效)
全值匹配我最爱
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' AND age = 25;
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' AND age = 23 AND pos = 'dev';
最佳左前缀法则(重点)
如果索引了多列,要遵守最左前缀法则。指的是查询从索引的最左前列开始并且不跳过索引中的列。
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE age = 23 AND pos = 'dev';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE pos = 'dev';
并不是从索引的最左前列开始,导致索引失效;
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' AND pos = 'dev';
跳过索引中的列,导致索引失效;
不在索引列上做任何操作(计算、函数、(自动or手动)类型转换),会导致索引失效而转向全表扫描
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE LEFT(`name`,4) = 'July';
LEFT(
name
,4)
存储引擎不能使用索引中范围条件右边的列
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' AND age > 22 AND pos = 'dev';
"age > 22"后面失效,建议创建索引时age字段放最后。
尽量使用覆盖索引(只访问索引的查询(索引列和查询列一致)),减少select *
#运行查询语句
EXPLAIN SELECT `name`,`age`,`pos` FROM staffs WHERE `name` = 'July' AND age = 23 AND pos = 'dev';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' AND age = 23 AND pos = 'dev';
查询用字段比*用跟优。
#运行查询语句
EXPLAIN SELECT `add_time`,`name`,`age` FROM staffs WHERE `name` = 'July' AND age = 23 AND pos = 'dev';
查询字段尽量查对应索引字段。
mysql在使用不等于(!=或者<> )的时候无法使用索引会导致全表扫描
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` != 'July';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` <> 'July';
is null,is not null也无法使用索引
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` IS NULL;
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` IS NOT NULL;
like以通配符开头(’%abc…’、’_abc’)mysql索引失效会变成全表扫描的操作
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` LIKE '%J';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` LIKE '_J';
#运行查询语句
EXPLAIN SELECT * FROM staffs WHERE `name` LIKE 'J%';
问题:解决like '%字符串%"时索引不被使用的方法?
CREATE TABLE tbl_user (
`id` INT(11) NOT NULL AUTO_INCREMENT,
`name` VARCHAR(20) DEFAULT NULL,
`age` INT(11) DEFAULT NULL,
email VARCHAR(20) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE = INNODB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
INSERT INTO tbl_user(`name`,age,email) VALUES('1aa1',21,'b@163.com');
INSERT INTO tbl_user(`name`,age,email) VALUES('2aa2',222,'a@163.com');
INSERT INTO tbl_user(`name`,age,email) VALUES('3aa3',265,'c@163.com');
INSERT INTO tbl_user(`name`,age,email) VALUES('4aa4',21,'d@163.com');
#运行查询语句
EXPLAIN SELECT `name`,age FROM tbl_user WHERE `name` LIKE '%aa%';
#创建索引(idx_tblUser_nameAge )
CREATE INDEX idx_tblUser_nameAge ON tbl_user(`name`,age);
#查看索引
SHOW INDEX FROM tbl_user;
#运行查询语句
EXPLAIN SELECT `name`,age FROM tbl_user WHERE `name` LIKE '%aa%';
创建复合索引,查询字段不能超过复合索引包含的字段;
字符串不加单引号索引失效
少用or,用它来连接时会索引失效
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' or `name` = 'z3';
使用UNION
EXPLAIN SELECT * FROM staffs WHERE `name` = 'July' UNION SELECT * FROM staffs WHERE `name` = 'z3';
总结
优化口诀
全值匹配我最爱,最左前缀要遵守;
带头大哥不能死,中间兄弟不能断;
索引列上少计算,范围之后全失效;
LIKE百分写最右,覆盖索引不写星;
试题
建表SQL
create table test03(
id int primary key not null auto_increment,
c1 char(10),
c2 char(10),
c3 char(10),
c4 char(10),
c5 char(10)
);
insert into test03(c1,c2,c3,c4,c5) values('a1','a2','a3','a4','a5');
insert into test03(c1,c2,c3,c4,c5) values('b1','b2','b3','b4','b5');
insert into test03(c1,c2,c3,c4,c5) values('c1','c2','c3','c4','c5');
insert into test03(c1,c2,c3,c4,c5) values('d1','d2','d3','d4','d5');
insert into test03(c1,c2,c3,c4,c5) values('e1','e2','e3','e4','e5');
SELECT * FROM test03
CREATE INDEX idx_text03_c12345 ON test03(c1,c2,c3,c4,c5);
SHOW INDEX FROM test03
题目
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c3 = 'a3' AND c4 = 'a4';
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c4 = 'a4' AND c3 = 'a3';
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c3 > 'a3' AND c4 = 'a4';
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c4 > 'a4' AND c3 = 'a3';
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c4 > 'a4' ORDER BY c3;
c3作用在排序而不是查找
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' ORDER BY c3;
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' ORDER BY c4;
出现了filesort
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c5 = 'a5' ORDER BY c2,c3;
只用c1一个字段索引,但是c2,c3用于排序无filesort;
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c5 = 'a5' ORDER BY c3,c2;
出现了filesor, 我们建的索引是1234,它没有按照顺序来,c3/c2颠倒了
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c5 = 'a5' ORDER BY c2,c3;
#对比EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c5 = 'a5' ORDER BY c3,c2;
EXPLAIN SELECT * FROM test03 WHERE c1 = 'a1' AND c2 = 'a2' AND c5 = 'a5' ORDER BY c3,c2;
与上面做对比,重点c2 = ‘a2’,ORDER BY排序c2已经是一个常量。
EXPLAIN SELECT c3,c2 FROM test03 WHERE c1 = 'a1' AND c4 = 'a4' GROUP BY c3,c2;
分组之前必排序;
定值,范围还是排序,一般order by是给个范围;
group by基本上都需要进行排序,会有临时表产生;
一般性建议
对于单键索引,尽量选择针对当前query过滤性更好的索引;
在选择组合索引的时候,当前Query中过滤性最好的字段在索引字段顺序中,位置越靠前(左)越好;
在选择组合索引的时候,尽量选择可以能够包含当前query中的where字句中更多字段的索引;
尽可能通过分析统计信息和调整query的写法来达到选择合适索引的目的;