六、MySQL 索引优化二

骑士梦

已于 2023-06-14 14:13:59 修改

阅读量624

点赞数

分类专栏： MySQL 文章标签： mysql 数据库 database

于 2021-12-08 20:38:58 首次发布

骑士梦

本文链接：https://blog.csdn.net/qq_38667427/article/details/121799844

版权

MySQL 专栏收录该内容

13 篇文章 2 订阅

订阅专栏

一、MySQL 操作集
二、MySQL 数据类型
三、MySQL 函数
四、MySQL Explain
五、MySQL 索引优化一
六、MySQL 索引优化二
七、SQL 优化
八、MySQL 锁与事务
九、MySQL 索引数据结构
十、MySQL 外键

一、索引优化

员工记录表。

DROP TABLE IF EXISTS `employees`;

CREATE TABLE `employees`
(
    `id`        int(11)     NOT NULL AUTO_INCREMENT,
    `name`      varchar(24) NOT NULL DEFAULT '' COMMENT '姓名',
    `age`       int(11)     NOT NULL DEFAULT '0' COMMENT '年龄',
    `position`  varchar(20) NOT NULL DEFAULT '' COMMENT '职位',
    `hire_time` timestamp   NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入职时间',
    PRIMARY KEY (`id`),
    -- 联合索引
    KEY `idx_name_age_position` (`name`, `age`, `position`) USING BTREE
) ENGINE = InnoDB
  AUTO_INCREMENT = 4
  DEFAULT CHARSET = utf8 COMMENT ='员工记录表';

# 插入三条数据
INSERT INTO employees(name, age, position, hire_time)
VALUES ('LiLei', 22, 'manager', NOW());
INSERT INTO employees(name, age, position, hire_time)
VALUES ('HanMeimei', 23, 'dev', NOW());
INSERT INTO employees(name, age, position, hire_time)
VALUES ('Lucy', 23, 'dev', NOW());

1. `MySQL` 如何选择合适索引

EXPLAIN select * from employees where name > 'a';

上面 SQL 如果使用 name 索引查询。

需要遍历 name 字段联合索引树。
然后还需要根据遍历出来的主键值，到主键索引树再去查出最终数据。
成本比全表扫描还高。所以 不选择使用索引。

EXPLAIN select name,age,position from employees where name > 'a' ;

可以使用 覆盖索引优化。
只需要遍历 name 字段的联合索引树，就能拿到所有结果（不需要再回表查询）。

EXPLAIN select * from employees where name > 'zzz';

匹配到索引 name。

对于 name > 'a' 和 name > 'zzz' 的执行结果。
MySQL 最终是否选择走索引或者一张表涉及多个索引，MySQL 最终如何选择索引。
可以用 trace 工具查看。
开启 trace 工具会影响 MySQL 性能。
所以只能临时分析SQL 使用，用完之后立即关闭。

2. `trace` 工具

EXPLAIN select * from employees where name > 'a' order by position;

全表扫描。

EXPLAIN select * from employees where name > 'zzz' order by position;

索引查询。

# 开启trace
set session optimizer_trace="enabled=on",end_markers_in_json=on;

# 关闭trace
set session optimizer_trace="enabled=off";

2.1 全表扫描

分析 trace 字段。

select * from employees where name > 'a' order by position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;

{
	"steps": [
		{	
			// 一、SQL准备阶段
			"join_preparation": {
				"select#": 1,
				"steps": [
					{
						"expanded_query": "/* select#1 */ select `employees`.`id` AS `id`,`employees`.`name` AS `name`,`employees`.`age` AS `age`,`employees`.`position` AS `position`,`employees`.`hire_time` AS `hire_time` from `employees` where (`employees`.`name` > 'a') order by `employees`.`position`"
					}
				] /* steps */
			} /* join_preparation */
		},
		{	
			// 二、SQL优化阶段
			"join_optimization": {
				"select#": 1,
				"steps": [
					{
						// 1. 条件处理
						"condition_processing": {
							"condition": "WHERE",
							// 1.1 原始条件
							"original_condition": "(`employees`.`name` > 'a')",
							// 1.2 结果条件（比较了3条数据）
							"steps": [
								{
									"transformation": "equality_propagation",
									"resulting_condition": "(`employees`.`name` > 'a')"
								},
								{
									"transformation": "constant_propagation",
									"resulting_condition": "(`employees`.`name` > 'a')"
								},
								{
									"transformation": "trivial_condition_removal",
									"resulting_condition": "(`employees`.`name` > 'a')"
								}
							] /* steps */
						} /* condition_processing */
					},
					{
						// 替换生成的列
						"substitute_generated_columns": {} /* substitute_generated_columns */
					},
					{
						// 2. 表依赖详情
						"table_dependencies": [
							{
								"table": "`employees`",
								"row_may_be_null": false,	// 行可以为空
								"map_bit": 0,				// 映射位
								"depends_on_map_bits": [] /* depends_on_map_bits */
							}
						] /* table_dependencies */
					},
					{
						// 参考优化器key使用
						"ref_optimizer_key_uses": [] /* ref_optimizer_key_uses */
					},
					{
						// 3. 预估表的访问成本
						"rows_estimation": [
							{
								"table": "`employees`",
								// 范围分析
								"range_analysis": {
									// 3.1 全表扫描情况
									"table_scan": {
										"rows": 3,		// 扫描行数
										"cost": 3.7		// 查询成本
									} /* table_scan */ ,
									// 3.2 查询可能使用的索引
									"potential_range_indexes": [
										{
											"index": "PRIMARY",	// 主键索引
											"usable": false,
											"cause": "not_applicable"
										},
										{
											"index": "idx_name_age_position",	// 辅助索引
											"usable": true,
											"key_parts": [
												"name",
												"age",
												"position",
												"id"
											] /* key_parts */
										}
									] /* potential_range_indexes */ ,
									// 设置范围条件
									"setup_range_conditions": [] /* setup_range_conditions */ ,
									// 组索引范围
									"group_index_range": {
										"chosen": false,
										"cause": "not_group_by_or_distinct"
									} /* group_index_range */ ,
									// 3.3 分析各个索引使用成本
									"analyzing_range_alternatives": {
										// 扫描范围
										"range_scan_alternatives": [
											{
												"index": "idx_name_age_position",
												"ranges": [
													"a < name"			// 索引使用范围
												] /* ranges */ ,
												"index_dives_for_eq_ranges": true,
												"rowid_ordered": false,	// 使用该索引获取的记录是否按照主键排序
												"using_mrr": false,
												"index_only": false,	// 是否使用覆盖索引
												"rows": 3,				// 索引扫描行数
												"cost": 4.61,			// 索引使用成本
												"chosen": false,		// 是否选择该索引
												"cause": "cost"
											}
										] /* range_scan_alternatives */ ,
										// 分析行序交集
										"analyzing_roworder_intersect": {
											"usable": false,					// 不实用
											"cause": "too_few_roworder_scans"	// 扫描次数太少
										} /* analyzing_roworder_intersect */
									} /* analyzing_range_alternatives */
								} /* range_analysis */
							}
						] /* rows_estimation */
					},
					{
						// 4. 考虑执行计划
						"considered_execution_plans": [
							{
								"plan_prefix": [] /* plan_prefix */ ,
								"table": "`employees`",
								// 4.1 最优访问路径
								"best_access_path": {
									// 最终选择的访问路径
									"considered_access_paths": [
										{
											"rows_to_scan": 3,		// 扫描行
											"access_type": "scan",	// 访问类型（`scan`为全表扫描）
											"resulting_rows": 3,	// 结果行
											"cost": 1.6,			// 索引使用成本
											"chosen": true,			// 确定选择
											"use_tmp_table": true	// 使用临时表
										}
									] /* considered_access_paths */
								} /* best_access_path */ ,
								"condition_filtering_pct": 100,
								"rows_for_plan": 3,					// 计划行数
								"cost_for_plan": 1.6,				// 计划成本
								"sort_cost": 3,						// 排序成本
								"new_cost_for_plan": 4.6,			// 计划新成本 = 计划成本 + 排序成本
								"chosen": true						// 确定选择		
							}
						] /* considered_execution_plans */
					},
					{
						// 5. 将条件附加到表
						"attaching_conditions_to_tables": {
							"original_condition": "(`employees`.`name` > 'a')",		// 原条件
							"attached_conditions_computation": [] /* attached_conditions_computation */ ,
							// 附加条件总结
							"attached_conditions_summary": [
								{
									"table": "`employees`",
									"attached": "(`employees`.`name` > 'a')"
								}
							] /* attached_conditions_summary */
						} /* attaching_conditions_to_tables */
					},
					{
						// 6. 子句处理
						"clause_processing": {
							"clause": "ORDER BY",							// 排序
							"original_clause": "`employees`.`position`",	// 原排序子句
							"items": [
								{
									"item": "`employees`.`position`"
								}
							] /* items */ ,
							"resulting_clause_is_simple": true,				// 是否简单
							"resulting_clause": "`employees`.`position`"	// 结果排序子句
						} /* clause_processing */
					},
					{
						// 7. 重新考虑索引排序的访问路径
						"reconsidering_access_paths_for_index_ordering": {
							"clause": "ORDER BY",
							"steps": [] /* steps */ ,
							// 索引排序总结
							"index_order_summary": {
								"table": "`employees`",
								"index_provides_order": false,	// 索引提供排序
								"order_direction": "undefined",	// 排序方向
								"index": "unknown",				// 排序索引
								"plan_changed": false			// 计划改变
							} /* index_order_summary */
						} /* reconsidering_access_paths_for_index_ordering */
					},
					{
						// 8. 完善计划
						"refine_plan": [
							{
								"table": "`employees`"
							}
						] /* refine_plan */
					}
				] /* steps */
			} /* join_optimization */
		},
		{
			// 三、SQL执行阶段
			"join_execution": {
				"select#": 1,
				"steps": [
					{
						// 1. 文件排序信息
						"filesort_information": [
							{
								"direction": "asc",		// 方向
								"table": "`employees`",	// 表
								"field": "position"		// 字段
							}
						] /* filesort_information */ ,
						// 2. 文件排序优先级队列优化
						"filesort_priority_queue_optimization": {
							"usable": false,						// 不可用
							"cause": "not applicable (no LIMIT)"	// 不适用
						} /* filesort_priority_queue_optimization */ ,
						// 3. 文件排序执行
						"filesort_execution": [] /* filesort_execution */ ,
						// 4. 文件排序总结
						"filesort_summary": {
							"rows": 3,					// 目标行数
							"examined_rows": 3,			// 检查行数
							"number_of_tmp_files": 0,	// 临时文件的数量
							"sort_buffer_size": 200704,	// sort_buffer大小
							"sort_mode": "<sort_key, packed_additional_fields>"	// 单路排序
						} /* filesort_summary */
					}
				] /* steps */
			} /* join_execution */
		}
	] /* steps */
}

结论："access_type": "scan" 全表扫描的成本低于索引扫描。
所以 MySQL 最终选择全表扫描。

2.2 索引查询

分析 trace 字段。

select * from employees where name > 'zzz' order by position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;

3. 预估表的访问成本

{
	// 3. 预估表的访问成本
	"rows_estimation": [{
		"table": "`employees`",
		// 范围分析
		"range_analysis": {
			// 3.1 全表扫描情况
			"table_scan": {
				"rows": 3,		// 扫描行数
				"cost": 3.7		// 查询成本
			} /* table_scan */ ,
			/// 3.2 查询可能使用的索引
			"potential_range_indexes": [{
					"index": "PRIMARY",	// 主键索引
					"usable": false,
					"cause": "not_applicable"
				},
				{
					"index": "idx_name_age_position",	// 辅助索引
					"usable": true,
					"key_parts": [
						"name",
						"age",
						"position",
						"id"
					] /* key_parts */
				}
			] /* potential_range_indexes */ ,
			// 设置范围条件
			"setup_range_conditions": [] /* setup_range_conditions */ ,
			// 组索引范围
			"group_index_range": {
				"chosen": false,
				"cause": "not_group_by_or_distinct"
			} /* group_index_range */ ,
			// 3.3 分析各个索引使用成本
			"analyzing_range_alternatives": {
				// 扫描范围
				"range_scan_alternatives": [{
					"index": "idx_name_age_position",
					"ranges": [
						"zzz < name"		// 索引使用范围
					] /* ranges */ ,
					"index_dives_for_eq_ranges": true,
					"rowid_ordered": false,	// 使用该索引获取的记录是否按照主键排序
					"using_mrr": false,
					"index_only": false,	// 是否使用覆盖索引
					"rows": 1,				// 索引扫描行数
					"cost": 2.21,			// 索引使用成本
					"chosen": true			// 是否选择该索引
				}] /* range_scan_alternatives */ ,
				// 分析行序交集
				"analyzing_roworder_intersect": {
					"usable": false,					// 不实用
					"cause": "too_few_roworder_scans"	// 扫描次数太少
				} /* analyzing_roworder_intersect */
			} /* analyzing_range_alternatives */ ,
			// 3.4 所选访问范围
			"chosen_range_access_summary": {
				"range_access_plan": {
					"type": "range_scan",
					"index": "idx_name_age_position",
					"rows": 1,
					"ranges": [
						"zzz < name"
					] /* ranges */
				} /* range_access_plan */ ,
				"rows_for_plan": 1,
				"cost_for_plan": 2.21,
				"chosen": true
			} /* chosen_range_access_summary */
		} /* range_analysis */
	}] /* rows_estimation */
}

4. 考虑执行计划

{
	// 4. 考虑执行计划
	"considered_execution_plans": [
		{
			"plan_prefix": [] /* plan_prefix */ ,
			"table": "`employees`",
			// 4.1 最优访问路径
			"best_access_path": {
				// 最终选择的访问路径
				"considered_access_paths": [
					{
						"rows_to_scan": 1,		// 扫描行
						"access_type": "range",	// 访问类型（`range`为索引范围查询）
						"range_details": {		// 范围明细
							"used_index": "idx_name_age_position"	// 使用索引
						} /* range_details */ ,
						"resulting_rows": 1,	// 结果行
						"cost": 2.41,			// 索引使用成本
						"chosen": true,			// 确定选择
						"use_tmp_table": true	// 使用临时表
					}
				] /* considered_access_paths */
			} /* best_access_path */ ,
			"condition_filtering_pct": 100,
			"rows_for_plan": 1,					// 计划行数
			"cost_for_plan": 2.41,				// 计划成本
			"sort_cost": 1,						// 排序成本
			"new_cost_for_plan": 3.41,			// 计划新成本 = 计划成本 + 排序成本
			"chosen": true						// 确定选择
		}
	] /* considered_execution_plans */
},

8. 完善计划

// 8. 完善计划
"refine_plan": [
	{
		"table": "`employees`",
		"pushed_index_condition": "(`employees`.`name` > 'zzz')",
		"table_condition_attached": null
	}
] /* refine_plan */

结论："access_type": "range" 索引扫描的成本低于全表扫描。
所以 MySQL 最终选择索引扫描。

二、`ORDER BY` 与 `GROUP BY` 优化

EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' AND position = 'dev' ORDER BY age;

分析：利用最左前缀法则。
中间字段不能断，因此查询只用到了 name 索引。
从 key_len = 74 也能看出，age 索引列用在排序过程中。
因为 Extra 字段里没有 using filesort（将用外部文件排序，而不是索引排序）。

EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY position;

分析：key_len = 74，查询使用了 name 索引，由于用了 position 进行排序，跳过了 age。
Extra 字段出现了 Using filesort（将用外部文件排序，而不是索引排序）。

EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY age,position;

分析：查询只用到 name 索引，age 和 position 字段用于排序。
Extra 字段无 Using filesort（将用外部文件排序，而不是索引排序）。

EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY position,age;

分析：Extra 字段出现了 Using filesort（将用外部文件排序而不是索引排序）
因为索引的顺序为 name,age,position。
但是排序的时候 age,position 颠倒位置了。

EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' AND age = 18 ORDER BY position,age;

分析：Extra 字段出现 Using index condition。
因为 age 为常量，在排序中被优化，所以索引未颠倒。

# Using filesort
EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY age ASC, position DESC;
EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY age ASC, position ASC;
EXPLAIN SELECT * FROM employees WHERE name = 'LiLei' ORDER BY age DESC, position DESC;

分析：虽然排序的字段列与索引顺序一样。
因为 order by 默认升序，这里 position desc 降序，导致与索引的排序方式不同。
从而产生 Using filesort。
MySQL 8.0 以上版本有降序索引，可以支持该种索引排序方式。

EXPLAIN SELECT * FROM employees WHERE name IN ('LiLei', 'qs') ORDER BY age,position;

分析：对于排序来说，多个相等条件也是范围查询。

EXPLAIN SELECT * FROM employees WHERE name > 'a' ORDER BY name;

没有用覆盖索引优化。

EXPLAIN SELECT name,age,position FROM employees WHERE name > 'a' ORDER BY name;

8.1 使用覆盖索引优化。

三、`ORDER BY` 与 `GROUP BY` 优化总结

MySQL 支持两种排序方式 filesort 和 index。
Using index 是指 MySQL 扫描索引本身完成排序。
index 效率高，filesort 效率低。
order by 满足两种情况会使用 Using index。

order by 语句使用索引最左前列。
使用 where 子句与 order by 子句条件列组合，满足索引最左前列。

尽量在索引列上完成排序，遵循索引建立时的 最左前缀法则（索引创建的顺序）。
如果 order by 的条件不在索引列上，就会产生 Using filesort。
尽量使用覆盖索引。
group by 与 order by 很类似，其本质是先排序后分组。
遵照索引创建顺序的 最左前缀法则。
对于 group by 的优化。
如果不需要排序，可以加上 order by null 禁止排序。
where 优于 having。
能写在 where 子句中限定条件，就不要在 having 子句限定了。

四、`Using filesort` 文件排序

filesort 文件排序分两种方式。

单路排序。
是一次性取出满足条件行的所有字段，然后在 sort buffer 中进行排序。
用 trace工具，可以看到 sort_mode 信息里显示 <sort_key,additional_fields> 或 <sort_key,packed_additional_fields>。
双路排序（又叫回表排序模式）。
首先根据相应的条件，取出相应的排序字段和可以直接定位行数据的行Id。
然后在 sort buffer 中进行排序，排序完后需要再次取回其它需要查询字段。
用 trace 工具可以看到 sort_mode 信息里显示 <sort_key, rowid>。

MySQL 通过比较系统变量 max_length_for_sort_data（默认1024字节）的大小和需要查询的字段总大小来判断使用那种排序模式。

如果 max_length_for_sort_data 比查询字段的总长度大，那么使用 单路排序 模式；
如果 max_length_for_sort_data 比查询字段的总长度小，那么使用 双路排序 模式。

EXPLAIN SELECT * FROM employees WHERE name = 'qs' ORDER BY position;

Extra：Using filesort

1. 单路排序

# 开启trace
set session optimizer_trace="enabled=on",end_markers_in_json=on;
# 关闭trace
set session optimizer_trace="enabled=off";

SELECT * FROM employees WHERE name = 'qs' ORDER BY position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;

{
	// 三、SQL执行阶段
	"join_execution": {
		"select#": 1,
		"steps": [
			{
				// 1. 文件排序信息
				"filesort_information": [
					{
						"direction": "asc",		// 方向
						"table": "`employees`",	// 表
						"field": "position"		// 字段
					}
				] /* filesort_information */ ,
				// 2. 文件排序优先级队列优化
				"filesort_priority_queue_optimization": {
					"usable": false,						// 不可用
					"cause": "not applicable (no LIMIT)"	// 不适用
				} /* filesort_priority_queue_optimization */ ,
				// 3. 文件排序执行
				"filesort_execution": [] /* filesort_execution */ ,
				// 4. 文件排序信息
				"filesort_summary": {
					"rows": 0,					// 预计扫描行数
					"examined_rows": 0,			// 参数排序的行
					"number_of_tmp_files": 0,	// 使用临时文件的个数，这个值如果为0代表全部使用的  sort_buffer 内存排序，否则使用的磁盘文件排序
					"sort_buffer_size": 200704,	// 排序缓存的大小
					"sort_mode": "<sort_key, packed_additional_fields>"	// 排序方式，这里用的单路排序
				} /* filesort_summary */
			}
		] /* steps */
	} /* join_execution */
}

2. 双路排序

2.1 设置 MySQL 系统变量

设置 MySQL 排序数据的最大长度（max_length_for_sort_data）来测试双路排序。

# 查看max_length_for_sort_data大小（默认1024）
SHOW VARIABLES LIKE '%max_length_for_sort_data%';
# 设置max_length_for_sort_data大小
set max_length_for_sort_data = 10;

SELECT * FROM employees WHERE name = 'qs' ORDER BY position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;

{
	"join_execution": {
		"select#": 1,
		"steps": [
			{
				// 1. 文件排序信息
				"filesort_information": [
					{
						"direction": "asc",
						"table": "`employees`",
						"field": "position"
					}
				] /* filesort_information */ ,
				// 2. 文件排序优先级队列优化
				"filesort_priority_queue_optimization": {
					"usable": false,
					"cause": "not applicable (no LIMIT)"
				} /* filesort_priority_queue_optimization */ ,
				// 3. 文件排序执行
				"filesort_execution": [] /* filesort_execution */ ,
				// 4. 文件排序信息
				"filesort_summary": {
					"rows": 0,
					"examined_rows": 0,
					"number_of_tmp_files": 0,
					"sort_buffer_size": 53248,
					"sort_mode": "<sort_key, rowid>"	// 排序方式，这里用的双路排序
				} /* filesort_summary */
			}
		] /* steps */
	} /* join_execution */
}

3. 单路排序和双路排序区别

单路排序

从索引 name 找到第一个满足 name = 'qs' 条件的主键 id；
根据主键 id 取出整行，取出所有字段的值，存入 sort_buffer 中；
从索引 name 找到下一个满足 name = ‘qs’ 条件的主键 id；
重复步骤 2、3 直到不满足 name = ‘qs’；
对 sort_buffer 中的数据按照字段 position 进行排序；
返回结果给客户端。

双路排序

从索引 name 找到第一个满足 name = 'qs' 的主键 id；
根据主键 id 取出整行，把排序字段 position 和主键 id 这两个字段放到 sort buffer 中；
从索引 name 取下一个满足 name = 'qs' 记录的主键 id；
重复 3、4 直到不满足 name = 'qs'；
对 sort_buffer 中的字段 position 和主键 id 按照字段 position 进行排序；
遍历排序好的 id 和字段 position，按照 id 的值回到原表中取出所有字段的值返回给客户端。