原始sql
下面展示一些 内联代码片
。
SELECT
max( a.number ) as number ,
sum( a.dodmoney ) as billmoney,
a.carno,
a.color,
a.parkinglotid,
b.name as parkinglotName
FROM
(
SELECT
count(*) AS number,
sum(billmoney ) AS dodmoney,
carno,
color,
parkinglotid
FROM
t_parking_record
WHERE
carno IS NOT NULL AND
outtime BETWEEN '2021-01-14 19:07:00'
AND '2021-09-14 19:07:00'
AND state = 5
GROUP BY
carno,
parkinglotid,
color
) a left join t_parkinglot b
on a.parkinglotid=b.id
GROUP BY
a.carno,
a.color
HAVING
billmoney>=150
order by billmoney desc
先EXPLAIN分析一下:
使用了临时表和文件内存排序
然后再来看一下这表目前的索引
目前表的索引都是单个字段进行的,根据sql的分析结果,由于查询没有走索引在加上group by 最后还有join操作,导致速度下降。在对业务进行分析以后,打算把join操作单独领出来,先把单表结果找出最后在分别查出对应结果。
此问题就转化成了如何优化单表的按条件分组排序查询。
2优化思路将left join连接去除
我们都知道索引是一种有序的数据结构,在查询使用覆盖索引的时候,mysql最后直接返回索引数据而且不用回表操作。顺着这个思路我建立了一个新的索引,这个索引目前只针对这个业务。
索引如下图所示
添加覆盖索引
根据资料
查找了网上一些博客分析GROUP BY 与临时表的关系 :
- 如果GROUP BY 的列没有索引,产生临时表.
- 如果GROUP BY时,SELECT的列不止GROUP BY列一个,并且GROUP BY的列不是主键 ,产生临时表.
- 如果GROUP BY的列有索引,ORDER BY的列没索引.产生临时表.
- 如果GROUP BY的列和ORDER BY的列不一样,即使都有索引也会产生临时表.
- 如果GROUP BY或ORDER BY的列不是来自JOIN语句第一个表.会产生临时表.
- 如果DISTINCT 和 ORDER BY的列没有索引,产生临时表.
想要优化SQL就需要知道SQL是怎么运行的。
现在的SQL是这样的。
修改sql为
select a.carno,
a.color,
sum( a.billmoney ) as billmoney from(
SELECT
carno,
color,
billmoney
FROM
t_parking_record
WHERE
outtime BETWEEN '2021-01-14 19:07:00'
AND '2021-09-14 19:07:00' and
carno IS NOT NULL AND
state = 5
) a
GROUP BY
a.carno,
a.color
having billmoney>=0
order by billmoney desc
查看 optimizer trace 信息
SET OPTIMIZER_TRACE=“enabled=on”,END_MARKERS_IN_JSON=ON; # be readable
SET OPTIMIZER_TRACE_MAX_MEM_SIZE=1000000; # avoid small default
SET optimizer_trace_offset=-30, optimizer_trace_limit=30;
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `t_parking_record`.`carno` AS `carno`,`t_parking_record`.`color` AS `color`,sum(`t_parking_record`.`billmoney`) AS `billmoney` from `t_parking_record` where ((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null) and (`t_parking_record`.`state` = 5)) group by `t_parking_record`.`carno`,`t_parking_record`.`color` having (`billmoney` >= 0) order by `billmoney` desc limit 0,1000"
}
] /* steps */
} /* join_preparation */
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null) and (`t_parking_record`.`state` = 5))",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null) and multiple equal(5, `t_parking_record`.`state`))"
},
{
"transformation": "constant_propagation",
"resulting_condition": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null) and multiple equal(5, `t_parking_record`.`state`))"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null) and multiple equal(5, `t_parking_record`.`state`))"
}
] /* steps */
} /* condition_processing */
},
{
"condition_processing": {
"condition": "HAVING",
"original_condition": "(`billmoney` >= 0)",
"steps": [
{
"transformation": "constant_propagation",
"resulting_condition": "(`billmoney` >= 0)"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`billmoney` >= 0)"
}
] /* steps */
} /* condition_processing */
},
{
"substitute_generated_columns": {
} /* substitute_generated_columns */
},
{
"table_dependencies": [
{
"table": "`t_parking_record`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
] /* depends_on_map_bits */
}
] /* table_dependencies */
},
{
"ref_optimizer_key_uses": [
{
"table": "`t_parking_record`",
"field": "state",
"equals": "5",
"null_rejecting": false
}
] /* ref_optimizer_key_uses */
},
{
"rows_estimation": [
{
"table": "`t_parking_record`",
"range_analysis": {
"table_scan": {
"rows": 2723798,
"cost": 567538
} /* table_scan */,
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "uk_parkingid_intime",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_state",
"usable": true,
"key_parts": [
"state",
"id"
] /* key_parts */
},
{
"index": "index_parkinglotid",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_parkingid",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_carno",
"usable": true,
"key_parts": [
"carno",
"id"
] /* key_parts */
},
{
"index": "index_intime",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_outtime",
"usable": true,
"key_parts": [
"outtime",
"id"
] /* key_parts */
},
{
"index": "index_creater",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_modifer",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_id",
"usable": false,
"cause": "not_applicable"
},
{
"index": "index_ccp",
"usable": true,
"key_parts": [
"outtime",
"carno",
"state",
"color",
"billmoney",
"id"
] /* key_parts */
}
] /* potential_range_indexes */,
"best_covering_index_scan": {
"index": "index_ccp",
"cost": 592547,
"chosen": false,
"cause": "cost"
} /* best_covering_index_scan */,
"setup_range_conditions": [
] /* setup_range_conditions */,
"group_index_range": {
"chosen": false,
"cause": "not_applicable_aggregate_function"
} /* group_index_range */,
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "index_state",
"ranges": [
"5 <= state <= 5"
] /* ranges */,
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": false,
"rows": 1361899,
"cost": 1.63e6,
"chosen": false,
"cause": "cost"
},
{
"index": "index_carno",
"ranges": [
"NULL < carno"
] /* ranges */,
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 1361899,
"cost": 1.63e6,
"chosen": false,
"cause": "cost"
},
{
"index": "index_outtime",
"ranges": [
"0x99a89d31c0 <= outtime <= 0x99aa9d31c0"
] /* ranges */,
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 1361899,
"cost": 1.63e6,
"chosen": false,
"cause": "cost"
},
{
"index": "index_ccp",
"ranges": [
"0x99a89d31c0 <= outtime <= 0x99aa9d31c0"
] /* ranges */,
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"rows": 1361899,
"cost": 296274,
"chosen": true
}
] /* range_scan_alternatives */,
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
} /* analyzing_roworder_intersect */
} /* analyzing_range_alternatives */,
"chosen_range_access_summary": {
"range_access_plan": {
"type": "range_scan",
"index": "index_ccp",
"rows": 1361899,
"ranges": [
"0x99a89d31c0 <= outtime <= 0x99aa9d31c0"
] /* ranges */
} /* range_access_plan */,
"rows_for_plan": 1361899,
"cost_for_plan": 296274,
"chosen": true
} /* chosen_range_access_summary */
} /* range_analysis */
}
] /* rows_estimation */
},
{
"considered_execution_plans": [
{
"plan_prefix": [
] /* plan_prefix */,
"table": "`t_parking_record`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "index_state",
"rows": 1.36e6,
"cost": 340708,
"chosen": true
},
{
"rows_to_scan": 1361899,
"access_type": "range",
"range_details": {
"used_index": "index_ccp"
} /* range_details */,
"resulting_rows": 340475,
"cost": 568654,
"chosen": false
}
] /* considered_access_paths */
} /* best_access_path */,
"condition_filtering_pct": 25,
"rows_for_plan": 340475,
"cost_for_plan": 340708,
"chosen": true
}
] /* considered_execution_plans */
},
{
"attaching_conditions_to_tables": {
"original_condition": "((`t_parking_record`.`state` = 5) and (`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null))",
"attached_conditions_computation": [
] /* attached_conditions_computation */,
"attached_conditions_summary": [
{
"table": "`t_parking_record`",
"attached": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null))"
}
] /* attached_conditions_summary */
} /* attaching_conditions_to_tables */
},
{
"clause_processing": {
"clause": "ORDER BY",
"original_clause": "`billmoney` desc",
"items": [
{
"item": "sum(`t_parking_record`.`billmoney`)"
}
] /* items */,
"resulting_clause_is_simple": false,
"resulting_clause": "`billmoney` desc"
} /* clause_processing */
},
{
"clause_processing": {
"clause": "GROUP BY",
"original_clause": "`t_parking_record`.`carno`,`t_parking_record`.`color`",
"items": [
{
"item": "`t_parking_record`.`carno`"
},
{
"item": "`t_parking_record`.`color`"
}
] /* items */,
"resulting_clause_is_simple": true,
"resulting_clause": "`t_parking_record`.`carno`,`t_parking_record`.`color`"
} /* clause_processing */
},
{
"added_back_ref_condition": "((`t_parking_record`.`state` <=> 5) and ((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null)))"
},
{
"reconsidering_access_paths_for_index_ordering": {
"clause": "GROUP BY",
"index_order_summary": {
"table": "`t_parking_record`",
"index_provides_order": false,
"order_direction": "undefined",
"index": "index_state",
"plan_changed": false
} /* index_order_summary */
} /* reconsidering_access_paths_for_index_ordering */
},
{
"refine_plan": [
{
"table": "`t_parking_record`",
"pushed_index_condition": "(`t_parking_record`.`state` <=> 5)",
"table_condition_attached": "((`t_parking_record`.`outtime` between '2021-01-14 19:07:00' and '2021-09-14 19:07:00') and (`t_parking_record`.`carno` is not null))"
}
] /* refine_plan */
},
{
"sort_using_internal_table": {
"condition_for_sort": "(`billmoney` >= 0)",
"having_after_sort": null
} /* sort_using_internal_table */
}
] /* steps */
} /* join_optimization */
},
{
"join_execution": {
"select#": 1,
"steps": [
{
"creating_tmp_table": {
"tmp_table_info": {
"table": "intermediate_tmp_table",
"row_length": 133,
"key_length": 126,
"unique_constraint": false,
"location": "memory (heap)",
"row_limit_estimate": 126144
} /* tmp_table_info */
} /* creating_tmp_table */
},
{
"filesort_information": [
{
"direction": "desc",
"table": "intermediate_tmp_table",
"field": "billmoney"
}
] /* filesort_information */,
"filesort_priority_queue_optimization": {
"usable": false,
"cause": "not applicable (no LIMIT)"
} /* filesort_priority_queue_optimization */,
"filesort_execution": [
] /* filesort_execution */,
"filesort_summary": {
"rows": 64616,
"examined_rows": 64616,
"number_of_tmp_files": 0,
"sort_buffer_size": 1615656,
"sort_mode": "<sort_key, rowid>"
} /* filesort_summary */
}
] /* steps */
} /* join_execution */
}
] /* steps */
}
前面一大堆都是分析索引还有扫描行数,计算查询代价,最后执行计划看
根据最后的索引代价并没有走我们想要的索引
“considered_execution_plans”: [
{
“plan_prefix”: [
] /* plan_prefix /,
“table”: “t_parking_record
”,
“best_access_path”: {
“considered_access_paths”: [
{
“access_type”: “ref”,
“index”: “index_state”,
“rows”: 1.36e6,
“cost”: 340708,
“chosen”: true
},
{
“rows_to_scan”: 1361899,
“access_type”: “range”,
“range_details”: {
“used_index”: “index_ccp”
} / range_details /,
“resulting_rows”: 340475,
“cost”: 568654,
“chosen”: false
}
] / considered_access_paths /
} / best_access_path /,
“condition_filtering_pct”: 25,
“rows_for_plan”: 340475,
“cost_for_plan”: 340708,
“chosen”: true
}
最后选的是index_state,并没有走我们的索引于是调整索引
顺序,进行索引覆盖
调整后如下图所示
新的trace 如下
截取了相关的最后索引选择
“considered_execution_plans”: [
{
“plan_prefix”: [
] / plan_prefix /,
“table”: “t_parking_record
”,
“best_access_path”: {
“considered_access_paths”: [
{
“access_type”: “ref”,
“index”: “index_state”,
“rows”: 1.36e6,
“cost”: 340708,
“chosen”: true
},
{
“access_type”: “ref”,
“index”: “index_cpp”,
“rows”: 1.36e6,
“cost”: 296274,
“chosen”: true
},
{
“rows_to_scan”: 1361899,
“access_type”: “range”,
“range_details”: {
“used_index”: “index_cpp”
} / range_details /,
“resulting_rows”: 680950,
“cost”: 568654,
“chosen”: false
}
] / considered_access_paths /
} / best_access_path /,
“condition_filtering_pct”: 25,
“rows_for_plan”: 340475,
“cost_for_plan”: 296274,
“chosen”: true
}
] / considered_execution_plans /
join_execution
“join_execution”: {
“select#”: 1,
“steps”: [
{
“creating_tmp_table”: {
“tmp_table_info”: {
“table”: “intermediate_tmp_table”,
“row_length”: 133,
“key_length”: 126,
“unique_constraint”: false,
“location”: “memory (heap)”,
“row_limit_estimate”: 126144
} / tmp_table_info /
} / creating_tmp_table /
},
{
“converting_tmp_table_to_ondisk”: {
“cause”: “memory_table_size_exceeded”,
“tmp_table_info”: {
“table”: “intermediate_tmp_table”,
“row_length”: 133,
“key_length”: 126,
“unique_constraint”: false,
“location”: “disk (InnoDB)”,
“record_format”: “fixed”
} / tmp_table_info /
} / converting_tmp_table_to_ondisk /
},
{
“filesort_information”: [
{
“direction”: “desc”,
“table”: “intermediate_tmp_table”,
“field”: “billmoney”
}
] / filesort_information /,
“filesort_priority_queue_optimization”: {
“usable”: false,
“cause”: “not applicable (no LIMIT)”
} / filesort_priority_queue_optimization /,
“filesort_execution”: [
] / filesort_execution /,
“filesort_summary”: {
“rows”: 117701,
“examined_rows”: 117701,
“number_of_tmp_files”: 21,
“sort_buffer_size”: 223352,
“sort_mode”: “<sort_key, packed_additional_fields>”
} / filesort_summary /
}
] / steps /
} / join_execution */
1尝试在堆上使用memory的内存临时表来存放group by的数据,发现内存不够;
2创建一张临时表, carno,
color,
SUM( billmoney ) AS billmoney
3从索引index_cpp中取出1行,插入临时表。插入规则是如果carno不存在则直接插入,如果存在,则把billmoney 的值累加在billmoney 上;
4循环遍历索引index_cpp上满足条件之间的所有行,执行步骤3;
对临时表根据billmoney 的值做优先队列排序;
5取出最后留在堆(优先队列的堆)里面的行数据,作为结果集直接返回,不需要再回表;
优化方案2
扩充临时表空间上限大小
也就是说这里临时表的限制是16M,max_heap_table_size大小也受tmp_table_size大小的限制。
所以我们这里调整为32MB,然后执行原始的SQL
这里有个问题就是随着查询结果的变大需要再次调整大小
方案3 使用 SQL_BIG_RESULT 优化
告诉优化器,查询结果比较多,临时表直接走磁盘存储。
SELECT SQL_BIG_RESULT
carno,
color,
SUM( billmoney ) AS billmoney
FROM
t_parking_record
WHERE
state = 5
AND outtime BETWEEN '2018-01-14 19:07:00'
AND '2021-09-14 19:07:00' AND
carno IS NOT NULL
GROUP BY
carno,
color
HAVING billmoney>=0
ORDER BY billmoney DESC
顺便值得一提的是: 当我把数据量翻倍之后,使用该方式,查询时间基本没变。因为扫描的行数还是不变的。实际测试耗时4.197484
总结
方案2需要调整临时表内存的大小,可行;不过当数据库超过32MB时,如果使用该方式,还需要继续提升临时表大小;
方案3通过索引优化完,直接声明使用磁盘来放临时表,整体响应时间快了一半。
所以最后对比,选择方案3比较合适。
后续疑问
为什么仅仅是不再尝试往内存临时表里写入这一步会相差一半的性能?
有没有工具能够统计 SQL 执行过程中的 I/O 次数?
2021年11月11日更新,对于公司的计算分析业务是OLAP场景
其实可以使用阿里开源中间件Canal 实时同步MySQL数据到es,
查询的时候走es就可以了