1. SQL语句
sql语句:select sql_no_cache id1 from t1 union select id21 from t2
执行计划:
mysql> explain select sql_no_cache id1 from t1 union select id21 from t2 \G
*************************** 1. row ***************************
id: 1
select_type: PRIMARY
table: t1
partitions: NULL
type: ALL
possible_keys: NULL
key: NULL
key_len: NULL
ref: NULL
rows: 4
filtered: 100.00
Extra: NULL
*************************** 2. row ***************************
id: 2
select_type: UNION
table: t2
partitions: NULL
type: ALL
possible_keys: NULL
key: NULL
key_len: NULL
ref: NULL
rows: 2
filtered: 100.00
Extra: NULL
*************************** 3. row ***************************
id: NULL
select_type: UNION RESULT
table: <union1,2>
partitions: NULL
type: ALL
possible_keys: NULL
key: NULL
key_len: NULL
ref: NULL
rows: NULL
filtered: NULL
Extra: Using temporary
3 rows in set, 2 warnings (0.00 sec)
2. union 语法分析
%token UNION_SYM /* SQL-2003-R */
select_option:
...
| SQL_NO_CACHE_SYM
{
push_deprecated_warn_no_replacement(YYTHD, "SQL_NO_CACHE");
/*
Allow this flag only on the first top-level SELECT statement, if
SQL_CACHE wasn't specified, and only once per query.
*/
$$.query_spec_options= 0;
$$.sql_cache= SELECT_LEX::SQL_NO_CACHE;
}
union_list:
UNION_SYM union_option select_init
{
$$= NEW_PTN PT_union_list($2, $3);
}
;
union_opt:
/* Empty */ { $$= NULL; }
| union_list { $$= $1; }
| union_order_or_limit { $$= $1; }
;
/* Need first branch for subselects. */
select_init:
SELECT_SYM select_part2 opt_union_clause
{
$$= NEW_PTN PT_select_init2($1, $2, $3);
}
| '(' select_paren ')' union_opt
{
$$= NEW_PTN PT_select_init_parenthesis($2, $4);
}
;
select_part2:
...
| select_options_and_item_list /* #1 */
opt_into /* #2 */
from_clause /* #3 */
opt_where_clause /* #4 */
opt_group_clause /* #5 */
opt_having_clause /* #6 */
opt_order_clause /* #7 */
opt_limit_clause /* #8 */
opt_procedure_analyse_clause /* #9 */
opt_into /* #10 */
opt_select_lock_type /* #11 */
{
if ($2 && $10)
{
/* double "INTO" clause */
YYTHD->parse_error_at(@10, ER(ER_SYNTAX_ERROR));
MYSQL_YYABORT;
}
if ($9 && ($2 || $10))
{
/* "INTO" with "PROCEDURE ANALYSE" */
my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "INTO");
MYSQL_YYABORT;
}
$$= NEW_PTN PT_select_part2($1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11);
}
3. 主要涉及的类及函数
/**
This class represents a query expression (one query block or
several query blocks combined with UNION).
*/
class st_select_lex_unit;
st_select_lex *LEX::new_query(st_select_lex *curr_select) 函数下
// Link the new "unit" below the current select_lex, if any
if (curr_select != NULL)
sel_unit->include_down(this, curr_select);
void st_select_lex_unit::include_chain(LEX *lex, st_select_lex *outer)
typedef class st_select_lex_unit SELECT_LEX_UNIT;
typedef class st_select_lex SELECT_LEX;
class Query_result_union_direct :public Query_result_union
4. union 流程
MySQL中负责分析和存储一个select语句信息的数据结构是 st_select_lex类 (对应宏 SELECT_LEX); 分析和存储union的数据结构是 st_select_lex_unit (对应宏 SELECT_LEX_UNIT);
对于 st_select_lex 类,主要存储select语句中的返回列信息、from子句中的表信息、order by等的列信息等;对于union 操作,对应st_select_lex 类中成员变量 st_select_lex_unit *master, st_select_lex_unit *slave , 通过成员st_select_lex *next 将语句串联起来。
对于语句 select id1 from t1 union select id21 from t2 :
st_select_lex -> next -> st_select_lex
select id1 from t1 select id21 from t2
SELECT_LEX_UNIT 负责管理处于同一级的进行union 操作的数个select子句对应的SELECT_LEX;SELECT_LEX_UNIT 通过成员 SELECT_LEX *slave 存储属于这一级的进行union操作的第一个SELECT_LEX,而这一级别其他参与union操作的SELECT_LEX 可以通过成员 st_select_lex_unit *next 串成链表依次找到。
SELECT_LEX_UNIT类中还有一个成员变量 fake_select_lex , 用来保存整个union操作的order by 及limit等条件。
st_select_lex_unit
|
| slave
|
|
st_select_lex -> next -> st_select_lex
select id1 from t1 select id21 from t2
st_select_lex *LEX::new_query(st_select_lex *curr_select) 函数中
select->include_down(this, sel_unit);
st_select_lex *LEX::new_union_query(st_select_lex *curr_select, bool distinct)
5. 函数流程
-> mysql_execute_command
-> execute_sqlcom_select
-> handle_query
-> st_select_lex_unit::execute
-> JOIN::exec
-> do_select
-> sub_select
-> evaluate_join_record
-> end_send
-> Query_result_union::send_data
-> handler::ha_write_row
-> ha_heap::write_row
-> heap_write
bool st_select_lex_unit::execute(THD *thd)
对于sql语句 select id1 from t1 union select id21 from t2 抽象语法树对应
SELECT_LEX : select id1 from t1
SELECT_LEX : select id21 from t2
fake_select_lex : 保存整个 union 语句结果信息
1. 执行 SELECT_LEX (select id1 from t1) 通过 rr_sequential 函数从表t1 中进行扫描获取数据
将数据存放在临时文件中。
2. 执行 SELECT_LEX (select id21 from t2) 通过 rr_sequential 函数从表t2 中进行扫描获取数据
将数据存放在临时文件中 。
3. 执行 fake_select_lex ,
通过函数 rr_sequential 扫描临时文件,
判断是否是重复数据,在函数 bool Query_result_union::send_data(List<Item> &values) 中的
const int error= table->file->ha_write_row(table->record[0])
若非重复数据,则 error =0,若重复数据则error=127.
(内部是调用函数 int ha_heap::write_row(uchar * buf) 中的 res= heap_write(file,buf)
会根据hash值判断插入的数据是否已经存在,若不存在则插入)
创建 union结果的存储对象在函数
bool Query_result_union::create_result_table(THD *thd_arg,
List<Item> *column_types,
bool is_union_distinct,
ulonglong options,
const char *table_alias,
bool bit_fields_as_long,
bool create_table)
{
table->file->extra(HA_EXTRA_WRITE_CACHE);
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
}
6 参考
[How MySQL Transforms Subqueries](https://dev.mysql.com/doc/internals/en/transformations.html)