SQL引擎 - analyze.cpp分析(二)
(一)SQL简要介绍
数据库的SQL引擎是数据库重要的子系统之一,它对上负责承接应用程序发送过来的SQL语句,对下则负责指挥执行器运行执行计划。其中优化器作为SQL引擎中最重要、最复杂的模块,被称为数据库的“大脑”,优化器产生的执行计划的优劣直接决定数据库的性能。
SQL引擎主要包括查询解析(parser)、查询分流(traffic cop)、查询优化(optimizer)、查询执行(executor)。parser的源码目录为/src/common/backend/parser。
(二)parse_analyze对不同语句进行transform
- transformTopLevelStmt
transformTopLevelStmt函数会调用transformOptionalSelectInto函数将原始语法树转换成查询树。
/*
 * transformTopLevelStmt -
 *	  Top-level entry point: hand the raw statement's tree to
 *	  transformOptionalSelectInto() and stamp the resulting Query with the
 *	  raw statement's location and length.
 *
 * pstate:    parse state passed through to the transformation.
 * parseTree: raw statement wrapper; its stmt, stmt_location and stmt_len
 *            fields are read.
 *
 * Returns the Query produced by transformOptionalSelectInto().
 */
Query *transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
{
    /* We're at top level, so SELECT INTO is allowed here */
    Query *query = transformOptionalSelectInto(pstate, parseTree->stmt);

    /* Carry the statement's position in the source text over to the Query */
    query->stmt_location = parseTree->stmt_location;
    query->stmt_len = parseTree->stmt_len;

    return query;
}
- transformOptionalSelectInto
transformOptionalSelectInto先特殊处理一下含有into子句的select语句,将其转换为CREATE TABLE AS。
/*
 * transformOptionalSelectInto -
 *	  If SELECT has INTO, convert it to CREATE TABLE AS.
 *
 * This is the one step performed here that transformStmt() does not do.
 * Utility statements aren't allowed within larger statements, so the
 * conversion is only legal at the top of the parse tree; that is why it is
 * attempted once, before entering the recursive transformStmt() processing.
 *
 * pstate:    parse state passed through to transformStmt().
 * parseTree: statement to transform; anything that is not a SelectStmt is
 *            forwarded to transformStmt() untouched.
 *
 * Returns the Query produced by transformStmt().
 */
static Query *transformOptionalSelectInto(ParseState *pstate, Node *parseTree)
{
    SelectStmt *leftmost;
    CreateTableAsStmt *ctas;

    /* Only SELECT can carry an INTO clause */
    if (!IsA(parseTree, SelectStmt))
        return transformStmt(pstate, parseTree);

    /* If it's a set-operation tree, drill down to the leftmost SelectStmt */
    leftmost = (SelectStmt *) parseTree;
    while (leftmost && leftmost->op != SETOP_NONE)
        leftmost = leftmost->larg;
    Assert(leftmost && IsA(leftmost, SelectStmt) && leftmost->larg == NULL);

    /* Plain SELECT without INTO: nothing to rewrite */
    if (!leftmost->intoClause)
        return transformStmt(pstate, parseTree);

    /* Wrap the whole SELECT tree in a CREATE TABLE AS node */
    ctas = makeNode(CreateTableAsStmt);
    ctas->query = parseTree;
    ctas->into = leftmost->intoClause;
    ctas->relkind = OBJECT_TABLE;
    ctas->is_select_into = true;

    /*
     * Remove the intoClause from the SelectStmt.  This makes it safe for
     * transformSelectStmt to complain if it finds intoClause set (implying
     * that the INTO appeared in a disallowed place).
     */
    leftmost->intoClause = NULL;

    return transformStmt(pstate, (Node *) ctas);
}
- transformStmt
处理Insert语句(T_InsertStmt):transformInsertStmt(pstate, (InsertStmt *) parseTree)
处理Delete语句(T_DeleteStmt):transformDeleteStmt(pstate, (DeleteStmt *) parseTree)
处理Update语句(T_UpdateStmt):transformUpdateStmt(pstate, (UpdateStmt *) parseTree)
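处理Select语句(T_SelectStmt):含VALUES列表时调用transformValuesClause(pstate, n);不含集合操作(op为SETOP_NONE)时调用transformSelectStmt(pstate, n, isFirstNode, isCreateView);否则调用transformSetOperationStmt(pstate, n)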
处理DeclareCursor语句(T_DeclareCursorStmt):transformDeclareCursorStmt(pstate, (DeclareCursorStmt *) parseTree)
处理Explain语句(T_ExplainStmt):transformExplainStmt(pstate, (ExplainStmt *) parseTree)
等等
(三)transformTopLevelStmt
/*
* transformTopLevelStmt -
* /*将 Parse 树转换为 Query 树。*/
* transform a Parse tree into a Query tree.
*
* /*我们在 transformStmt() 中唯一没有做的就是将 SELECT ... INTO 转换为 CREATE TABLE AS。*/
* /*由于在较大的语句中不允许使用实用程序语句,因此仅允许在顶部使用解析树,所以我们只在进入递归transformStmt()处理之前尝试它。*/
* The only thing we do here that we don't do in transformStmt() is to
* convert SELECT ... INTO into CREATE TABLE AS. Since utility statements
* aren't allowed within larger statements, this is only allowed at the top
* of the parse tree, and so we only try it before entering the recursive
* transformStmt() processing.
*/
Query* transformTopLevelStmt(ParseState* pstate, Node* parseTree, bool isFirstNode, bool isCreateView)
{
if (IsA(parseTree, SelectStmt)) {
SelectStmt* stmt = (SelectStmt*)parseTree;
/* 如果它是一个集合操作树,向下钻取到最左边的 SelectStmt */
while (stmt != NULL && stmt->op != SETOP_NONE)
stmt = stmt->larg;
AssertEreport(stmt && IsA(stmt, SelectStmt) && stmt->larg == NULL, MOD_OPT, "failure to check parseTree");
if (stmt->intoClause) {
CreateTableAsStmt* ctas = makeNode(CreateTableAsStmt);
ctas->query = parseTree;
ctas->into = stmt->intoClause;
ctas->relkind = OBJECT_TABLE;
ctas->is_select_into = true;
/*
* 从 SelectStmt 中删除 intoClause。
* 这使得 transformSelectStmt 在发现 intoClause 集时可以安全地抱怨
* (暗示INTO出现在一个不允许的地方).
*/
stmt->intoClause = NULL;
parseTree = (Node*)ctas;
}
}
if (u_sess->hook_cxt.transformStmtHook != NULL) {
return
((transformStmtFunc)(u_sess->hook_cxt.transformStmtHook))(pstate, parseTree, isFirstNode, isCreateView);
}
return transformStmt(pstate, parseTree, isFirstNode, isCreateView);
}
/*
 * transformCreateModelStmt -
 *	  Validate a CreateModelStmt, rebuild the target list of its training
 *	  SELECT, transform that SELECT, and wrap the statement as a utility
 *	  Query.
 *
 * pstate: parse state used to transform the training query.
 * stmt:   statement to process.  It is modified in place: algorithm is
 *         resolved from architecture, and select_query is replaced by the
 *         transformed Query.
 *
 * Reports ERROR when the architecture is not recognized, when the model
 * name is already present, or when the FEATURES/TARGET clauses do not fit
 * the kind of algorithm (supervised vs. unsupervised).
 *
 * Returns a CMD_UTILITY Query whose utilityStmt is stmt.
 */
Query* transformCreateModelStmt(ParseState* pstate, CreateModelStmt* stmt)
{
    SelectStmt* trainSelect = (SelectStmt*)stmt->select_query;
    const int featureCount = list_length(stmt->model_features);
    const int targetCount = list_length(stmt->model_target);

    stmt->algorithm = get_algorithm_ml(stmt->architecture);
    if (stmt->algorithm == INVALID_ALGORITHM_ML) {
        ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
            errmsg("Non recognized ML model architecture definition %s", stmt->architecture)));
    }

    if (SearchSysCacheExists1(DB4AI_MODEL, CStringGetDatum(stmt->model))) {
        ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
            errmsg("The model name \"%s\" already exists in gs_model_warehouse.", stmt->model)));
    }

    /*
     * Create the projection for the AI operator in the query plan.
     * If the algorithm is supervised, the target is always the first
     * element of the list.
     */
    if (is_supervised(stmt->algorithm)) {
        if (featureCount == 0) {
            ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("Supervised ML algorithms require FEATURES clause")));
        } else if (targetCount == 0) {
            ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("Supervised ML algorithms require TARGET clause")));
        } else if (targetCount > 1) {
            ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("Target clause only supports one expression")));
        }
    } else if (targetCount > 0) {
        ereport(ERROR, (errmodule(MOD_DB4AI), errcode(ERRCODE_INVALID_PARAMETER_VALUE),
            errmsg("Unsupervised ML algorithms cannot have TARGET clause")));
    }

    /* Build the new target list: target expression(s) first ... */
    List* projection = NULL;
    foreach_cell(targetCell, stmt->model_target) {
        projection = lappend(projection, lfirst(targetCell));
    }

    if (featureCount > 0) {
        /* ... followed by the projection given by the user */
        foreach_cell(featureCell, stmt->model_features) {
            projection = lappend(projection, lfirst(featureCell));
        }
    } else {
        /* ... or, when no projection was given, by a bare "*" column reference */
        ResTarget* starTarget = makeNode(ResTarget);
        ColumnRef* starRef = makeNode(ColumnRef);

        starRef->fields = list_make1(makeNode(A_Star));
        starRef->location = -1;

        starTarget->name = NULL;
        starTarget->indirection = NIL;
        starTarget->val = (Node*)starRef;
        starTarget->location = -1;

        projection = lappend(projection, starTarget);
    }
    trainSelect->targetList = projection;

    /* Transform the select query that we prepared for the training operator */
    Query* trainQuery = transformStmt(pstate, (Node*)trainSelect);
    stmt->select_query = (Node*)trainQuery;

    /* Represent the command as a utility Query */
    Query* utility = makeNode(Query);
    utility->commandType = CMD_UTILITY;
    utility->utilityStmt = (Node*)stmt;

    return utility;
}
/*
* transformStmt - /*变换语句*/
* recursively transform a Parse tree into a Query tree. /*递归地将解析树转换为查询树*/
*/
Query* transformStmt(ParseState* pstate, Node* parseTree, bool isFirstNode, bool isCreateView)
{
Query* result = NULL;
AnalyzerRoutine *analyzerRoutineHook = (AnalyzerRoutine*)u_sess->hook_cxt.analyzerRoutineHook;
switch (nodeTag(parseTree)) {
/*
* 可优化语句
*/
case T_InsertStmt:
result = transformInsertStmt(pstate, (InsertStmt*)parseTree);
break;
case T_DeleteStmt:
result = transformDeleteStmt(pstate, (DeleteStmt*)parseTree);
break;
case T_UpdateStmt:
result = transformUpdateStmt(pstate, (UpdateStmt*)parseTree);
break;
case T_MergeStmt:
result = transformMergeStmt(pstate, (MergeStmt*)parseTree);
break;
case T_SelectStmt: {
SelectStmt* n = (SelectStmt*)parseTree;
if (n->valuesLists) {
result = transformValuesClause(pstate, n);
} else if (n->op == SETOP_NONE) {
if (analyzerRoutineHook == NULL || analyzerRoutineHook->transSelect == NULL) {
result = transformSelectStmt(pstate, n, isFirstNode, isCreateView);
} else {
result = analyzerRoutineHook->transSelect(pstate, n, isFirstNode, isCreateView);
}
} else {
result = transformSetOperationStmt(pstate, n);
}
} break;
/*
* 特别案例
*/
case T_DeclareCursorStmt:
result = transformDeclareCursorStmt(pstate, (DeclareCursorStmt*)parseTree);
break;
case T_ExplainStmt:
result = transformExplainStmt(pstate, (ExplainStmt*)parseTree);
break;
#ifdef PGXC
case T_ExecDirectStmt:
result = transformExecDirectStmt(pstate, (ExecDirectStmt*)parseTree);
break;
#endif
case T_CreateTableAsStmt:
result = transformCreateTableAsStmt(pstate, (CreateTableAsStmt*)parseTree);
break;
case T_CreateModelStmt:
result = transformCreateModelStmt(pstate, (CreateModelStmt*) parseTree);
break;
default:
/*
* 其他语句不需要任何转换; just return
* 原始分析树顶部贴有查询节点。
*/
result = makeNode(Query);
result->commandType = CMD_UTILITY;
result->utilityStmt = (Node*)parseTree;
break;
}
/* 标记为原始查询,直到我们学习不同 */
result->querySource = QSRC_ORIGINAL;
result->canSetTag = true;
/* 标记同义词对象是否在 rtables 中。 */
result->hasSynonyms = pstate->p_hasSynonyms;
return result;
}