源码链接
概述
在我介绍 execMain.cpp 的前两篇博客(对execMain.cpp的解析(一)、对execMain.cpp的解析(二))中,我解析了 Executor 模块中最重要的几个函数。其中 standard_ExecutorStart()、standard_ExecutorRun()、standard_ExecutorEnd() 分别对应着下一层的 InitPlan()、ExecutePlan()、ExecEndPlan()。这篇博客,我来解析一下这三个函数以及相关的其它一些函数。
解析
InitPlan()
//代码清单1
//src/gausskernel/runtime/executor/execMain.cpp
void InitPlan(QueryDesc *queryDesc, int eflags) /* Build executor state for queryDesc's plan: row marks, then subplans, then the main plan tree. */
{
CmdType operation = queryDesc->operation;
PlannedStmt *plannedstmt = queryDesc->plannedstmt;
Plan *plan = plannedstmt->planTree;
List *rangeTable = plannedstmt->rtable;
EState *estate = queryDesc->estate;
PlanState *planstate = NULL;
TupleDesc tupType = NULL;
ListCell *l = NULL;
int i;
······
estate->es_rowMarks = NIL; /* start from an empty row-mark list */
uint64 plan_start_time = time(NULL);
foreach (l, plannedstmt->rowMarks) { /* append one ExecRowMark per non-parent PlanRowMark */
PlanRowMark *rc = (PlanRowMark *)lfirst(l);
Oid relid;
Relation relation = NULL;
ExecRowMark *erm = NULL;
if (rc->isParent) { /* parent marks get no ExecRowMark entry */
continue;
}
switch (rc->markType) {
case ROW_MARK_EXCLUSIVE:
case ROW_MARK_NOKEYEXCLUSIVE:
case ROW_MARK_SHARE:
case ROW_MARK_KEYSHARE:
if (IS_PGXC_COORDINATOR || u_sess->pgxc_cxt.PGXCNodeId < 0 ||
bms_is_member(u_sess->pgxc_cxt.PGXCNodeId, rc->bms_nodeids)) {
relid = getrelid(rc->rti, rangeTable);
relation = heap_open(relid, RowShareLock); /* these four mark types open the relation with RowShareLock */
}
break;
case ROW_MARK_REFERENCE:
if (IS_PGXC_COORDINATOR || u_sess->pgxc_cxt.PGXCNodeId < 0 ||
bms_is_member(u_sess->pgxc_cxt.PGXCNodeId, rc->bms_nodeids)) {
relid = getrelid(rc->rti, rangeTable);
relation = heap_open(relid, AccessShareLock); /* reference marks open with AccessShareLock instead */
}
break;
case ROW_MARK_COPY:
case ROW_MARK_COPY_DATUM:
/* there's no real table here ... */
break;
default:
ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmsg("unrecognized markType: %d when initializing query plan.", rc->markType)));
break;
}
erm = (ExecRowMark *)palloc(sizeof(ExecRowMark)); /* relation is still NULL here if no table was opened above */
erm->relation = relation;
erm->rti = rc->rti;
erm->prti = rc->prti;
erm->rowmarkId = rc->rowmarkId;
erm->markType = rc->markType;
erm->noWait = rc->noWait;
erm->waitSec = rc->waitSec;
erm->numAttrs = rc->numAttrs;
ItemPointerSetInvalid(&(erm->curCtid)); /* curCtid starts out invalid */
estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
}
······
i = 1; /* subplan indices count from 1 */
foreach (l, plannedstmt->subplans)
{
Plan *subplan = (Plan *)lfirst(l);
PlanState *subplanstate = NULL;
int sp_eflags;
sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY; /* of the caller's flags, only EXPLAIN_ONLY is passed on to subplans */
if (bms_is_member(i, plannedstmt->rewindPlanIDs)) /* subplan i is listed in rewindPlanIDs */
{
sp_eflags |= EXEC_FLAG_REWIND;
}
if (subplan && (plannedstmt->subplan_ids == NIL || /* initialize the subplan only if it passes one of the subplan_ids checks */
#ifdef ENABLE_MULTIPLE_NODES
(IS_PGXC_COORDINATOR && list_nth_int(plannedstmt->subplan_ids, i - 1) != 0) ||
#else
(StreamTopConsumerAmI() && list_nth_int(plannedstmt->subplan_ids, i - 1) != 0) ||
#endif
plannedstmt->planTree->plan_node_id == list_nth_int(plannedstmt->subplan_ids, i - 1)))
{
estate->es_under_subplan = true; /* flag is raised while the subplan is initialized and lowered again below */
subplanstate = ExecInitNode(subplan, estate, sp_eflags);
······
estate->es_under_subplan = false;
}
estate->es_subplanstates = lappend(estate->es_subplanstates, subplanstate); /* appended on every iteration (possibly NULL), so there is one entry per subplan */
i++;
}
planstate = ExecInitNode(plan, estate, eflags); /* finally initialize the main plan tree with the caller's eflags */
······
}
该函数用于初始化查询计划,其最重要的部分便是第17~62行以及第65~90行的两个 foreach 循环结构。第一个 foreach 循环中又包含了一个 switch case 分支语句,它的作用是向存储执行器状态的 estate 的成员变量 es_rowMarks 所指向的链表中添加新元素,新元素的信息来源于存储了执行计划相关信息的结构体变量 plannedstmt(其成员 rowMarks)。换句话说,这个循环用来将 FOR UPDATE/SHARE 子句的信息存储到 estate 中,以便后续执行这个子句。第二个 foreach 循环用来为每一个子计划初始化私有状态信息,第91行调用的 ExecInitNode() 函数则用来初始化主计划树上所有节点的私有状态信息。
ExecutePlan()
//代码清单2
//src/gausskernel/runtime/executor/execMain.cpp
#ifdef ENABLE_MOT /* MOT builds take one extra parameter: the JIT context */
static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest, JitExec::JitContext* motJitContext)
#else
static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest)
#endif
{
TupleTableSlot *slot = NULL;
long current_tuple_count = 0; /* tuples processed so far by this call */
bool stream_instrument = false;
bool need_sync_step = false;
bool recursive_early_stop = false;
······
estate->es_direction = direction; /* record the requested scan direction in the executor state */
······
for (;;) /* loop until no tuple is returned or numberTuples tuples have been processed */
{
······
#ifdef ENABLE_MOT
if (unlikely(recursive_early_stop))
{
slot = NULL;
}
else if (motJitContext && !IS_PGXC_COORDINATOR && JitExec::IsMotCodegenEnabled())
{
// MOT LLVM
int scanEnded = 0;
if (!motFinishedExecution)
{
// previous iteration has not signaled end of scan
slot = planstate->ps_ResultTupleSlot;
uint64_t tuplesProcessed = 0;
int rc = JitExec::JitExecQuery(motJitContext, estate->es_param_list_info, slot, &tuplesProcessed, &scanEnded);
if (scanEnded || (tuplesProcessed == 0) || (rc != 0))
{
// raise flag so that next round we will bail out (current tuple still must be reported to user)
motFinishedExecution = true;
}
}
else
{
(void)ExecClearTuple(slot);
}
}
else
{
slot = ExecProcNode(planstate); // regular path: fetch the next tuple from the plan state tree
}
#else
slot = unlikely(recursive_early_stop) ? NULL : ExecProcNode(planstate);
#endif
······
if (TupIsNull(slot)) // no tuple came back: leave the loop
{
if(!is_saved_recursive_union_plan_nodeid)
{
break;
}
ExecEarlyFreeBody(planstate); // reached only when is_saved_recursive_union_plan_nodeid is set
break;
}
······
if (operation == CMD_SELECT) // es_processed is incremented here for SELECT only
{
(estate->es_processed)++;
}
current_tuple_count++;
if (numberTuples == current_tuple_count) // a numberTuples of 0 never matches, because the counter is incremented first
{
break;
}
}
······
}
该函数用于执行查询计划,直到取得参数 numberTuples 所指定数目的元组,或者再也取不到元组为止;同时它还设置了获取元组的方向(保存在 estate->es_direction 中),方向有三个:
//代码清单3
//src/include/access/sdir.h
/* Scan direction; ExecutePlan() takes one as a parameter and stores it in estate->es_direction. */
typedef enum ScanDirection
{
BackwardScanDirection = -1,
NoMovementScanDirection = 0,
ForwardScanDirection = 1
} ScanDirection;
BackwardScanDirection 代表向后,NoMovementScanDirection 表示保持原来的位置,ForwardScanDirection 表示向前,传入的方向值最终被保存在执行器状态变量 estate 的成员 es_direction 中。在代码清单2第10行,我们定义了一个长整型变量 current_tuple_count 来记录已经处理的元组的数目,当该数目达到预定数目 numberTuples 时,就会执行第69行开始的 if 语句块,退出循环。如果命令类型是 CMD_SELECT(即查询命令),那么就执行第64行开始的 if 语句块,将处理成功的元组数目 es_processed 加一。第54行的 if 语句块在用来存储得到的元组的变量 slot 为空的情况下执行,因为没有元组需要处理了,所以将退出循环。一般情况下 slot 接收的是 ExecProcNode() 的返回值,而在 recursive_early_stop 为真时会被直接置为 NULL。slot 是 TupleTableSlot* 类型的指针变量,TupleTableSlot 结构体如下:
//代码清单4
//src/include/executor/tuptable.h
typedef struct TupleTableSlot { /* one slot of the executor's tuple table */
NodeTag type;
bool tts_isempty; /* true = slot is empty */
bool tts_shouldFree; /* should pfree tts_tuple? */
bool tts_shouldFreeMin; /* should pfree tts_mintuple? */
bool tts_slow; /* saved state for slot_deform_tuple */
Tuple tts_tuple; /* physical tuple, or NULL if virtual */
#ifdef PGXC
char* tts_dataRow; /* Tuple data in DataRow format */
int tts_dataLen; /* Actual length of the data row */
bool tts_shouldFreeRow; /* should pfree tts_dataRow? */
struct AttInMetadata* tts_attinmeta; /* store here info to extract values from the DataRow */
Oid tts_xcnodeoid; /* Oid of node from where the datarow is fetched */
MemoryContext tts_per_tuple_mcxt; /* deleted by ExecResetTupleTable() when the slot itself is freed */
#endif
TupleDesc tts_tupleDescriptor; /* slot's tuple descriptor */
MemoryContext tts_mcxt; /* slot itself is in this context */
Buffer tts_buffer; /* tuple's buffer, or InvalidBuffer */
int tts_nvalid; /* # of valid values in tts_values */
Datum* tts_values; /* current per-attribute values */
bool* tts_isnull; /* current per-attribute isnull flags */
Datum* tts_lobPointers; /* freed together with the slot in ExecResetTupleTable(); purpose not shown in this excerpt */
MinimalTuple tts_mintuple; /* minimal tuple, or NULL if none */
HeapTupleData tts_minhdr; /* workspace for minimal-tuple-only case */
long tts_off; /* saved state for slot_deform_tuple */
long tts_meta_off; /* saved state for slot_deform_cmpr_tuple */
TableAmType tts_tupslotTableAm; /* slots's tuple table type */
} TupleTableSlot;
代码清单4第10~17行(#ifdef PGXC 与 #endif 之间)定义的成员变量用来支持在 PGXC 架构下接收来自远程数据节点的元组(以 DataRow 格式存放),其它的都是和 TupleTableSlot 结构体变量 slot 有关的状态变量。在代码清单2第48、51行,我们利用 ExecProcNode() 从计划状态树中获得一个结果元组,并用 slot 接收它;接着 ExecutePlan() 函数将根据整个语句的操作类型调用相应的函数进行最后的处理。这些元组槽都登记在元组表中,元组表可以认为是在 EState 结构体变量 estate 中,因为 estate 的成员变量 es_tupleTable 指向了存储元组表的链表。关于 EState 结构体可以翻看我先前的博客,关于 TupleTableSlot 结构体可以看一下这篇文章:元组表。
ExecEndPlan()
//代码清单5
//src/gausskernel/runtime/executor/execMain.cpp
static void ExecEndPlan(PlanState *planstate, EState *estate) /* Shut down the main plan tree and every subplan, then reset the tuple table. */
{
······
ListCell *l = NULL;
ExecEndNode(planstate); /* main plan tree first */
foreach (l, estate->es_subplanstates) {
PlanState *subplanstate = (PlanState *)lfirst(l);
ExecEndNode(subplanstate); /* entry may be NULL (InitPlan() appends NULL for subplans it skips); presumably ExecEndNode() tolerates that -- confirm */
}
ExecResetTupleTable(estate->es_tupleTable, false); /* false: clear the slots but free neither them nor the list */
······
}
ExecEndPlan() 用来关闭文件并释放内存,以关闭查询计划。主要用到的函数是 ExecEndNode() ,这个函数用来递归地清理以 node 为根节点的计划树上的所有节点,在代码清单5第7行这个 node 就是 planstate ,第10行就是 subplanstate 。最后再调用 ExecResetTupleTable() 来重置元组表:由于这里传入的第二个参数为 false,事实上只是将元组表中各个槽的内容清除;只有当该函数的第二个参数为 true 时才会将元组表完全销毁,也就是连同各个槽以及存储元组表的链表本身一并释放,这些通过回溯源码都能够很清楚地看到。执行器将元组存储在一个元组表中,元组表实际上是一条由多个单独的 TupleTableSlot 结构体组成的链表。这一点可以从 ExecResetTupleTable() 函数中看出来:
//代码清单6
//src/gausskernel/runtime/executor/execTuples.cpp
void ExecResetTupleTable(List* tuple_table, /* tuple table */
bool should_free) /* true if we should free memory */
{ /* Clear every slot in tuple_table; when should_free is true, also free each slot and the list itself. */
ListCell* lc = NULL;
foreach (lc, tuple_table) {
TupleTableSlot* slot = (TupleTableSlot*)lfirst(lc);
/* Always release resources and reset the slot to empty */
(void)ExecClearTuple(slot);
if (slot->tts_tupleDescriptor) { /* release the descriptor, if any, and forget it */
ReleaseTupleDesc(slot->tts_tupleDescriptor);
slot->tts_tupleDescriptor = NULL;
}
/* If shouldFree, release memory occupied by the slot itself */
if (should_free) {
if (slot->tts_values) /* NOTE(review): pfree_ext is called unguarded on tts_lobPointers below, so this NULL check may be redundant -- confirm */
pfree_ext(slot->tts_values);
if (slot->tts_isnull)
pfree_ext(slot->tts_isnull);
pfree_ext(slot->tts_lobPointers);
if (slot->tts_per_tuple_mcxt) /* NOTE(review): this member is declared under #ifdef PGXC in the struct, yet used unconditionally here */
MemoryContextDelete(slot->tts_per_tuple_mcxt);
pfree_ext(slot);
}
}
/* If shouldFree, release the list structure */
if (should_free) {
list_free_ext(tuple_table);
}
}
该函数的第一个参数 tuple_table 在代码清单6第7行的 foreach 中被引用,而 foreach 我在之前的博客《对pquery.cpp的解析(二)》中也已经提到了:
//代码清单7
//src/include/nodes/pg_list.h
/* Loop header that walks List l: cell starts at list_head(l), advances with lnext(), and the loop ends when cell is NULL. */
#define foreach(cell, l) for ((cell) = list_head(l); (cell) != NULL; (cell) = lnext(cell))
它的作用就是逐个访问链表中的每一个节点,其中 cell 必须是 ListCell* 类型的,l 必须是 List* 类型的。ExecResetTupleTable() 的第二个参数 should_free 的作用也很明显了,它在代码清单6第16行和第28行开始的两个 if 判断结构中均起作用。
总结
这篇博客主要讲了 InitPlan()、ExecutePlan()、ExecEndPlan() 这三个函数,它们是 Executor 模块之后的又一个小模块 Plan 模块对外的接口函数。这几个函数分别在 Executor 模块中的 standard_ExecutorStart()、standard_ExecutorRun()、standard_ExecutorEnd() 函数中被调用,换句话说,Executor 模块将执行计划下沉到 Plan 模块,之后由 Plan 模块接手继续对执行计划进行分析处理。