postgres源码解析38 表创建执行全流程梳理--2

本小节主要讲解查询执行模块,有机地调用存储、索引、事务、并发等模块,按照执行计划中的计划节点(操作执行接口)完成数据的读取或者修改。知识回顾:postgres源码解析37 表创建执行全流程梳理–1

关键数据结构

总图:
在这里插入图片描述
在这里插入图片描述

1 PortalData
postgres为每条SQL创建一个portal结构体存储查询计划树链表和执行策略等信息:

 typedef struct PortalData *Portal;
 typedef struct PortalData
{
	/* Bookkeeping data */
	const char *name;			/* portal's name */
	const char *prepStmtName;	/* source prepared statement (NULL if none) */
	MemoryContext portalContext;	/* subsidiary memory for portal */  // portal上下文
	ResourceOwner resowner;		/* resources owned by portal */			// portal持有的资源
	void		(*cleanup) (Portal portal); /* cleanup hook */			// 清理函数

	/*
	 * State data for remembering which subtransaction(s) the portal was
	 * created or used in.  If the portal is held over from a previous
	 * transaction, both subxids are InvalidSubTransactionId.  Otherwise,
	 * createSubid is the creating subxact and activeSubid is the last subxact
	 * in which we ran the portal.
	 */
	SubTransactionId createSubid;	/* the creating subxact */
	SubTransactionId activeSubid;	/* the last subxact with activity */ 

	/* The query or queries the portal will execute */
	const char *sourceText;		/* text of query (as of 8.4, never NULL) */  // 输入的SQL语句
	CommandTag	commandTag;		/* command tag for original query */		 // Tag标识 
	QueryCompletion qc;			/* command completion data for executed query */
	List	   *stmts;			/* list of PlannedStmts */					// PlannedStmt结构体
	CachedPlan *cplan;			/* CachedPlan, if stmts are from one */		// Cache plan

	ParamListInfo portalParams; /* params to pass to query */              	// portal参数链表
	QueryEnvironment *queryEnv; /* environment for query */					// 查询环境

	/* Features/options */
	PortalStrategy strategy;	/* see above */								// 策略
	int			cursorOptions;	/* DECLARE CURSOR option bits */			// 游标 bits
	bool		run_once;		/* portal will only be run once */          //是否运行一次

	/* Status data */
	PortalStatus status;		/* see above */								// portal状态
	bool		portalPinned;	/* a pinned portal can't be dropped */		// 是否被 pinned
	bool		autoHeld;		/* was automatically converted from pinned to
								 * held (see HoldPinnedPortals()) */

	/* If not NULL, Executor is active; call ExecutorEnd eventually: */
	QueryDesc  *queryDesc;		/* info needed for executor invocation */		/// 查询描述符

	/* If portal returns tuples, this is their tupdesc: */
	TupleDesc	tupDesc;		/* descriptor for result tuples */				// 元组描述符
	/* and these are the format codes to use for the columns: */
	int16	   *formats;		/* a format code for each column */				// 列形式

	/*
	 * Outermost ActiveSnapshot for execution of the portal's queries.  For
	 * all but a few utility commands, we require such a snapshot to exist.
	 * This ensures that TOAST references in query results can be detoasted,
	 * and helps to reduce thrashing of the process's exposed xmin.
	 */
	Snapshot	portalSnapshot; /* active snapshot, or NULL if none */     // 快照信息

	/*
	 * Where we store tuples for a held cursor or a PORTAL_ONE_RETURNING or
	 * PORTAL_UTIL_SELECT query.  (A cursor held past the end of its
	 * transaction no longer has any active executor state.)
	 */
	Tuplestorestate *holdStore; /* store for holdable cursors */
	MemoryContext holdContext;	/* memory containing holdStore */

	/*
	 * Snapshot under which tuples in the holdStore were read.  We must keep a
	 * reference to this snapshot if there is any possibility that the tuples
	 * contain TOAST references, because releasing the snapshot could allow
	 * recently-dead rows to be vacuumed away, along with any toast data
	 * belonging to them.  In the case of a held cursor, we avoid needing to
	 * keep such a snapshot by forcibly detoasting the data.
	 */
	Snapshot	holdSnapshot;	/* registered snapshot, or NULL if none */

	/*
	 * atStart, atEnd and portalPos indicate the current cursor position.
	 * portalPos is zero before the first row, N after fetching N'th row of
	 * query.  After we run off the end, portalPos = # of rows in query, and
	 * atEnd is true.  Note that atStart implies portalPos == 0, but not the
	 * reverse: we might have backed up only as far as the first row, not to
	 * the start.  Also note that various code inspects atStart and atEnd, but
	 * only the portal movement routines should touch portalPos.
	 */
	bool		atStart;        
	bool		atEnd;
	uint64		portalPos;

	/* Presentation data, primarily used by the pg_cursors system view */
	TimestampTz creation_time;	/* time at which this portal was defined */
	bool		visible;		/* include this portal in pg_cursors? */
}			PortalData;

根据SQL语句的类型的,pg提供5种执行策略:
1)PORTAL_ONE_SELECT:针对单一SELECT语句,支持游标操作
2)PORTAL_ONE_RETURNING:针对含有RETURNING子句的单一 INSERT/DETELE/UPDATE语句。查询执行时,将首先处理所有条件的元组,在执行过程中缓存,最后返回给客户端
3)PORTAL_ONE_MOD_WITH:针对含有data-modifying CTEs 的单一SELECT语句查询
4)PORTAL_UTIL_SELECT: 针对功能性语句但返回结果类似SELECT语句,如EXPLAIN/SHOW
5)PORTAL_MULTI_QUERY: 除上述4中类型之外的语句,这个策略具有一般性,能够处理一个或者多个操作

typedef enum PortalStrategy
{
	PORTAL_ONE_SELECT,
	PORTAL_ONE_RETURNING,
	PORTAL_ONE_MOD_WITH,
	PORTAL_UTIL_SELECT,
	PORTAL_MULTI_QUERY
} PortalStrategy;

PlannedStmt 计划树头节点为plannnedStmt数据结构,其保存执行器所需的一些信息:语句类型(commandType),查询计划树根节点(planTree)、查询涉及的范围表(rtable)和结果关系表(resultRelation)

typedef struct PlannedStmt
{
	NodeTag		type;

	CmdType		commandType;	/* select|insert|update|delete|utility */

	uint64		queryId;		/* query identifier (copied from Query) */

	bool		hasReturning;	/* is it insert|update|delete RETURNING? */ 

	bool		hasModifyingCTE;	/* has insert|update|delete in WITH? */  

	bool		canSetTag;		/* do I set the command result tag? */

	bool		transientPlan;	/* redo plan when TransactionXmin changes? */

	bool		dependsOnRole;	/* is plan specific to current role? */

	bool		parallelModeNeeded; /* parallel mode required to execute? */ // 并行模式

	int			jitFlags;		/* which forms of JIT should be performed */

	struct Plan *planTree;		/* tree of Plan nodes */

	List	   *rtable;			/* list of RangeTblEntry nodes */  // 范围表

	/* rtable indexes of target relations for INSERT/UPDATE/DELETE */
	List	   *resultRelations;	/* integer list of RT indexes, or NIL */

	List	   *appendRelations;	/* list of AppendRelInfo nodes */

	List	   *subplans;		/* Plan trees for SubPlan expressions; note
								 * that some could be NULL */

	Bitmapset  *rewindPlanIDs;	/* indices of subplans that require REWIND */

	List	   *rowMarks;		/* a list of PlanRowMark's */

	List	   *relationOids;	/* OIDs of relations the plan depends on */  // oid

	List	   *invalItems;		/* other dependencies, as PlanInvalItems */

	List	   *paramExecTypes; /* type OIDs for PARAM_EXEC Params */

	Node	   *utilityStmt;	/* non-null if this is utility stmt */ // 功能性语句

	/* statement location in source string (copied from Query) */
	int			stmt_location;	/* start location, or -1 if unknown */ 
	int			stmt_len;		/* length in bytes; 0 means "rest of string" */
} PlannedStmt;

plan的子类节点通过lefttree和righttree字段完成整个查询计划,其根节点指针被保存在PlannedStmt结构体中。包含代价等信息(执行计划的生成的基于CBO生成的)、投影以及位图参数等信息

typedef struct Plan
{
	NodeTag		type;

	/*
	 * estimated execution costs for plan (see costsize.c for more info)
	 */
	Cost		startup_cost;	/* cost expended before fetching any tuples */
	Cost		total_cost;		/* total cost (assuming all tuples fetched) */

	/*
	 * planner's estimate of result size of this plan step
	 */
	double		plan_rows;		/* number of rows plan is expected to emit */
	int			plan_width;		/* average row width in bytes */

	/*
	 * information needed for parallel query
	 */
	bool		parallel_aware; /* engage parallel-aware logic? */
	bool		parallel_safe;	/* OK to use as part of parallel plan? */

	/*
	 * information needed for asynchronous execution
	 */
	bool		async_capable;	/* engage asynchronous-capable logic? */

	/*
	 * Common structural data for all Plan types.
	 */
	int			plan_node_id;	/* unique across entire final plan tree */
	List	   *targetlist;		/* target list to be computed at this node */
	List	   *qual;			/* implicitly-ANDed qual conditions */
	struct Plan *lefttree;		/* input plan tree(s) */
	struct Plan *righttree;
	List	   *initPlan;		/* Init Plan nodes (un-correlated expr
								 * subselects) */

	/*
	 * Information for management of parameter-change-driven rescanning
	 *
	 * extParam includes the paramIDs of all external PARAM_EXEC params
	 * affecting this plan node or its children.  setParam params from the
	 * node's initPlans are not included, but their extParams are.
	 *
	 * allParam includes all the extParam paramIDs, plus the IDs of local
	 * params that affect the node (i.e., the setParams of its initplans).
	 * These are _all_ the PARAM_EXEC params that affect this node.
	 */
	Bitmapset  *extParam;
	Bitmapset  *allParam;
} Plan;

plan节点在执行过程过会生成对应的PlanState结构指针,计划节点的PlanState结构也会按照查询计划树的结构组织成计划节点执行状态树,

typedef struct PlanState
{
	NodeTag		type;

	Plan	   *plan;			/* associated Plan node */

	EState	   *state;			/* at execution time, states of individual
								 * nodes point to one EState for the whole
								 * top-level plan */

	ExecProcNodeMtd ExecProcNode;	/* function to return next tuple */ // 返回下条元组的函数
	ExecProcNodeMtd ExecProcNodeReal;	/* actual function, if above is a   //真正执行函数
										 * wrapper */

	Instrumentation *instrument;	/* Optional runtime stats for this node */
	WorkerInstrumentation *worker_instrument;	/* per-worker instrumentation */

	/* Per-worker JIT instrumentation */
	struct SharedJitInstrumentation *worker_jit_instrument;

	/*
	 * Common structural data for all Plan types.  These links to subsidiary
	 * state trees parallel links in the associated plan tree (except for the
	 * subPlan list, which does not exist in the plan tree).
	 */
	ExprState  *qual;			/* boolean qual condition */   // 
	struct PlanState *lefttree; /* input plan tree(s) */ 
	struct PlanState *righttree;

	List	   *initPlan;		/* Init SubPlanState nodes (un-correlated expr
								 * subselects) */
	List	   *subPlan;		/* SubPlanState nodes in my expressions */

	/*
	 * State for management of parameter-change-driven rescanning
	 */
	Bitmapset  *chgParam;		/* set of IDs of changed Params */

	/*
	 * Other run-time state needed by most if not all node types.
	 */
	TupleDesc	ps_ResultTupleDesc; /* node's return type */  //元组描述符
	TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */  //存放结果元组槽
	ExprContext *ps_ExprContext;	/* node's expression-evaluation context */// 节点表达式上下文
	ProjectionInfo *ps_ProjInfo;	/* info for doing tuple projection */   // 投影信息

	bool		async_capable;	/* true if node is async-capable */

	/*
	 * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
	 * it's hard for expression compilation to optimize based on the
	 * descriptor, without encoding knowledge about all executor nodes.
	 */
	TupleDesc	scandesc;

	/*
	 * Define the slot types for inner, outer and scanslots for expression
	 * contexts with this state as a parent.  If *opsset is set, then
	 * *opsfixed indicates whether *ops is guaranteed to be the type of slot
	 * used. That means that every slot in the corresponding
	 * ExprContext.ecxt_*tuple will point to a slot of that type, while
	 * evaluating the expression.  If *opsfixed is false, but *ops is set,
	 * that indicates the most likely type of slot.
	 *
	 * The scan* fields are set by ExecInitScanTupleSlot(). If that's not
	 * called, nodes can initialize the fields themselves.
	 *
	 * If outer/inneropsset is false, the information is inferred on-demand
	 * using ExecGetResultSlotOps() on ->righttree/lefttree, using the
	 * corresponding node's resultops* fields.
	 *
	 * The result* fields are automatically set when ExecInitResultSlot is
	 * used (be it directly or when the slot is created by
	 * ExecAssignScanProjectionInfo() /
	 * ExecConditionalAssignProjectionInfo()).  If no projection is necessary
	 * ExecConditionalAssignProjectionInfo() defaults those fields to the scan
	 * operations.
	 */
	const TupleTableSlotOps *scanops;
	const TupleTableSlotOps *outerops;
	const TupleTableSlotOps *innerops;
	const TupleTableSlotOps *resultops;
	bool		scanopsfixed;
	bool		outeropsfixed;
	bool		inneropsfixed;
	bool		resultopsfixed;
	bool		scanopsset;
	bool		outeropsset;
	bool		inneropsset;
	bool		resultopsset;
} PlanState;

TupleDescData结构体记录了元组的属性,引用计数和约束等信息,用于元组各属性的扫描

typedef struct TupleDescData
{
	int			natts;			/* number of attributes in the tuple */
	Oid			tdtypeid;		/* composite type ID for tuple type */
	int32		tdtypmod;		/* typmod for tuple type */
	int			tdrefcount;		/* reference count, or -1 if not counting */
	TupleConstr *constr;		/* constraints, or NULL if none */
	/* attrs[N] is the description of Attribute Number N+1 */
	FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
}			TupleDescData;

执行器执行过中会构造全局执行状态信息Estate,该结构体记录了查询涉及的范围表、上下文、用于在节点之间传递元组的全局元组表和元组上下文(每次使用后便会释放)

typedef struct EState
{
	NodeTag		type;

	/* Basic state for all query types: */
	ScanDirection es_direction; /* current scan direction */
	Snapshot	es_snapshot;	/* time qual to use */
	Snapshot	es_crosscheck_snapshot; /* crosscheck time qual for RI */
	List	   *es_range_table; /* List of RangeTblEntry */
	Index		es_range_table_size;	/* size of the range table arrays */
	Relation   *es_relations;	/* Array of per-range-table-entry Relation
								 * pointers, or NULL if not yet opened */
	struct ExecRowMark **es_rowmarks;	/* Array of per-range-table-entry
										 * ExecRowMarks, or NULL if none */
	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */
	const char *es_sourceText;	/* Source text from QueryDesc */

	JunkFilter *es_junkFilter;	/* top-level junk filter, if any */

	/* If query can insert/delete tuples, the command ID to mark them with */
	CommandId	es_output_cid;

	/* Info about target table(s) for insert/update/delete queries: */
	ResultRelInfo **es_result_relations;	/* Array of per-range-table-entry
											 * ResultRelInfo pointers, or NULL
											 * if not a target table */
	List	   *es_opened_result_relations; /* List of non-NULL entries in
											 * es_result_relations in no
											 * specific order */

	PartitionDirectory es_partition_directory;	/* for PartitionDesc lookup */

	/*
	 * The following list contains ResultRelInfos created by the tuple routing
	 * code for partitions that aren't found in the es_result_relations array.
	 */
	List	   *es_tuple_routing_result_relations;

	/* Stuff used for firing triggers: */
	List	   *es_trig_target_relations;	/* trigger-only ResultRelInfos */

	/* Parameter info: */
	ParamListInfo es_param_list_info;	/* values of external params */
	ParamExecData *es_param_exec_vals;	/* values of internal params */

	QueryEnvironment *es_queryEnv;	/* query environment */

	/* Other working state: */
	MemoryContext es_query_cxt; /* per-query context in which EState lives */

	List	   *es_tupleTable;	/* List of TupleTableSlots */

	uint64		es_processed;	/* # of tuples processed */

	int			es_top_eflags;	/* eflags passed to ExecutorStart */
	int			es_instrument;	/* OR of InstrumentOption flags */
	bool		es_finished;	/* true when ExecutorFinish is done */

	List	   *es_exprcontexts;	/* List of ExprContexts within EState */

	List	   *es_subplanstates;	/* List of PlanState for SubPlans */

	List	   *es_auxmodifytables; /* List of secondary ModifyTableStates */

	/*
	 * this ExprContext is for per-output-tuple operations, such as constraint
	 * checks and index-value computations.  It will be reset for each output
	 * tuple.  Note that it will be created only if needed.
	 */
	ExprContext *es_per_tuple_exprcontext;

	/*
	 * If not NULL, this is an EPQState's EState. This is a field in EState
	 * both to allow EvalPlanQual aware executor nodes to detect that they
	 * need to perform EPQ related work, and to provide necessary information
	 * to do so.
	 */
	struct EPQState *es_epq_active;

	bool		es_use_parallel_mode;	/* can we use parallel workers? */

	/* The per-query shared memory area to use for parallel execution. */
	struct dsa_area *es_query_dsa;

	/*
	 * JIT information. es_jit_flags indicates whether JIT should be performed
	 * and with which options.  es_jit is created on-demand when JITing is
	 * performed.
	 *
	 * es_jit_worker_instr is the combined, on demand allocated,
	 * instrumentation from all workers. The leader's instrumentation is kept
	 * separate, and is combined on demand by ExplainPrintJITSummary().
	 */
	int			es_jit_flags;
	struct JitContext *es_jit;
	struct JitInstrumentation *es_jit_worker_instr;
} EState;
执行器函数的调用

在这里插入图片描述

  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值