PostgreSQL内核开发过程介绍(五)- 语句分析之原始解析树
转换阶段
原始解析树经过函数parse_analyze得到查询树部分调用栈如下:
transformStmt analyze.c:299
transformOptionalSelectInto analyze.c:268
transformTopLevelStmt analyze.c:218
parse_analyze analyze.c:127
pg_analyze_and_rewrite postgres.c:656
exec_simple_query postgres.c:1129
PostgresMain postgres.c:4508
BackendRun postmaster.c:4537
BackendStartup postmaster.c:4259
ServerLoop postmaster.c:1745
PostmasterMain postmaster.c:1417
main main.c:209
transformStmt 递归转换解析树为查询树,根据Node类型调用不同的函数
- T_InsertStmt: transformInsertStmt
- T_DeleteStmt: transformDeleteStmt
- T_UpdateStmt: transformUpdateStmt
- T_SelectStmt: 如果valuesLists有值transformValuesClause; 如果op为SETNOP_NODE transformSelectStmt; 否则调用transformSetOperationStmt
- T_ReturnStmt:transformReturnStmt
- T_PLAssignStmt:transformPLAssignStmt
- T_DeclareCursorStmt:transformDeclareCursorStmt
- T_ExplainStmt:transformExplainStmt
- T_CreateTableAsStmt:transformCreateTableAsStmt
- T_CallStmt:transformCallStmt
涉及结构体
typedef struct Query
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
QuerySource querySource; /* where did I come from? */
uint64 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasTargetSRFs; /* has set-returning functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
bool isReturn; /* is a RETURN statement */
List *cteList; /* WITH list (of CommonTableExpr's) */
List *rtable; /* list of range table entries */
FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */
List *targetList; /* target list (of TargetEntry) */
OverridingKind override; /* OVERRIDING clause */
OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
List *groupingSets; /* a list of GroupingSet's if present */
Node *havingQual; /* qualifications applied to groups */
List *windowClause; /* a list of WindowClause's */
List *distinctClause; /* a list of SortGroupClause's */
List *sortClause; /* a list of SortGroupClause's */
Node *limitOffset; /* # of result tuples to skip (int8 expr) */
Node *limitCount; /* # of result tuples to return (int8 expr) */
LimitOption limitOption; /* limit type */
List *rowMarks; /* a list of RowMarkClause's */
Node *setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
List *withCheckOptions; /* a list of WithCheckOption's (added
* during rewrite) */
/*
* The following two fields identify the portion of the source text string
* containing this query. They are typically only populated in top-level
* Queries, not in sub-queries. When not set, they might both be zero, or
* both be -1 meaning "unknown".
*/
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} Query;
typedef enum RTEKind
{
RTE_RELATION, /* ordinary relation reference */
RTE_SUBQUERY, /* subquery in FROM */
RTE_JOIN, /* join */
RTE_FUNCTION, /* function in FROM */
RTE_TABLEFUNC, /* TableFunc(.., column list) */
RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */
RTE_CTE, /* common table expr (WITH list element) */
RTE_NAMEDTUPLESTORE, /* tuplestore, e.g. for AFTER triggers */
RTE_RESULT /* RTE represents an empty FROM clause; such
* RTEs are added by the planner, they're not
* present during parsing or rewriting */
} RTEKind;
/*
* 范围表是RangeTblEntry节点的列表
*
* 一个范围表条目可以表示一个普通关系,一个子选择
* 来自JOIN子句或JOIN子句的结果。(只有显式连接语法
* 产生一个RTE,而不是多个from产生的隐式连接
* 物品。这是因为我们只需要RTE来处理SQL特性
* 比如外连接和连接输出列别名。)其他特殊
* RTE类型也存在,如RTEKind所示。
*
* 请注意,我们认为RTE_RELATION涵盖了所有具有pg_class的对象
* 条目。Relkind区分了子情形。
*
* alias是一个别名节点,表示附加到的AS别名子句
* FROM表达式,如果没有子句,则为NULL。
*
* eref 是表名和列名
/
typedef struct RangeTblEntry
{
NodeTag type;
RTEKind rtekind; /* see above */
/*
* XXX the fields applicable to only some rte kinds should be merged into
* a union. I didn't do this yet because the diffs would impact a lot of
* code that is being actively worked on. FIXME someday.
*/
/*
* Fields valid for a plain relation RTE (else zero):
*
* As a special case, RTE_NAMEDTUPLESTORE can also set relid to indicate
* that the tuple format of the tuplestore is the same as the referenced
* relation. This allows plans referencing AFTER trigger transition
* tables to be invalidated if the underlying table is altered.
*
* rellockmode is really LOCKMODE, but it's declared int to avoid having
* to include lock-related headers here. It must be RowExclusiveLock if
* the RTE is an INSERT/UPDATE/DELETE target, else RowShareLock if the RTE
* is a SELECT FOR UPDATE/FOR SHARE target, else AccessShareLock.
*
* Note: in some cases, rule expansion may result in RTEs that are marked
* with RowExclusiveLock even though they are not the target of the
* current query; this happens if a DO ALSO rule simply scans the original
* target table. We leave such RTEs with their original lockmode so as to
* avoid getting an additional, lesser lock.
*/
Oid relid; /* OID of the relation */
char relkind; /* relation kind (see pg_class.relkind) */
int rellockmode; /* lock level that query requires on the rel */
struct TableSampleClause *tablesample; /* sampling info, or NULL */
/*
* Fields valid for a subquery RTE (else NULL):
*/
Query *subquery; /* the sub-query */
bool security_barrier; /* is from security_barrier view? */
/*
* Fields valid for a join RTE (else NULL/zero):
*
* joinaliasvars is a list of (usually) Vars corresponding to the columns
* of the join result. An alias Var referencing column K of the join
* result can be replaced by the K'th element of joinaliasvars --- but to
* simplify the task of reverse-listing aliases correctly, we do not do
* that until planning time. In detail: an element of joinaliasvars can
* be a Var of one of the join's input relations, or such a Var with an
* implicit coercion to the join's output column type, or a COALESCE
* expression containing the two input column Vars (possibly coerced).
* Elements beyond the first joinmergedcols entries are always just Vars,
* and are never referenced from elsewhere in the query (that is, join
* alias Vars are generated only for merged columns). We keep these
* entries only because they're needed in expandRTE() and similar code.
*
* Within a Query loaded from a stored rule, it is possible for non-merged
* joinaliasvars items to be null pointers, which are placeholders for
* (necessarily unreferenced) columns dropped since the rule was made.
* Also, once planning begins, joinaliasvars items can be almost anything,
* as a result of subquery-flattening substitutions.
*
* joinleftcols is an integer list of physical column numbers of the left
* join input rel that are included in the join; likewise joinrighttcols
* for the right join input rel. (Which rels those are can be determined
* from the associated JoinExpr.) If the join is USING/NATURAL, then the
* first joinmergedcols entries in each list identify the merged columns.
* The merged columns come first in the join output, then remaining
* columns of the left input, then remaining columns of the right.
*
* Note that input columns could have been dropped after creation of a
* stored rule, if they are not referenced in the query (in particular,
* merged columns could not be dropped); this is not accounted for in
* joinleftcols/joinrighttcols.
*/
JoinType jointype; /* type of join */
int joinmergedcols; /* number of merged (JOIN USING) columns */
List *joinaliasvars; /* list of alias-var expansions */
List *joinleftcols; /* left-side input column numbers */
List *joinrightcols; /* right-side input column numbers */
/*
* join_using_alias is an alias clause attached directly to JOIN/USING. It
* is different from the alias field (below) in that it does not hide the
* range variables of the tables being joined.
*/
Alias *join_using_alias;
/*
* Fields valid for a function RTE (else NIL/zero):
*
* When funcordinality is true, the eref->colnames list includes an alias
* for the ordinality column. The ordinality column is otherwise
* implicit, and must be accounted for "by hand" in places such as
* expandRTE().
*/
List *functions; /* list of RangeTblFunction nodes */
bool funcordinality; /* is this called WITH ORDINALITY? */
/*
* Fields valid for a TableFunc RTE (else NULL):
*/
TableFunc *tablefunc;
/*
* Fields valid for a values RTE (else NIL):
*/
List *values_lists; /* list of expression lists */
/*
* Fields valid for a CTE RTE (else NULL/zero):
*/
char *ctename; /* name of the WITH list item */
Index ctelevelsup; /* number of query levels up */
bool self_reference; /* is this a recursive self-reference? */
/*
* Fields valid for CTE, VALUES, ENR, and TableFunc RTEs (else NIL):
*
* We need these for CTE RTEs so that the types of self-referential
* columns are well-defined. For VALUES RTEs, storing these explicitly
* saves having to re-determine the info by scanning the values_lists. For
* ENRs, we store the types explicitly here (we could get the information
* from the catalogs if 'relid' was supplied, but we'd still need these
* for TupleDesc-based ENRs, so we might as well always store the type
* info here). For TableFuncs, these fields are redundant with data in
* the TableFunc node, but keeping them here allows some code sharing with
* the other cases.
*
* For ENRs only, we have to consider the possibility of dropped columns.
* A dropped column is included in these lists, but it will have zeroes in
* all three lists (as well as an empty-string entry in eref). Testing
* for zero coltype is the standard way to detect a dropped column.
*/
List *coltypes; /* OID list of column type OIDs */
List *coltypmods; /* integer list of column typmods */
List *colcollations; /* OID list of column collation OIDs */
/*
* Fields valid for ENR RTEs (else NULL/zero):
*/
char *enrname; /* name of ephemeral named relation */
double enrtuples; /* estimated or actual from caller */
/*
* Fields valid in all RTEs:
*/
Alias *alias; /* user-written alias clause, if any */
Alias *eref; /* expanded reference names */
bool lateral; /* subquery, function, or values is LATERAL? */
bool inh; /* inheritance requested? */
bool inFromCl; /* present in FROM clause? */
AclMode requiredPerms; /* bitmask of required access permissions */
Oid checkAsUser; /* if valid, check access as this role */
Bitmapset *selectedCols; /* columns needing SELECT permission */
Bitmapset *insertedCols; /* columns needing INSERT permission */
Bitmapset *updatedCols; /* columns needing UPDATE permission */
Bitmapset *extraUpdatedCols; /* generated columns being updated */
List *securityQuals; /* security barrier quals to apply, if any */
} RangeTblEntry;
typedef struct FromExpr
{
NodeTag type;
List *fromlist; /* List of join subtrees */
Node *quals; /* qualifiers on join, if any */
} FromExpr;
typedef struct TargetEntry
{
Expr xpr;
Expr *expr; /* expression to evaluate */
AttrNumber resno; /* attribute number (see notes above) */
char *resname; /* name of the column (could be NULL) */
Index ressortgroupref; /* nonzero if referenced by a sort/group
* clause */
Oid resorigtbl; /* OID of column's source table */
AttrNumber resorigcol; /* column's number in source table */
bool resjunk; /* set to true to eliminate the attribute from
* final target list */
} TargetEntry;
typedef struct Var
{
Expr xpr;
Index varno; /* index of this var's relation in the range
* table, or INNER_VAR/OUTER_VAR/INDEX_VAR */
AttrNumber varattno; /* attribute number of this var, or zero for
* all attrs ("whole-row Var") */
Oid vartype; /* pg_type OID for the type of this var */
int32 vartypmod; /* pg_attribute typmod value */
Oid varcollid; /* OID of collation, or InvalidOid if none */
Index varlevelsup; /* for subquery variables referencing outer
* relations; 0 in a normal var, >0 means N
* levels up */
Index varnosyn; /* syntactic relation index (0 if unknown) */
AttrNumber varattnosyn; /* syntactic attribute number */
int location; /* token location, or -1 if unknown */
} Var;
typedef struct RangeTblRef
{
NodeTag type;
int rtindex;
} RangeTblRef;
typedef struct OpExpr
{
Expr xpr;
Oid opno; /* PG_OPERATOR OID of the operator */
Oid opfuncid; /* PG_PROC OID of underlying function */
Oid opresulttype; /* PG_TYPE OID of result value */
bool opretset; /* true if operator returns set */
Oid opcollid; /* OID of collation of result */
Oid inputcollid; /* OID of collation that operator should use */
List *args; /* arguments to the operator (1 or 2) */
int location; /* token location, or -1 if unknown */
} OpExpr;