以下各种C结构对应于SQL的关系大致如下
C struct | SQL | 说明 |
---|---|---|
Node | 用于Node* | |
Var | 列 | |
RangeTablEntry | 表(包括堆表,子查询,连接表) | |
RangeTblRef | 指向RTE | |
JoinExpr | join | |
FromExpr | from | |
Query | 整个SQL |
Node的结构
这个结构体的定义在nodes.h
typedef struct Node{
NodeTag type; //enum
} Node;
Var结构体
var用于表示一列,是在语义分析阶段从ColumnRef转换而来,数据结构定义在源代码的primnodes.h
typedef struct Var{
Expr xpr;
Index varno; //在Query->rtable中的rtindex
AttrNumber varattno; //列在表里编号
Oid vartype; //列属性
int32 vartypemod; //精度/长度
Oid varcollid; //Oid
Index varlevelsup;//列属性位置,和子查询相关
Index varnoold;//varno的初值
AttrNumber varoattno;//varoattno的初值
int location;//列属性在SQL中位置
}Var;
需要说明的是对于varlevelsup而言,0表示当前查询,而1表示父查询
RangeTblEntry
RangeTblEntry的定义在parsenodes.h里,其中哪些字段有用需要根据RTEKind来看.
typedef enum RTEKind{
RTE_RELATION, /* ordinary relation reference */
RTE_SUBQUERY, /* subquery in FROM */
RTE_JOIN, /* join */
RTE_FUNCTION, /* function in FROM */
RTE_TABLEFUNC, /* TableFunc(.., column list) */
RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */
RTE_CTE, /* common table expr (WITH list element) */
RTE_NAMEDTUPLESTORE, /* tuplestore, e.g. for AFTER triggers */
RTE_RESULT /* RTE represents an empty FROM clause; such
* RTEs are added by the planner, they're not
* present during parsing or rewriting */
} RTEKind;
typedef struct RangeTblEntry{
NodeTag type;
RTEKind rtekind; /* see above */
/* Fields valid for a plain relation RTE (else zero):*/
Oid relid; /* OID of the relation */
char relkind; /* relation kind (see pg_class.relkind) */
int rellockmode; /* lock level that query requires on the rel */
struct TableSampleClause *tablesample; /* sampling info, or NULL */
/* Fields valid for a subquery RTE (else NULL): */
Query *subquery; /* the sub-query */
bool security_barrier; /* is from security_barrier view? */
/* Fields valid for a join RTE (else NULL/zero): */
JoinType jointype; /* type of join */
int joinmergedcols; /* number of merged (JOIN USING) columns */
List *joinaliasvars; /* list of alias-var expansions */
List *joinleftcols; /* left-side input column numbers */
List *joinrightcols; /* right-side input column numbers */
/* Fields valid for a function RTE (else NIL/zero): */
List *functions; /* list of RangeTblFunction nodes */
bool funcordinality; /* is this called WITH ORDINALITY? */
/* Fields valid for a TableFunc RTE (else NULL): */
TableFunc *tablefunc;
/* Fields valid for a values RTE (else NIL): */
List *values_lists; /* list of expression lists */
/* Fields valid for a CTE RTE (else NULL/zero): */
char *ctename; /* name of the WITH list item */
Index ctelevelsup; /* number of query levels up */
bool self_reference; /* is this a recursive self-reference? */
/* Fields valid for CTE, VALUES, ENR, and TableFunc RTEs (else NIL): */
List *coltypes; /* OID list of column type OIDs */
List *coltypmods; /* integer list of column typmods */
List *colcollations; /* OID list of column collation OIDs */
/* Fields valid for ENR RTEs (else NULL/zero): */
char *enrname; /* name of ephemeral named relation */
double enrtuples; /* estimated or actual from caller */
/* Fields valid in all RTEs: */
Alias *alias; /* user-written alias clause, if any */
Alias *eref; /* expanded reference names */
bool lateral; /* subquery, function, or values is LATERAL? */
bool inh; /* inheritance requested? */
bool inFromCl; /* present in FROM clause? */
AclMode requiredPerms; /* bitmask of required access permissions */
Oid checkAsUser; /* if valid, check access as this role */
Bitmapset *selectedCols; /* columns needing SELECT permission */
Bitmapset *insertedCols; /* columns needing INSERT permission */
Bitmapset *updatedCols; /* columns needing UPDATE permission */
Bitmapset *extraUpdatedCols; /* generated columns being updated */
List *securityQuals; /* security barrier quals to apply, if any */
} RangeTblEntry;
RangeTblRef
这个结构体定义在primnodes.h
typedef struct RangeTblRef
{
NodeTag type;
int rtindex;
} RangeTblRef;
JoinExpr
这个结构体定义在primnodes.h
typedef struct JoinExpr
{
NodeTag type;
JoinType jointype; /* type of join */
bool isNatural; /* Natural join? Will need to shape table */
Node *larg; /* left subtree */
Node *rarg; /* right subtree */
List *usingClause; /* USING clause, if any (list of String) */
Node *quals; /* qualifiers on join, if any */
Alias *alias; /* user-written alias clause, if any */
int rtindex; /* RT index assigned for join, or 0 */
} JoinExpr;
而其中的JoinType则是定义在nodes.h,如下所示
typedef enum JoinType
{
/*
* The canonical kinds of joins according to the SQL JOIN syntax. Only
* these codes can appear in parser output (e.g., JoinExpr nodes).
*/
JOIN_INNER, /* matching tuple pairs only */
JOIN_LEFT, /* pairs + unmatched LHS tuples */
JOIN_FULL, /* pairs + unmatched LHS + unmatched RHS */
JOIN_RIGHT, /* pairs + unmatched RHS tuples */
/*
* Semijoins and anti-semijoins (as defined in relational theory) do not
* appear in the SQL JOIN syntax, but there are standard idioms for
* representing them (e.g., using EXISTS). The planner recognizes these
* cases and converts them to joins. So the planner and executor must
* support these codes. NOTE: in JOIN_SEMI output, it is unspecified
* which matching RHS row is joined to. In JOIN_ANTI output, the row is
* guaranteed to be null-extended.
*/
JOIN_SEMI, /* 1 copy of each LHS row that has match(es) */
JOIN_ANTI, /* 1 copy of each LHS row that has no match */
/*
* These codes are used internally in the planner, but are not supported
* by the executor (nor, indeed, by most of the planner).
*/
JOIN_UNIQUE_OUTER, /* LHS path must be made unique */
JOIN_UNIQUE_INNER /* RHS path must be made unique */
} JoinType;
可以看得出来对于Join分为三类,一类是标准的Join,一类是类Join,还有一些内部使用的Join
FromExpr
这个结构体定义在primnodes.h
typedef struct FromExpr
{
NodeTag type;
List *fromlist; /* List of join subtrees */
Node *quals; /* qualifiers on join, if any */
} FromExpr;
对于FromExpr而言,*fromlist的各表是InnerJoin关系
Query
这个结构体定义在定义在parsenodes.h里,表示查询树,由语法分析模块产生,经过逻辑优化和物理优化
/*
* Query -
* Parse analysis turns all statements into a Query tree
* for further processing by the rewriter and planner.
*
* Utility statements (i.e. non-optimizable statements) have the
* utilityStmt field set, and the rest of the Query is mostly dummy.
*
* Planning converts a Query tree into a Plan tree headed by a PlannedStmt
* node --- the Query structure is not used by the executor.
*/
typedef struct Query
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
QuerySource querySource; /* where did I come from? */
uint64 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasTargetSRFs; /* has set-returning functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
List *cteList; /* WITH list (of CommonTableExpr's) */
List *rtable; /* list of range table entries */
FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */
List *targetList; /* target list (of TargetEntry) */
OverridingKind override; /* OVERRIDING clause */
OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
List *groupingSets; /* a list of GroupingSet's if present */
Node *havingQual; /* qualifications applied to groups */
List *windowClause; /* a list of WindowClause's */
List *distinctClause; /* a list of SortGroupClause's */
List *sortClause; /* a list of SortGroupClause's */
Node *limitOffset; /* # of result tuples to skip (int8 expr) */
Node *limitCount; /* # of result tuples to return (int8 expr) */
LimitOption limitOption; /* limit type */
List *rowMarks; /* a list of RowMarkClause's */
Node *setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
List *withCheckOptions; /* a list of WithCheckOption's (added
* during rewrite) */
/*
* The following two fields identify the portion of the source text string
* containing this query. They are typically only populated in top-level
* Queries, not in sub-queries. When not set, they might both be zero, or
* both be -1 meaning "unknown".
*/
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} Query;
其中里面反复出现的List这一数据结构定义在pg_list.h里
typedef union ListCell
{
void *ptr_value;
int int_value;
Oid oid_value;
} ListCell;
typedef struct List
{
NodeTag type; /* T_List, T_IntList, or T_OidList */
int length; /* number of elements currently present */
int max_length; /* allocated length of elements[] */
ListCell *elements; /* re-allocatable array of cells */
/* We may allocate some cells along with the List header: */
ListCell initial_elements[FLEXIBLE_ARRAY_MEMBER];
/* If elements == initial_elements, it's not a separate allocation */
} List;
其中Query中需要重点注意的是rtable,jointree,targetlist,分别表示查询的范围表,连接关系,需要投影的列
展示
在print.h给出了elog_node_display用于在源代码中调用来打印查询树,从而进行分析.
另外官方提供的runtime-config-logging也有很多参数来控制查询树的打印.
- debug_pretty_print 以结构化方式打印
- debug_print_parse 打印查询树
- debug_print_rewritten 重写后打印查询树
- debug_print_rewritten 打印查询计划
遍历
当对查询树进行修改时需要遍历,一般使用query_tree_mutator和query_tree_walker来操作.walker用于读而mutator用于操作
执行计划
可以使用explain来查询执行计划,其参数如下
- VERBOSE
更多的信息 - ANALYZE
查询语句会真正的执行 - BUFFERS
需要和ANALYZE同时使用,打印缓冲区命中率 - COSTS
是否打印代价,默认打开,可以使用costs off来关闭 - TIMING
是否打印实际运行时间,在使用ANALYZE时默认打开,可以使用timing off关闭