文章目录
Path和JoinPath和NestPath
/*
* Type "Path" is used as-is for sequential-scan paths, as well as some other
* simple plan types that we don't need any extra information in the path for.
* For other path types it is the first component of a larger struct.
*
* "pathtype" is the NodeTag of the Plan node we could build from this Path.
* It is partially redundant with the Path's NodeTag, but allows us to use
* the same Path type for multiple Plan types when there is no need to
* distinguish the Plan type during path processing.
*
* "parent" identifies the relation this Path scans, and "pathtarget"
* describes the precise set of output columns the Path would compute.
* In simple cases all Paths for a given rel share the same targetlist,
* which we represent by having path->pathtarget equal to parent->reltarget.
*
* "param_info", if not NULL, links to a ParamPathInfo that identifies outer
* relation(s) that provide parameter values to each scan of this path.
* That means this path can only be joined to those rels by means of nestloop
* joins with this path on the inside. Also note that a parameterized path
* is responsible for testing all "movable" joinclauses involving this rel
* and the specified outer rel(s).
*
* "rows" is the same as parent->rows in simple paths, but in parameterized
* paths and UniquePaths it can be less than parent->rows, reflecting the
* fact that we've filtered by extra join conditions or removed duplicates.
*
* "pathkeys" is a List of PathKey nodes (see above), describing the sort
* ordering of the path's output rows.
*/
typedef struct Path
{
NodeTag type;
NodeTag pathtype; /* tag identifying scan/join method */
RelOptInfo *parent; /* the relation this path can build */
PathTarget *pathtarget; /* list of Vars/Exprs, cost, width */
ParamPathInfo *param_info; /* parameterization info, or NULL if none */
bool parallel_aware; /* engage parallel-aware logic? */
bool parallel_safe; /* OK to use as part of parallel plan? */
int parallel_workers; /* desired # of workers; 0 = not parallel */
/* estimated size/costs for path (see costsize.c for more info) */
double rows; /* estimated number of result tuples */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
List *pathkeys; /* sort ordering of path's output */
/* pathkeys is a List of PathKey nodes; see above */
} Path;
typedef struct JoinPath
{
Path path;
JoinType jointype;
bool inner_unique; /* each outer tuple provably matches no more
* than one inner tuple */
Path *outerjoinpath; /* path for the outer side of the join */
Path *innerjoinpath; /* path for the inner side of the join */
List *joinrestrictinfo; /* RestrictInfos to apply to join */
/*
* See the notes for RelOptInfo and ParamPathInfo to understand why
* joinrestrictinfo is needed in JoinPath, and can't be merged into the
* parent RelOptInfo.
*/
} JoinPath;
/*
* A nested-loop path needs no special fields.
*/
typedef JoinPath NestPath;
疑问:为什么joinpath一定要有joinrestrictinfo字段呢?不能融入到parent RelOptInfo吗?
try_nestloop_path
- 那个pathkeys肯定外表的pathkey
- 因为它假设外表有序
- 其实是在外路径的pathkeys上加工了下
- 注意是假设
- 截取一点点片段理解下啊
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
if (save_jointype == JOIN_UNIQUE_INNER)
{
/*
* Consider nestloop join, but only with the unique-ified cheapest
* inner path
*/
try_nestloop_path(root,
joinrel,
outerpath,
inner_cheapest_total,
merge_pathkeys,
jointype,
extra);
}
/* Macro for extracting a path's parameterization relids; beware double eval */
#define PATH_REQ_OUTER(path) \
((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL)
/*
*
* Consider a nestloop join path; if it appears useful, push it into
* the joinrel's pathlist via add_path()
*/
static void
try_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *pathkeys,
JoinType jointype,
JoinPathExtraData *extra)
{
Relids required_outer;
JoinCostWorkspace workspace;
RelOptInfo *innerrel = inner_path->parent;
RelOptInfo *outerrel = outer_path->parent;
Relids innerrelids;
Relids outerrelids;
Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
/*
* Paths are parameterized by top-level parents, so run parameterization
* tests on the parent relids.
*/
if (innerrel->top_parent_relids)
innerrelids = innerrel->top_parent_relids;
else
innerrelids = innerrel->relids;
if (outerrel->top_parent_relids)
outerrelids = outerrel->top_parent_relids;
else
outerrelids = outerrel->relids;
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible --- unless allow_star_schema_join
* says to allow it anyway. Also, we must reject if have_dangerous_phv
* doesn't like the look of it, which could only happen if the nestloop is
* still parameterized.
*/
required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
innerrelids, inner_paramrels);
if (required_outer &&
((!bms_overlap(required_outer, extra->param_source_rels) &&
!allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
have_dangerous_phv(root, outerrelids, inner_paramrels)))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
return;
}
/*
* Do a precheck to quickly eliminate obviously-inferior paths. We
* calculate a cheap lower bound on the path's cost and then use
* add_path_precheck() to see if the path is clearly going to be dominated
* by some existing path for the joinrel. If not, do the full pushup with
* creating a fully valid path structure and submitting it to add_path().
* The latter two steps are expensive enough to make this two-phase
* methodology worthwhile.
*/
initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path, extra);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
pathkeys, required_outer))
{
/*
* If the inner path is parameterized, it is parameterized by the
* topmost parent of the outer rel, not the outer rel itself. Fix
* that.
*/
if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
{
inner_path = reparameterize_path_by_child(root, inner_path,
outer_path->parent);
/*
* If we could not translate the path, we can't create nest loop
* path.
*/
if (!inner_path)
{
bms_free(required_outer);
return;
}
}
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
&workspace,
extra,
outer_path,
inner_path,
extra->restrictlist,
pathkeys,
required_outer));
}
else
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
}
}
create_nestloop_path
/*
*
* 'workspace' is the result from initial_cost_nestloop
* 'extra' contains various information about the join
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'required_outer' is the set of required outer rels
*
*/
NestPath *
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
Relids inner_req_outer = PATH_REQ_OUTER(inner_path);
/*
* If the inner path is parameterized by the outer, we must drop any
* restrict_clauses that are due to be moved into the inner path. We have
* to do this now, rather than postpone the work till createplan time,
* because the restrict_clauses list can affect the size and cost
* estimates for this path.
*/
if (bms_overlap(inner_req_outer, outer_path->parent->relids))
{
Relids inner_and_outer = bms_union(inner_path->parent->relids,
inner_req_outer);
List *jclauses = NIL;
ListCell *lc;
foreach(lc, restrict_clauses)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
if (!join_clause_is_movable_into(rinfo,
inner_path->parent->relids,
inner_and_outer))
jclauses = lappend(jclauses, rinfo);
}
restrict_clauses = jclauses;
}
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
pathnode->path.pathtarget = joinrel->reltarget;
pathnode->path.param_info =
get_joinrel_parampathinfo(root,
joinrel,
outer_path,
inner_path,
extra->sjinfo,
required_outer,
&restrict_clauses);
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = joinrel->consider_parallel &&
outer_path->parallel_safe && inner_path->parallel_safe;
/* This is a foolish way to estimate parallel_workers, but for now... */
pathnode->path.parallel_workers = outer_path->parallel_workers;
pathnode->path.pathkeys = pathkeys;
pathnode->jointype = jointype;
pathnode->inner_unique = extra->inner_unique;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
final_cost_nestloop(root, pathnode, workspace, extra);
return pathnode;
}
create_nestloop_plan
select * from test1 , test2 where test1.id2=test2.id2 ;
static NestLoop *
create_nestloop_plan(PlannerInfo *root,
NestPath *best_path)
{
NestLoop *join_plan;
Plan *outer_plan;
Plan *inner_plan;
List *tlist = build_path_tlist(root, &best_path->path);
//这个执行完后,tlist长度是4
List *joinrestrictclauses = best_path->joinrestrictinfo;
//执行完这个后,joinrestrictclauses是长度为1
List *joinclauses;
List *otherclauses;
Relids outerrelids;
List *nestParams;
Relids saveOuterRels = root->curOuterRels;
//为空
/* NestLoop can project, so no need to be picky about child tlists */
outer_plan = create_plan_recurse(root, best_path->outerjoinpath, 0);
/*
(gdb) print *outer_plan
$7 = {type = T_SeqScan, startup_cost = 0, total_cost = 1443,
plan_rows = 100000, plan_width = 8, parallel_aware = false,
parallel_safe = true, plan_node_id = 0, targetlist = 0x1b2ee70, qual = 0x0,
lefttree = 0x0, righttree = 0x0, initPlan = 0x0, extParam = 0x0,
allParam = 0x0}
*/
/* For a nestloop, include outer relids in curOuterRels for inner side */
root->curOuterRels = bms_union(root->curOuterRels,
best_path->outerjoinpath->parent->relids);
//然后这个curOuterRels的nwords=1,words[0]=2
//明显第一个表做了外表
inner_plan = create_plan_recurse(root, best_path->innerjoinpath, 0);
/*
$17 = {type = T_Material, startup_cost = 0, total_cost = 1943,
plan_rows = 100000, plan_width = 8, parallel_aware = false,
parallel_safe = true, plan_node_id = 0, targetlist = 0x1b2efd0, qual = 0x0,
lefttree = 0x1b2f070, righttree = 0x0, initPlan = 0x0, extParam = 0x0,
allParam = 0x0}
(gdb)
*/
/* Restore curOuterRels */
bms_free(root->curOuterRels);
root->curOuterRels = saveOuterRels;
/* Sort join qual clauses into best execution order */
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
//长度为1
/* Get the join qual clauses (in plain expression form) */
/* Any pseudoconstant clauses are ignored here */
if (IS_OUTER_JOIN(best_path->jointype))
{
extract_actual_join_clauses(joinrestrictclauses,
best_path->path.parent->relids,
&joinclauses, &otherclauses);
}
else
{
/* We can treat all clauses alike for an inner join */
joinclauses = extract_actual_clauses(joinrestrictclauses, false);
otherclauses = NIL;
}
/* Replace any outer-relation variables with nestloop params */
if (best_path->path.param_info)
{
joinclauses = (List *)
replace_nestloop_params(root, (Node *) joinclauses);
otherclauses = (List *)
replace_nestloop_params(root, (Node *) otherclauses);
}
/*
* Identify any nestloop parameters that should be supplied by this join
* node, and remove them from root->curOuterParams.
*/
outerrelids = best_path->outerjoinpath->parent->relids;
//outerrelis->words[0]=1;
nestParams = identify_current_nestloop_params(root, outerrelids);
//为空
join_plan = make_nestloop(tlist,
joinclauses,
otherclauses,
nestParams,
outer_plan,
inner_plan,
best_path->jointype,
best_path->inner_unique);
copy_generic_path_info(&join_plan->join.plan, &best_path->path);
return join_plan;
}
- 这个计划创建完后,打印这个plan
$24 = {join = {plan = {type = T_NestLoop, startup_cost = 0,
total_cost = 150003136, plan_rows = 100008300, plan_width = 16,
parallel_aware = false, parallel_safe = true, plan_node_id = 0,
targetlist = 0x1b2ec30, qual = 0x0, lefttree = 0x1b2e7e0,
righttree = 0x1b2f100, initPlan = 0x0, extParam = 0x0, allParam = 0x0},
jointype = JOIN_INNER, inner_unique = false, joinqual = 0x1b2f1b0},
nestParams = 0x0}
make_nestloop
static NestLoop *
make_nestloop(List *tlist,
List *joinclauses,
List *otherclauses,
List *nestParams,
Plan *lefttree,
Plan *righttree,
JoinType jointype,
bool inner_unique)
{
NestLoop *node = makeNode(NestLoop);
Plan *plan = &node->join.plan;
plan->targetlist = tlist;
plan->qual = otherclauses;
plan->lefttree = lefttree;
plan->righttree = righttree;
node->join.jointype = jointype;
node->join.inner_unique = inner_unique;
node->join.joinqual = joinclauses;
node->nestParams = nestParams;
return node;
}
build_path_list是啥意思
-
可以看出来这个东西主要跟path->pathtarget有关
-
而记得我们在创建nestpath的时候这个值是根据这个赋值语句来的
-
在create_nestloop_path里面的
path->path.pathtarget=joinrel->reltarget;
/*
* Build a target list (ie, a list of TargetEntry) for the Path's output
*
* almost just make_tlist_from_pathtarget(), but we also have to
* deal with replacing nestloop params.
*/
static List *
build_path_tlist(PlannerInfo *root, Path *path)
{
List *tlist = NIL;
Index *sortgrouprefs = path->pathtarget->sortgrouprefs;
int resno = 1;
ListCell *v;
foreach(v, path->pathtarget->exprs)
{
Node *node = (Node *) lfirst(v);
TargetEntry *tle;
/*
* If it's a parameterized path, there might be lateral references in
* the tlist, which need to be replaced with Params. There's no need
* to remake the TargetEntry nodes, so apply this to each list item
* separately.
*/
if (path->param_info)
node = replace_nestloop_params(root, node);
tle = makeTargetEntry((Expr *) node,
resno,
NULL,
false);
if (sortgrouprefs)
tle->ressortgroupref = sortgrouprefs[resno - 1];
tlist = lappend(tlist, tle);
resno++;
}
return tlist;
}
- 输出的是几个投影吧
- 我的SQL语句是四个投影
- 记住joinrestrictclauses就是我们创建路径时候输入的
- extra->restrictlist
extract_actual_clauses
/*
* Extract bare clauses from 'restrictinfo_list', returning either the
* regular ones or the pseudoconstant ones per 'pseudoconstant'.
*/
List *
extract_actual_clauses(List *restrictinfo_list,
bool pseudoconstant)
{
List *result = NIL;
ListCell *l;
foreach(l, restrictinfo_list)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
if (rinfo->pseudoconstant == pseudoconstant)
result = lappend(result, rinfo->clause);
}
return result;
}
ExecInitNestLoop
NestLoopState *
ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
{
NestLoopState *nlstate;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
NL1_printf("ExecInitNestLoop: %s\n",
"initializing node");
/*
* create state structure
*/
nlstate = makeNode(NestLoopState);
nlstate->js.ps.plan = (Plan *) node;
nlstate->js.ps.state = estate;
nlstate->js.ps.ExecProcNode = ExecNestLoop;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &nlstate->js.ps);
/*
* initialize child nodes
*
* If we have no parameters to pass into the inner rel from the outer,
* tell the inner child that cheap rescans would be good. If we do have
* such parameters, then there is no point in REWIND support at all in the
* inner child, because it will always be rescanned with fresh parameter
* values.
*/
outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate, eflags);
if (node->nestParams == NIL)
eflags |= EXEC_FLAG_REWIND;
else
eflags &= ~EXEC_FLAG_REWIND;
innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate, eflags);
/*
* Initialize result slot, type and projection.
*/
ExecInitResultTupleSlotTL(estate, &nlstate->js.ps);
ExecAssignProjectionInfo(&nlstate->js.ps, NULL);
/*
* initialize child expressions
*/
nlstate->js.ps.qual =
ExecInitQual(node->join.plan.qual, (PlanState *) nlstate);
nlstate->js.jointype = node->join.jointype;
nlstate->js.joinqual =
ExecInitQual(node->join.joinqual, (PlanState *) nlstate);
/*
* detect whether we need only consider the first matching inner tuple
*/
nlstate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
nlstate->nl_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(nlstate)));
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype);
}
/*
* finally, wipe the current outer tuple clean.
*/
nlstate->nl_NeedNewOuter = true;
nlstate->nl_MatchedOuter = false;
NL1_printf("ExecInitNestLoop: %s\n",
"node initialized");
return nlstate;
}
ExecNestLoop
/* ----------------------------------------------------------------
* ExecNestLoop(node)
*
* old comments
* Returns the tuple joined from inner and outer tuples which
* satisfies the qualification clause.
*
* It scans the inner relation to join with current outer tuple.
*
* If none is found, next tuple from the outer relation is retrieved
* and the inner relation is scanned from the beginning again to join
* with the outer tuple.
*
* NULL is returned if all the remaining outer tuples are tried and
* all fail to join with the inner tuples.
*
* NULL is also returned if there is no tuple from inner relation.
*
* Conditions:
* -- outerTuple contains current tuple from outer relation and
* the right son(inner relation) maintains "cursor" at the tuple
* returned previously.
* This is achieved by maintaining a scan position on the outer
* relation.
*
* Initial States:
* -- the outer child and the inner child
* are prepared to return the first tuple.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecNestLoop(PlanState *pstate)
{
NestLoopState *node = castNode(NestLoopState, pstate);
NestLoop *nl;
PlanState *innerPlan;
PlanState *outerPlan;
TupleTableSlot *outerTupleSlot;
TupleTableSlot *innerTupleSlot;
ExprState *joinqual;
ExprState *otherqual;
ExprContext *econtext;
ListCell *lc;
CHECK_FOR_INTERRUPTS();
/*
* get information from the node
*/
ENL1_printf("getting info from node");
nl = (NestLoop *) node->js.ps.plan;
joinqual = node->js.joinqual;
otherqual = node->js.ps.qual;
outerPlan = outerPlanState(node);
innerPlan = innerPlanState(node);
econtext = node->js.ps.ps_ExprContext;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous tuple cycle.
*/
ResetExprContext(econtext);
/*
* Ok, everything is setup for the join so now loop until we return a
* qualifying join tuple.
*/
ENL1_printf("entering main loop");
for (;;)
{
/*
* If we don't have an outer tuple, get the next one and reset the
* inner scan.
*/
if (node->nl_NeedNewOuter)
{
ENL1_printf("getting new outer tuple");
outerTupleSlot = ExecProcNode(outerPlan);
/*
* if there are no more outer tuples, then the join is complete..
*/
if (TupIsNull(outerTupleSlot))
{
ENL1_printf("no outer tuple, ending join");
return NULL;
}
ENL1_printf("saving new outer tuple information");
econtext->ecxt_outertuple = outerTupleSlot;
node->nl_NeedNewOuter = false;
node->nl_MatchedOuter = false;
/*
* fetch the values of any outer Vars that must be passed to the
* inner scan, and store them in the appropriate PARAM_EXEC slots.
*/
foreach(lc, nl->nestParams)
{
NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
int paramno = nlp->paramno;
ParamExecData *prm;
prm = &(econtext->ecxt_param_exec_vals[paramno]);
/* Param value should be an OUTER_VAR var */
Assert(IsA(nlp->paramval, Var));
Assert(nlp->paramval->varno == OUTER_VAR);
Assert(nlp->paramval->varattno > 0);
prm->value = slot_getattr(outerTupleSlot,
nlp->paramval->varattno,
&(prm->isnull));
/* Flag parameter value as changed */
innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
paramno);
}
/*
* now rescan the inner plan
*/
ENL1_printf("rescanning inner plan");
ExecReScan(innerPlan);
}
/*
* we have an outerTuple, try to get the next inner tuple.
*/
ENL1_printf("getting new inner tuple");
innerTupleSlot = ExecProcNode(innerPlan);
econtext->ecxt_innertuple = innerTupleSlot;
if (TupIsNull(innerTupleSlot))
{
ENL1_printf("no inner tuple, need new outer tuple");
node->nl_NeedNewOuter = true;
if (!node->nl_MatchedOuter &&
(node->js.jointype == JOIN_LEFT ||
node->js.jointype == JOIN_ANTI))
{
/*
* We are doing an outer join and there were no join matches
* for this outer tuple. Generate a fake join tuple with
* nulls for the inner tuple, and return it if it passes the
* non-join quals.
*/
econtext->ecxt_innertuple = node->nl_NullInnerTupleSlot;
ENL1_printf("testing qualification for outer-join tuple");
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
* qualification was satisfied so we project and return
* the slot containing the result tuple using
* ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
/*
* Otherwise just return to top of loop for a new outer tuple.
*/
continue;
}
/*
* at this point we have a new pair of inner and outer tuples so we
* test the inner and outer tuples to see if they satisfy the node's
* qualification.
*
* Only the joinquals determine MatchedOuter status, but all quals
* must pass to actually return the tuple.
*/
ENL1_printf("testing qualification");
if (ExecQual(joinqual, econtext))
{
node->nl_MatchedOuter = true;
/* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_ANTI)
{
node->nl_NeedNewOuter = true;
continue; /* return to top of loop */
}
/*
* If we only need to join to the first matching inner tuple, then
* consider returning this one, but after that continue with next
* outer tuple.
*/
if (node->js.single_match)
node->nl_NeedNewOuter = true;
if (otherqual == NULL || ExecQual(otherqual, econtext))
{
/*
* qualification was satisfied so we project and return the
* slot containing the result tuple using ExecProject().
*/
ENL1_printf("qualification succeeded, projecting tuple");
return ExecProject(node->js.ps.ps_ProjInfo);
}
else
InstrCountFiltered2(node, 1);
}
else
InstrCountFiltered1(node, 1);
/*
* Tuple fails qual, so free per-tuple memory and try again.
*/
ResetExprContext(econtext);
ENL1_printf("qualification failed, looping");
}
}