PostgreSQL 源码解读（5）- 插入数据#4（ExecInsert）

最新推荐文章于 2023-06-28 21:07:16 发布

cuichao1900

最新推荐文章于 2023-06-28 21:07:16 发布

阅读量1.1k

点赞数

本文简单介绍了PG插入数据部分的源码，主要内容包括ExecInsert函数的实现逻辑，该函数位于nodeModifyTable.c文件中。

一、基础信息

按惯例，首先看看ExecInsert函数使用的数据结构、宏定义以及依赖的函数等。
数据结构/宏定义
1、ModifyTableState

 /* ----------------
  *      PlanState node
  *
  * We never actually instantiate any PlanState nodes; this is just the common
  * abstract superclass for all PlanState-type nodes.
  * ----------------
  */
 typedef struct PlanState
 {
     NodeTag     type;
 
     Plan       *plan;           /* associated Plan node */
 
     EState     *state;          /* at execution time, states of individual
                                  * nodes point to one EState for the whole
                                  * top-level plan */
 
     ExecProcNodeMtd ExecProcNode;   /* function to return next tuple */
     ExecProcNodeMtd ExecProcNodeReal;   /* actual function, if above is a
                                          * wrapper */
 
     Instrumentation *instrument;    /* Optional runtime stats for this node */
     WorkerInstrumentation *worker_instrument;   /* per-worker instrumentation */
 
     /*
      * Common structural data for all Plan types.  These links to subsidiary
      * state trees parallel links in the associated plan tree (except for the
      * subPlan list, which does not exist in the plan tree).
      */
     ExprState  *qual;           /* boolean qual condition */
     struct PlanState *lefttree; /* input plan tree(s) */
     struct PlanState *righttree;
 
     List       *initPlan;       /* Init SubPlanState nodes (un-correlated expr
                                  * subselects) */
     List       *subPlan;        /* SubPlanState nodes in my expressions */
 
     /*
      * State for management of parameter-change-driven rescanning
      */
     Bitmapset  *chgParam;       /* set of IDs of changed Params */
 
     /*
      * Other run-time state needed by most if not all node types.
      */
     TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
     ExprContext *ps_ExprContext;    /* node's expression-evaluation context */
     ProjectionInfo *ps_ProjInfo;    /* info for doing tuple projection */
 
     /*
      * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
      * it's hard for expression compilation to optimize based on the
      * descriptor, without encoding knowledge about all executor nodes.
      */
     TupleDesc   scandesc;
 } PlanState;

/* ----------------
  *   ModifyTableState information
  * ----------------
  */
 typedef struct ModifyTableState
 {
     PlanState   ps;             /* its first field is NodeTag */
     CmdType     operation;      /* INSERT, UPDATE, or DELETE */
     bool        canSetTag;      /* do we set the command tag/es_processed? */
     bool        mt_done;        /* are we done? */
     PlanState **mt_plans;       /* subplans (one per target rel) */
     int         mt_nplans;      /* number of plans in the array */
     int         mt_whichplan;   /* which one is being executed (0..n-1) */
     ResultRelInfo *resultRelInfo;   /* per-subplan target relations */
     ResultRelInfo *rootResultRelInfo;   /* root target relation (partitioned
                                          * table root) */
     List      **mt_arowmarks;   /* per-subplan ExecAuxRowMark lists */
     EPQState    mt_epqstate;    /* for evaluating EvalPlanQual rechecks */
     bool        fireBSTriggers; /* do we need to fire stmt triggers? */
     TupleTableSlot *mt_existing;    /* slot to store existing target tuple in */
     List       *mt_excludedtlist;   /* the excluded pseudo relation's tlist  */
     TupleTableSlot *mt_conflproj;   /* CONFLICT ... SET ... projection target */
 
     /* Tuple-routing support info */
     struct PartitionTupleRouting *mt_partition_tuple_routing;
 
     /* controls transition table population for specified operation */
     struct TransitionCaptureState *mt_transition_capture;
 
     /* controls transition table population for INSERT...ON CONFLICT UPDATE */
     struct TransitionCaptureState *mt_oc_transition_capture;
 
     /* Per plan map for tuple conversion from child to root */
     TupleConversionMap **mt_per_subplan_tupconv_maps;
 } ModifyTableState;

2、TupleTableSlot

/*----------
  * The executor stores tuples in a "tuple table" which is a List of
  * independent TupleTableSlots.  There are several cases we need to handle:
  *      1. physical tuple in a disk buffer page
  *      2. physical tuple constructed in palloc'ed memory
  *      3. "minimal" physical tuple constructed in palloc'ed memory
  *      4. "virtual" tuple consisting of Datum/isnull arrays
  *
  * The first two cases are similar in that they both deal with "materialized"
  * tuples, but resource management is different.  For a tuple in a disk page
  * we need to hold a pin on the buffer until the TupleTableSlot's reference
  * to the tuple is dropped; while for a palloc'd tuple we usually want the
  * tuple pfree'd when the TupleTableSlot's reference is dropped.
  *
  * A "minimal" tuple is handled similarly to a palloc'd regular tuple.
  * At present, minimal tuples never are stored in buffers, so there is no
  * parallel to case 1.  Note that a minimal tuple has no "system columns".
  * (Actually, it could have an OID, but we have no need to access the OID.)
  *
  * A "virtual" tuple is an optimization used to minimize physical data
  * copying in a nest of plan nodes.  Any pass-by-reference Datums in the
  * tuple point to storage that is not directly associated with the
  * TupleTableSlot; generally they will point to part of a tuple stored in
  * a lower plan node's output TupleTableSlot, or to a function result
  * constructed in a plan node's per-tuple econtext.  It is the responsibility
  * of the generating plan node to be sure these resources are not released
  * for as long as the virtual tuple needs to be valid.  We only use virtual
  * tuples in the result slots of plan nodes --- tuples to be copied anywhere
  * else need to be "materialized" into physical tuples.  Note also that a
  * virtual tuple does not have any "system columns".
  *
  * It is also possible for a TupleTableSlot to hold both physical and minimal
  * copies of a tuple.  This is done when the slot is requested to provide
  * the format other than the one it currently holds.  (Originally we attempted
  * to handle such requests by replacing one format with the other, but that
  * had the fatal defect of invalidating any pass-by-reference Datums pointing
  * into the existing slot contents.)  Both copies must contain identical data
  * payloads when this is the case.
  *
  * The Datum/isnull arrays of a TupleTableSlot serve double duty.  When the
  * slot contains a virtual tuple, they are the authoritative data.  When the
  * slot contains a physical tuple, the arrays contain data extracted from
  * the tuple.  (In this state, any pass-by-reference Datums point into
  * the physical tuple.)  The extracted information is built "lazily",
  * ie, only as needed.  This serves to avoid repeated extraction of data
  * from the physical tuple.
  *
  * A TupleTableSlot can also be "empty", holding no valid data.  This is
  * the only valid state for a freshly-created slot that has not yet had a
  * tuple descriptor assigned to it.  In this state, tts_isempty must be
  * true, tts_shouldFree false, tts_tuple NULL, tts_buffer InvalidBuffer,
  * and tts_nvalid zero.
  *
  * The tupleDescriptor is simply referenced, not copied, by the TupleTableSlot
  * code.  The caller of ExecSetSlotDescriptor() is responsible for providing
  * a descriptor that will live as long as the slot does.  (Typically, both
  * slots and descriptors are in per-query memory and are freed by memory
  * context deallocation at query end; so it's not worth providing any extra
  * mechanism to do more.  However, the slot will increment the tupdesc
  * reference count if a reference-counted tupdesc is supplied.)
  *
  * When tts_shouldFree is true, the physical tuple is "owned" by the slot
  * and should be freed when the slot's reference to the tuple is dropped.
  *
  * If tts_buffer is not InvalidBuffer, then the slot is holding a pin
  * on the indicated buffer page; drop the pin when we release the
  * slot's reference to that buffer.  (tts_shouldFree should always be
  * false in such a case, since presumably tts_tuple is pointing at the
  * buffer page.)
  *
  * tts_nvalid indicates the number of valid columns in the tts_values/isnull
  * arrays.  When the slot is holding a "virtual" tuple this must be equal
  * to the descriptor's natts.  When the slot is holding a physical tuple
  * this is equal to the number of columns we have extracted (we always
  * extract columns from left to right, so there are no holes).
  *
  * tts_values/tts_isnull are allocated when a descriptor is assigned to the
  * slot; they are of length equal to the descriptor's natts.
  *
  * tts_mintuple must always be NULL if the slot does not hold a "minimal"
  * tuple.  When it does, tts_mintuple points to the actual MinimalTupleData
  * object (the thing to be pfree'd if tts_shouldFreeMin is true).  If the slot
  * has only a minimal and not also a regular physical tuple, then tts_tuple
  * points at tts_minhdr and the fields of that struct are set correctly
  * for access to the minimal tuple; in particular, tts_minhdr.t_data points
  * MINIMAL_TUPLE_OFFSET bytes before tts_mintuple.  This allows column
  * extraction to treat the case identically to regular physical tuples.
  *
  * tts_slow/tts_off are saved state for slot_deform_tuple, and should not
  * be touched by any other code.
  *----------
  */
 typedef struct TupleTableSlot
 {
     NodeTag     type;
     bool        tts_isempty;    /* true = slot is empty */
     bool        tts_shouldFree; /* should pfree tts_tuple? */
     bool        tts_shouldFreeMin;  /* should pfree tts_mintuple? */
 #define FIELDNO_TUPLETABLESLOT_SLOW 4
     bool        tts_slow;       /* saved state for slot_deform_tuple */
 #define FIELDNO_TUPLETABLESLOT_TUPLE 5
     HeapTuple   tts_tuple;      /* physical tuple, or NULL if virtual */
 #define FIELDNO_TUPLETABLESLOT_TUPLEDESCRIPTOR 6
     TupleDesc   tts_tupleDescriptor;    /* slot's tuple descriptor */
     MemoryContext tts_mcxt;     /* slot itself is in this context */
     Buffer      tts_buffer;     /* tuple's buffer, or InvalidBuffer */
 #define FIELDNO_TUPLETABLESLOT_NVALID 9
     int         tts_nvalid;     /* # of valid values in tts_values */
 #define FIELDNO_TUPLETABLESLOT_VALUES 10
     Datum      *tts_values;     /* current per-attribute values */
 #define FIELDNO_TUPLETABLESLOT_ISNULL 11
     bool       *tts_isnull;     /* current per-attribute isnull flags */
     MinimalTuple tts_mintuple;  /* minimal tuple, or NULL if none */
     HeapTupleData tts_minhdr;   /* workspace for minimal-tuple-only case */
 #define FIELDNO_TUPLETABLESLOT_OFF 14
     uint32      tts_off;        /* saved state for slot_deform_tuple */
     bool        tts_fixedTupleDescriptor;   /* descriptor can't be changed */
 } TupleTableSlot;

3、EState

 /* ----------------
  *    EState information
  *
  * Master working state for an Executor invocation
  * ----------------
  */
 typedef struct EState
 {
     NodeTag     type;
 
     /* Basic state for all query types: */
     ScanDirection es_direction; /* current scan direction */
     Snapshot    es_snapshot;    /* time qual to use */
     Snapshot    es_crosscheck_snapshot; /* crosscheck time qual for RI */
     List       *es_range_table; /* List of RangeTblEntry */
     PlannedStmt *es_plannedstmt;    /* link to top of plan tree */
     const char *es_sourceText;  /* Source text from QueryDesc */
 
     JunkFilter *es_junkFilter;  /* top-level junk filter, if any */
 
     /* If query can insert/delete tuples, the command ID to mark them with */
     CommandId   es_output_cid;
 
     /* Info about target table(s) for insert/update/delete queries: */
     ResultRelInfo *es_result_relations; /* array of ResultRelInfos */
     int         es_num_result_relations;    /* length of array */
     ResultRelInfo *es_result_relation_info; /* currently active array elt */
 
     /*
      * Info about the target partitioned target table root(s) for
      * update/delete queries.  They required only to fire any per-statement
      * triggers defined on the table.  It exists separately from
      * es_result_relations, because partitioned tables don't appear in the
      * plan tree for the update/delete cases.
      */
     ResultRelInfo *es_root_result_relations;    /* array of ResultRelInfos */
     int         es_num_root_result_relations;   /* length of the array */
 
     /*
      * The following list contains ResultRelInfos created by the tuple routing
      * code for partitions that don't already have one.
      */
     List       *es_tuple_routing_result_relations;
 
     /* Stuff used for firing triggers: */
     List       *es_trig_target_relations;   /* trigger-only ResultRelInfos */
     TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */
     TupleTableSlot *es_trig_oldtup_slot;    /* for TriggerEnabled */
     TupleTableSlot *es_trig_newtup_slot;    /* for TriggerEnabled */
 
     /* Parameter info: */
     ParamListInfo es_param_list_info;   /* values of external params */
     ParamExecData *es_param_exec_vals;  /* values of internal params */
 
     QueryEnvironment *es_queryEnv;  /* query environment */
 
     /* Other working state: */
     MemoryContext es_query_cxt; /* per-query context in which EState lives */
 
     List       *es_tupleTable;  /* List of TupleTableSlots */
 
     List       *es_rowMarks;    /* List of ExecRowMarks */
 
     uint64      es_processed;   /* # of tuples processed */
     Oid         es_lastoid;     /* last oid processed (by INSERT) */
 
     int         es_top_eflags;  /* eflags passed to ExecutorStart */
     int         es_instrument;  /* OR of InstrumentOption flags */
     bool        es_finished;    /* true when ExecutorFinish is done */
 
     List       *es_exprcontexts;    /* List of ExprContexts within EState */
 
     List       *es_subplanstates;   /* List of PlanState for SubPlans */
 
     List       *es_auxmodifytables; /* List of secondary ModifyTableStates */
 
     /*
      * this ExprContext is for per-output-tuple operations, such as constraint
      * checks and index-value computations.  It will be reset for each output
      * tuple.  Note that it will be created only if needed.
      */
     ExprContext *es_per_tuple_exprcontext;
 
     /*
      * These fields are for re-evaluating plan quals when an updated tuple is
      * substituted in READ COMMITTED mode.  es_epqTuple[] contains tuples that
      * scan plan nodes should return instead of whatever they'd normally
      * return, or NULL if nothing to return; es_epqTupleSet[] is true if a
      * particular array entry is valid; and es_epqScanDone[] is state to
      * remember if the tuple has been returned already.  Arrays are of size
      * list_length(es_range_table) and are indexed by scan node scanrelid - 1.
      */
     HeapTuple  *es_epqTuple;    /* array of EPQ substitute tuples */
     bool       *es_epqTupleSet; /* true if EPQ tuple is provided */
     bool       *es_epqScanDone; /* true if EPQ tuple has been fetched */
 
     bool        es_use_parallel_mode;   /* can we use parallel workers? */
 
     /* The per-query shared memory area to use for parallel execution. */
     struct dsa_area *es_query_dsa;
 
     /*
      * JIT information. es_jit_flags indicates whether JIT should be performed
      * and with which options.  es_jit is created on-demand when JITing is
      * performed.
      */
     int         es_jit_flags;
     struct JitContext *es_jit;
 } EState;

4、ResultRelInfo

 /*
  * ResultRelInfo
  *
  * Whenever we update an existing relation, we have to update indexes on the
  * relation, and perhaps also fire triggers.  ResultRelInfo holds all the
  * information needed about a result relation, including indexes.
  */
 typedef struct ResultRelInfo
 {
     NodeTag     type;
 
     /* result relation's range table index */
     Index       ri_RangeTableIndex;
 
     /* relation descriptor for result relation */
     Relation    ri_RelationDesc;
 
     /* # of indices existing on result relation */
     int         ri_NumIndices;
 
     /* array of relation descriptors for indices */
     RelationPtr ri_IndexRelationDescs;
 
     /* array of key/attr info for indices */
     IndexInfo **ri_IndexRelationInfo;
 
     /* triggers to be fired, if any */
     TriggerDesc *ri_TrigDesc;
 
     /* cached lookup info for trigger functions */
     FmgrInfo   *ri_TrigFunctions;
 
     /* array of trigger WHEN expr states */
     ExprState **ri_TrigWhenExprs;
 
     /* optional runtime measurements for triggers */
     Instrumentation *ri_TrigInstrument;
 
     /* FDW callback functions, if foreign table */
     struct FdwRoutine *ri_FdwRoutine;
 
     /* available to save private state of FDW */
     void       *ri_FdwState;
 
     /* true when modifying foreign table directly */
     bool        ri_usesFdwDirectModify;
 
     /* list of WithCheckOption's to be checked */
     List       *ri_WithCheckOptions;
 
     /* list of WithCheckOption expr states */
     List       *ri_WithCheckOptionExprs;
 
     /* array of constraint-checking expr states */
     ExprState **ri_ConstraintExprs;
 
     /* for removing junk attributes from tuples */
     JunkFilter *ri_junkFilter;
 
     /* list of RETURNING expressions */
     List       *ri_returningList;
 
     /* for computing a RETURNING list */
     ProjectionInfo *ri_projectReturning;
 
     /* list of arbiter indexes to use to check conflicts */
     List       *ri_onConflictArbiterIndexes;
 
     /* ON CONFLICT evaluation state */
     OnConflictSetState *ri_onConflict;
 
     /* partition check expression */
     List       *ri_PartitionCheck;
 
     /* partition check expression state */
     ExprState  *ri_PartitionCheckExpr;
 
     /* relation descriptor for root partitioned table */
     Relation    ri_PartitionRoot;
 
     /* true if ready for tuple routing */
     bool        ri_PartitionReadyForRouting;
 } ResultRelInfo;

5、List

 typedef struct ListCell ListCell;
 
 typedef struct List
 {
     NodeTag     type;           /* T_List, T_IntList, or T_OidList */
     int         length;
     ListCell   *head;
     ListCell   *tail;
 } List;
 
 struct ListCell
 {
     union
     {
         void       *ptr_value;
         int         int_value;
         Oid         oid_value;
     }           data;
     ListCell   *next;
 };

6、TransitionCaptureState

 
 typedef struct TransitionCaptureState
 {
     /*
      * Is there at least one trigger specifying each transition relation on
      * the relation explicitly named in the DML statement or COPY command?
      * Note: in current usage, these flags could be part of the private state,
      * but it seems possibly useful to let callers see them.
      */
     bool        tcs_delete_old_table;
     bool        tcs_update_old_table;
     bool        tcs_update_new_table;
     bool        tcs_insert_new_table;
 
     /*
      * For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the
      * new and old tuples from a child table's format to the format of the
      * relation named in a query so that it is compatible with the transition
      * tuplestores.  The caller must store the conversion map here if so.
      */
     TupleConversionMap *tcs_map;
 
     /*
      * For INSERT and COPY, it would be wasteful to convert tuples from child
      * format to parent format after they have already been converted in the
      * opposite direction during routing.  In that case we bypass conversion
      * and allow the inserting code (copy.c and nodeModifyTable.c) to provide
      * the original tuple directly.
      */
     HeapTuple   tcs_original_insert_tuple;
 
     /*
      * Private data including the tuplestore(s) into which to insert tuples.
      */
     struct AfterTriggersTableData *tcs_private;
 } TransitionCaptureState;

*7、ModifyTable *

 /* ----------------
  *   ModifyTable node -
  *      Apply rows produced by subplan(s) to result table(s),
  *      by inserting, updating, or deleting.
  *
  * Note that rowMarks and epqParam are presumed to be valid for all the
  * subplan(s); they can't contain any info that varies across subplans.
  * ----------------
  */
 typedef struct ModifyTable
 {
     Plan        plan;
     CmdType     operation;      /* INSERT, UPDATE, or DELETE */
     bool        canSetTag;      /* do we set the command tag/es_processed? */
     Index       nominalRelation;    /* Parent RT index for use of EXPLAIN */
     /* RT indexes of non-leaf tables in a partition tree */
     List       *partitioned_rels;
     bool        partColsUpdated;    /* some part key in hierarchy updated */
     List       *resultRelations;    /* integer list of RT indexes */
     int         resultRelIndex; /* index of first resultRel in plan's list */
     int         rootResultRelIndex; /* index of the partitioned table root */
     List       *plans;          /* plan(s) producing source data */
     List       *withCheckOptionLists;   /* per-target-table WCO lists */
     List       *returningLists; /* per-target-table RETURNING tlists */
     List       *fdwPrivLists;   /* per-target-table FDW private data lists */
     Bitmapset  *fdwDirectModifyPlans;   /* indices of FDW DM plans */
     List       *rowMarks;       /* PlanRowMarks (non-locking only) */
     int         epqParam;       /* ID of Param for EvalPlanQual re-eval */
     OnConflictAction onConflictAction;  /* ON CONFLICT action */
     List       *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs  */
     List       *onConflictSet;  /* SET for INSERT ON CONFLICT DO UPDATE */
     Node       *onConflictWhere;    /* WHERE for ON CONFLICT UPDATE */
     Index       exclRelRTI;     /* RTI of the EXCLUDED pseudo relation */
     List       *exclRelTlist;   /* tlist of the EXCLUDED pseudo relation */
 } ModifyTable;

8、OnConflictAction

/*
  * OnConflictAction -
  *    "ON CONFLICT" clause type of query
  *
  * This is needed in both parsenodes.h and plannodes.h, so put it here...
  */
 typedef enum OnConflictAction
 {
     ONCONFLICT_NONE,            /* No "ON CONFLICT" clause */
     ONCONFLICT_NOTHING,         /* ON CONFLICT ... DO NOTHING */
     ONCONFLICT_UPDATE           /* ON CONFLICT ... DO UPDATE */
 } OnConflictAction;

8、MemoryContext

 typedef struct MemoryContextData
 {
     NodeTag     type;           /* identifies exact kind of context */
     /* these two fields are placed here to minimize alignment wastage: */
     bool        isReset;        /* T = no space alloced since last reset */
     bool        allowInCritSection; /* allow palloc in critical section */
     const MemoryContextMethods *methods;    /* virtual function table */
     MemoryContext parent;       /* NULL if no parent (toplevel context) */
     MemoryContext firstchild;   /* head of linked list of children */
     MemoryContext prevchild;    /* previous child of same parent */
     MemoryContext nextchild;    /* next child of same parent */
     const char *name;           /* context name (just for debugging) */
     const char *ident;          /* context ID if any (just for debugging) */
     MemoryContextCallback *reset_cbs;   /* list of reset/delete callbacks */
 } MemoryContextData;
 
 /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
 
/*
  * Type MemoryCon

最低0.47元/天解锁文章

cuichao1900

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
PostgreSQL 源码解读（5）- 插入数据#4（ExecInsert）

本文简单介绍了PG插入数据部分的源码，主要内容包括ExecInsert函数的实现逻辑，该函数位于nodeModifyTable.c文件中。一、基础信息按惯例，首先看看ExecInsert函数使用的数...
复制链接

扫一扫