查询代码-expr解析

shmilyyiyi

已于 2024-01-09 00:38:59 修改

阅读量765

点赞数 21

分类专栏：opengauss代码分析　文章标签：数据库 开源

于 2024-01-08 23:53:30 首次发布

本文链接：https://blog.csdn.net/shmilyyiyi/article/details/135469240

版权

opengauss代码分析专栏收录该内容

3 篇文章 0 订阅

订阅专栏

文件路径

/src/common/backend/parser/parse_expr.cpp

文件内容

在这篇文章中我会解释以下几个函数的基本内容和作用：

static Const* BuildColumnBaseValue(Form_pg_attribute attTup)

{

    if (IsBaseRightRefSupportType(attTup->atttypid)) {

        Datum datum = GetTypeZeroValue(attTup);

        return makeConst(attTup->atttypid,

                         attTup->atttypmod,

                         attTup->attcollation,

                         attTup->attlen,

                         datum,

                         false, /* constisnull */

                         attTup->attbyval);

    } else if (type_is_enum(attTup->atttypid)) {

        Relation enumRel = heap_open(EnumRelationId, AccessShareLock);

        CatCList* items = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(attTup->atttypid));

        int itemCnt = items->n_members;
       

        for (int eindex = 0; eindex < itemCnt; ++eindex) {

            HeapTuple enumTup = t_thrd.lsc_cxt.FetchTupleFromCatCList(items, eindex);

            Form_pg_enum item = (Form_pg_enum)GETSTRUCT(enumTup);

            if (item && item->enumsortorder == 1) {

                Datum datum = DirectFunctionCall2(enum_in,

                                                  CStringGetDatum(pstrdup(NameStr(item->enumlabel))),

                                                  attTup->atttypid);

                ReleaseSysCacheList(items);

                heap_close(enumRel, AccessShareLock);         

                return makeConst(attTup->atttypid,

                                 attTup->atttypmod,

                                 attTup->attcollation,

                                 attTup->attlen,

                                 datum,

                                 false, /* constisnull */

                                 attTup->attbyval);
            }

        }

        ReleaseSysCacheList(items);

        heap_close(enumRel, AccessShareLock);

    } else if (type_is_set(attTup->atttypid)) {

        return makeConst(attTup->atttypid,

                         attTup->atttypmod,

                         attTup->attcollation,

                         attTup->attlen,

                         CStringGetTextDatum(""), /* datum */

                         false, /* constisnull */

                         attTup->attbyval);
 }

    return nullptr;

}

这个函数 BuildColumnBaseValue 是用于构建列基础值的。它接受一个 Form_pg_attribute 类型的参数 attTup，这通常代表一个数据库列的属性。函数的主要目的是根据列的类型来创建一个 Const 结构，这个结构包含了列的基本信息和一个初始值。函数的执行过程分为几个部分：

基本类型支持：首先检查列的类型 attTup->atttypid 是否是基本类型，如果是，则获取该类型的零值（默认值），然后使用这个零值和列的其他属性创建一个 Const 对象。

枚举类型处理：如果列的类型是枚举类型，则函数会打开枚举类型的关系（Relation），并获取所有枚举项。它会遍历这些枚举项，寻找排序顺序为1的枚举项，并使用这个枚举项的值来创建一个 Const 对象。完成后，会释放相关资源。

集合类型处理：如果列的类型是集合类型，则会创建一个包含空字符串的 Const 对象。

整个函数的目的是根据不同的列类型，提供一个相应的默认值或基础值，这在数据库管理和操作中是很常见的需求。如果列类型不属于上述三种情况，或者枚举类型中没有排序顺序为 1 的枚举项，函数返回 nullptr。

static void AddDefaultExprNode(ParseState* pstate)

{
    RightRefState* refState = pstate->rightRefState;

    if (refState->isInsertHasRightRef) {

        return;
    }

    pstate->rightRefState->isInsertHasRightRef = true;
  

    Relation relation = (Relation)linitial(pstate->p_target_relation);

    TupleDesc rdAtt = relation->rd_att;

    int fieldCnt = rdAtt->natts;

    refState->constValues = (Const**)palloc0(sizeof(Const*) * fieldCnt);
  

    eval_const_expressions_context context;

    context.boundParams = nullptr;

    context.root = nullptr;

    context.active_fns = NIL;

    context.case_val = NULL;

    context.estimate = false;


    for (int i = 0; i < fieldCnt; ++i) {

        FormData_pg_attribute *attTup = &rdAtt->attrs[i];

        if (IsAutoIncrementColumn(rdAtt, i + 1)) {

            refState->constValues[i] = makeConst(attTup->atttypid, -1, attTup->attcollation,

                      attTup->attlen, (Datum)0, false, attTup->attbyval);

        } else if (ISGENERATEDCOL(rdAtt, i)) {

            refState->constValues[i] = nullptr;

        } else {

            Node* node = build_column_default(relation, i + 1, true);

            if (node == nullptr) {

                refState->constValues[i] = nullptr;

            } else if (IsA(node, Const)) {

                refState->constValues[i] = (Const*)node;

            } else if (IsA(node, FuncExpr)) {

                FuncExpr* expr = (FuncExpr*)node;

                List* args = expr->args;

                Expr* simple = simplify_function(expr->funcid, expr->funcresulttype, exprTypmod((const Node*)expr),

                                                    expr->funccollid, expr->inputcollid, &args, true, false, &context);

                if (simple && IsA(simple, Const)) {

                    refState->constValues[i] = (Const*)simple;

                } else {

                    refState->constValues[i] = nullptr;
                }

            } else {

                refState->constValues[i] = nullptr;

            }
        }


        /* support not null constraint */

        if (refState->constValues[i] == nullptr && attTup && attTup->attnotnull) {

            refState->constValues[i] = BuildColumnBaseValue(attTup);

        }    }
}

这个函数 AddDefaultExprNode 在数据库编程环境中用于处理插入操作中的默认表达式。它主要用于在插入操作时，为那些没有显式提供值的列生成默认值。下面是函数内容的详细解释：

检查和设置状态：函数首先检查 ParseState 结构中的 rightRefState 成员变量，如果已经处理过右引用（isInsertHasRightRef），则直接返回。如果没有处理过，它将这个状态设置为 true，表示开始处理。

初始化变量：它获取目标关系（表）的元组描述（TupleDesc），这包括了列的数量和详细信息。然后，为每个字段分配空间以存储常量值（Const 对象）。

处理每个字段：函数遍历所有字段。对于每个字段，它执行以下操作：

如果字段是自增列，则为其创建一个常量值。

如果字段是生成的列（如计算列），则不分配常量值。

对于其他列，尝试构建该列的默认值。这涉及到检查该列是否有默认表达式，如果有，则评估这个表达式。

简化函数表达式：如果默认表达式是一个函数，函数会尝试简化这个函数表达式。简化成功且结果为常量时，将该常量值存储；否则，不存储任何值。

处理非空约束：如果某个字段没有分配默认值，且该字段被定义为非空（attnotnull），则会调用 BuildColumnBaseValue 函数为其生成一个基础值。

更新状态：在处理完所有字段后，rightRefState 中的 constValues 将包含每个字段对应的默认值或基础值，或者为 nullptr（如果没有默认值或基础值）。

这个函数在数据库中处理插入操作时非常重要，它确保了即使在插入数据时没有为某些字段提供值，这些字段也能获得合适的默认值或满足非空约束。

static void unknown_attribute(ParseState* pstate, Node* relref, char* attname, int location)
{

    RangeTblEntry* rte = NULL;


    if (IsA(relref, Var) && ((Var*)relref)->varattno == InvalidAttrNumber) {

        /* Reference the RTE by alias not by actual table name */

        rte = GetRTEByRangeTablePosn(pstate, ((Var*)relref)->varno, ((Var*)relref)->varlevelsup);

        ereport(ERROR,

            (errcode(ERRCODE_UNDEFINED_COLUMN),

                errmsg("column %s.%s does not exist", rte->eref->aliasname, attname),

                parser_errposition(pstate, location)));

    } else {

        /* Have to do it by reference to the type of the expression */

        Oid relTypeId = exprType(relref);


        if (ISCOMPLEX(relTypeId)) {

            ereport(ERROR,

                (errcode(ERRCODE_UNDEFINED_COLUMN),

                    errmsg("column \"%s\" not found in data type %s", attname, format_type_be(relTypeId)),

                    parser_errposition(pstate, location)));

        } else if (relTypeId == RECORDOID) {

            ereport(ERROR,

                (errcode(ERRCODE_UNDEFINED_COLUMN),

                    errmsg("could not identify column \"%s\" in record data type", attname),

                    parser_errposition(pstate, location)));
        } else {
            ereport(ERROR,

                (errcode(ERRCODE_WRONG_OBJECT_TYPE),

                    errmsg("column notation .%s applied to type %s, "

                           "which is not a composite type",

                        attname,

                        format_type_be(relTypeId)),

                    parser_errposition(pstate, location)));

        }
    }
}

函数 unknown_attribute 在数据库查询解析中用于处理未知属性错误。当查询引用了不存在的列时，此函数会被调用以生成错误消息。它首先判断引用是否来自一个变量节点 (Var)，若是，则通过范围表项确定引用的表别名和不存在的列名来报错。若引用不是变量节点，它会检查引用的类型（如复合类型或记录类型），并据此生成不同的错误消息。这个函数的主要作用是确保在查询中引用了错误或不存在的列时，用户能够接收到清晰、准确的错误信息。

static Node* transformIndirection(ParseState* pstate, A_Indirection* ind)
{
    Node* last_srf = pstate->p_last_srf;

    Node* result = transformExprRecurse(pstate, ind->arg);

    List* subscripts = NIL;

    int location = exprLocation(result);

    ListCell* i = NULL;

    /*

     * We have to split any field-selection operations apart from

     * subscripting.  Adjacent A_Indices nodes have to be treated as a single

     * multidimensional subscript operation.

     */

    foreach (i, ind->indirection) {

        Node* n = (Node*)lfirst(i);

        if (IsA(n, A_Indices)) {

            subscripts = lappend(subscripts, n);

        } else if (IsA(n, A_Star)) {

            ereport(ERROR,

                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

                    errmsg("row expansion via \"*\" is not supported here"),

                    parser_errposition(pstate, location)));
        } else {

            Node* newresult = NULL;

            AssertEreport(IsA(n, String), MOD_OPT, "");

            /* process subscripts before this field selection */

            if (subscripts != NIL) {

                result = (Node*)transformArraySubscripts(

                    pstate, result, exprType(result), InvalidOid, exprTypmod(result), subscripts, NULL);

            }
            subscripts = NIL;


            newresult = ParseFuncOrColumn(pstate, list_make1(n), list_make1(result), last_srf, NULL, location);

            if (newresult == NULL) {

                unknown_attribute(pstate, result, strVal(n), location);
            }
            result = newresult;
        }
    }
    /* process trailing subscripts, if any */

    if (subscripts != NIL) {

        result = (Node*)transformArraySubscripts(

            pstate, result, exprType(result), InvalidOid, exprTypmod(result), subscripts, NULL);

    }
    return result;
}

transformIndirection 函数在数据库查询解析中扮演着关键角色，专门用于转换间接引用，例如数组下标和字段选择，特别是在处理复杂表达式时显得尤为重要。该函数首先接收一个 A_Indirection 节点和解析状态 ParseState，它将 ind->arg（基础表达式）进行初步转换并存储结果。接着，函数遍历 ind->indirection 列表，处理其中的下标或字段选择操作。对于 A_Indices 类型的元素（数组下标），它们被添加到下标列表 subscripts 中，相邻的多个下标会被当作一次多维下标操作统一处理。而 A_Star 类型（星号，用于行扩展）则会触发错误报告，因为这类行扩展在当前上下文中不被支持。其他类型的元素（断言为 String 节点，即字段名）在解析之前，会先调用 transformArraySubscripts 把此前累积的下标转换为数组访问操作并清空下标列表，然后再通过 ParseFuncOrColumn 进行解析。在字段选择的处理中，如果 ParseFuncOrColumn 无法解析特定字段或函数（返回 NULL），则会调用 unknown_attribute 函数来处理这种未知属性错误。循环结束后，如果仍有未处理的下标，函数会再调用一次 transformArraySubscripts 来转换它们，最后返回转换后的表达式。

static Node* replaceExprAliasIfNecessary(ParseState* pstate, char* colname, ColumnRef* cref)
{

    ListCell* lc = NULL;

    bool isFind = false;

    Expr* matchExpr = NULL;

    TargetEntry* tle = NULL;

    foreach (lc, pstate->p_target_list) {

        tle = (TargetEntry*)lfirst(lc);

        /*

         * 1. in a select stmt in stored procudre, a columnref may be a param(e.g. a declared var or the stored

         *    procedure's arg), which is not a alias, so can not be matched here.

         * 2. in a select stmt in stored procudre such like a[1],a[2],a[3], they have same name,

         *    so, we should pass this target.

         */

        bool isArrayParam = IsA(tle->expr, ArrayRef) && ((ArrayRef*)tle->expr)->refexpr != NULL &&

                            IsA(((ArrayRef*)tle->expr)->refexpr, Param);

        if (tle->resname != NULL && !IsA(tle->expr, Param) && !isArrayParam &&

            strncmp(tle->resname, colname, strlen(colname) + 1) == 0) {

            if (checkExprHasWindowFuncs((Node*)tle->expr)) {

                ereport(ERROR,

                    (errcode(ERRCODE_UNDEFINED_COLUMN),

                        errmsg("Alias \"%s\" reference with window function included is not supported.", colname),

                        parser_errposition(pstate, cref->location)));

#ifndef ENABLE_MULTIPLE_NODES

            } else if (ContainRownumExpr((Node*)tle->expr)) {

                ereport(ERROR,

                    (errcode(ERRCODE_UNDEFINED_COLUMN),

                     errmsg("Alias \"%s\" reference with ROWNUM included is invalid.", colname),

                     parser_errposition(pstate, cref->location)));

#endif                                      
            } else if (contain_volatile_functions((Node*)tle->expr)) {

                ereport(ERROR,

                    (errcode(ERRCODE_UNDEFINED_COLUMN),

                        errmsg("Alias \"%s\" reference with volatile function included is not supported.", colname),

                        parser_errposition(pstate, cref->location)));
            } else {

                if (!isFind) {

                    matchExpr = tle->expr;

                    isFind = true;

                } else {

                    ereport(ERROR,

                        (errcode(ERRCODE_UNDEFINED_COLUMN),

                            errmsg("Alias \"%s\" is ambiguous.", colname),

                            parser_errposition(pstate, cref->location)));

                   return NULL;
                }
            }
        }
    }
    return (Node*)copyObject(matchExpr);
}

replaceExprAliasIfNecessary 函数在数据库查询解析中起关键作用，用于在目标列表中查找与给定列名同名的别名，并用该别名对应的表达式替换列引用（ColumnRef）。它遍历解析状态的目标列表，检查每个目标项的别名（resname）是否与给定的列名相匹配，同时跳过一些特殊情况：没有别名的目标项、表达式本身是参数（Param）的目标项，以及对参数做数组下标引用（ArrayRef）的目标项。该函数还处理错误情况，如匹配到的表达式中包含窗口函数、ROWNUM 或易变函数。若发现别名歧义（多个目标项匹配同一别名）或其他错误，则报错。最终，如果找到唯一匹配的表达式且无错误，函数返回这个表达式的副本，确保查询的准确性和一致性。