了解Go编译处理（六）—— 表达式解析

最新推荐文章于 2023-10-13 08:00:00 发布

yuchenfw

最新推荐文章于 2023-10-13 08:00:00 发布

阅读量962

点赞数

分类专栏： golang Go编译解析源码分析文章标签： go 编译表达式 expr

本文链接：https://blog.csdn.net/xz_studying/article/details/109438068

版权

golang 同时被 3 个专栏收录

58 篇文章 7 订阅

订阅专栏

源码分析

41 篇文章 2 订阅

订阅专栏

Go编译解析

3 篇文章 0 订阅

订阅专栏

前言

了解Go编译处理（五）—— 声明的解析介绍了import、const、type、var，在const及var的解析中少不了表达式的解析。本文主要关注表达式的解析-expr。

Operation

Operation用以存储解析的二元表达式，当Y==nil时，即表示一元表达式

    // Operation is the syntax node for an operator expression. Op is the
    // operator, X is the (left) operand and Y is the right operand of a
    // binary operation.
    Operation struct {
        Op   Operator
        X, Y Expr // Y == nil means unary expression
        expr
    }

表达式解析expr

表达式分为单操作符表达式和含多个操作符的表达式。含多个操作符的表达式可以按操作符逐个拆解，降级为单操作符表达式来解析；解析得到的Operation节点通过X、Y字段相互嵌套连接（形如一棵表达式树）。

_Operator对应的操作符有&&、||、<=、<、>=、>、==、!=、+、-、/、%、&、&^、|、^、<<、>>、!，主要就是排除了*。

_Star则对应的就是*。

// Expressions

// expr parses a full expression. It is a thin entry point that hands off to
// binaryExpr with a starting precedence of 0, so any operator whose precedence
// is greater than 0 is accepted as a continuation of the expression.
func (p *parser) expr() Expr {
    if trace {
        defer p.trace("expr")()
    }

    const startPrec = 0
    return p.binaryExpr(startPrec)
}

// Expression = UnaryExpr | Expression binary_op Expression .
//
// binaryExpr parses a unary expression followed by any number of
// "operator operand" pairs whose operator precedence is strictly greater
// than prec. Each pair is folded into an Operation node whose X is
// everything parsed so far and whose Y is parsed recursively using the
// operator's own precedence as the new threshold. Because the comparison
// is strict, an operator of equal precedence is left for the caller's loop,
// which makes equal-precedence operators associate to the left.
func (p *parser) binaryExpr(prec int) Expr {
    // Intentionally not traced: tracing here only produces overly nested
    // trace output.

    left := p.unaryExpr()
    for {
        // Stop as soon as the current token is not a binary operator ...
        if p.tok != _Operator && p.tok != _Star {
            return left
        }
        // ... or does not bind tighter than the caller's threshold.
        if p.prec <= prec {
            return left
        }

        node := new(Operation)
        node.pos = p.pos()
        node.Op = p.op
        node.X = left
        opPrec := p.prec // remember before next() advances to the next token
        p.next()
        node.Y = p.binaryExpr(opPrec)
        left = node
    }
}

先解析单操作符表达式；若后续tok是_Operator或_Star（即&&、||、<=、<、>=、>、==、!=、+、-、/、%、&、&^、|、^、<<、>>、!、*），且该操作符的优先级高于传入的prec，则继续解析后续表达式，否则解析完成。

单操作符表达式解析

go中单个操作符表达式又可以分为初级表达式及一元表达式。

// UnaryExpr = PrimaryExpr | unary_op UnaryExpr .
//
// unaryExpr parses a unary expression. A leading *, +, -, ! or ^ yields an
// Operation whose X is the recursively parsed operand; a leading & does the
// same but strips parentheses from the operand. A leading <- is either a
// receive operation or the start of a receive-only channel type, which can
// only be decided after the operand has been parsed. Everything else is
// handed to pexpr.
func (p *parser) unaryExpr() Expr {
    if trace {
        defer p.trace("unaryExpr")()
    }

    if p.tok == _Operator || p.tok == _Star {
        switch p.op {
        case And:
            addr := new(Operation)
            addr.pos = p.pos()
            addr.Op = And
            p.next()
            // unaryExpr may have returned a parenthesized composite literal
            // (see comment in operand) - remove parentheses if any
            addr.X = unparen(p.unaryExpr())
            return addr

        case Mul, Add, Sub, Not, Xor:
            un := new(Operation)
            un.pos = p.pos()
            un.Op = p.op
            p.next()
            un.X = p.unaryExpr()
            return un
        }
        // Any other operator falls through to pexpr below.
    } else if p.tok == _Arrow {
        // receive op (<-x) or receive-only channel (<-chan E)
        arrowPos := p.pos()
        p.next()

        // If the next token is _Chan we still don't know if it is
        // a channel (<-chan int) or a receive op (<-chan int(ch)).
        // We only know once we have found the end of the unaryExpr.
        sub := p.unaryExpr()

        // There are two cases:
        //
        //   <-chan...  => <-x is a channel type
        //   <-x        => <-x is a receive operation
        if _, isChan := sub.(*ChanType); !isChan {
            // sub is not a channel type => we have a receive op
            recv := new(Operation)
            recv.pos = arrowPos
            recv.Op = Recv
            recv.X = sub
            return recv
        }

        // sub is a channel type, so <- must be re-associated with the
        // channel type parsed already:
        //
        //   <-(chan E)   =>  (<-chan E)
        //   <-(chan<-E)  =>  (<-chan (<-E))
        //
        // Walk down the element chain for as long as the channel just
        // visited was send-only, turning each visited channel into a
        // receive-only one.
        dir := SendOnly
        cur := sub
        for dir == SendOnly {
            ch, ok := cur.(*ChanType)
            if !ok {
                break
            }
            dir = ch.Dir
            if dir == RecvOnly {
                // cur is type <-chan E but <-<-chan E is not permitted
                // (report same error as for "type _ <-<-chan E")
                p.syntaxError("unexpected <-, expecting chan")
                // already progressed, no need to advance
            }
            ch.Dir = RecvOnly
            cur = ch.Elem
        }
        if dir == SendOnly {
            // channel dir is <- but channel element E is not a channel
            // (report same error as for "type _ <-chan<-E")
            p.syntaxError(fmt.Sprintf("unexpected %s, expecting chan", String(cur)))
            // already progressed, no need to advance
        }
        return sub
    }

    // TODO(mdempsky): We need parens here so we can report an
    // error for "(x) := true". It should be possible to detect
    // and reject that more efficiently though.
    return p.pexpr(true)
}

根据tok进行处理：

若tok为_Operator或_Star（涵盖的操作符有&&、||、<=、<、>=、>、==、!=、+、-、/、%、&、&^、|、^、<<、>>、!、*），
- 当操作符为*、+、-、!、^之一时，继续解析其后的一元表达式
- 若操作符为&，由于后续表达式解析返回的结果可能带有()，会移除()
- 其他操作符不做一元处理，交由pexpr处理
若tok为_Arrow，则先解析其后的表达式，再根据解析结果判断：若结果是chan类型，则<-属于该类型（即<-chan E）；否则是一次接收操作（<-x）
其他tok需要进一步获取操作数后进行解析。

pexpr

pexpr用以获取操作数、转换、属性、索引、切片、断言、参数等，每种处理类型都有对应的格式，具体格式可参考代码注释。

// pexpr parses a primary expression: it first parses an operand and then
// repeatedly applies any trailing selector, type assertion / type switch
// guard, index, slice, call, or composite literal body to what has been
// parsed so far. keep_parens is passed through to operand.
//
// PrimaryExpr =
//     Operand |
//     Conversion |
//     PrimaryExpr Selector |
//     PrimaryExpr Index |
//     PrimaryExpr Slice |
//     PrimaryExpr TypeAssertion |
//     PrimaryExpr Arguments .
//
// Selector       = "." identifier .
// Index          = "[" Expression "]" .
// Slice          = "[" ( [ Expression ] ":" [ Expression ] ) |
//                      ( [ Expression ] ":" Expression ":" Expression )
//                  "]" .
// TypeAssertion  = "." "(" Type ")" .
// Arguments      = "(" [ ( ExpressionList | Type [ "," ExpressionList ] ) [ "..." ] [ "," ] ] ")" .
func (p *parser) pexpr(keep_parens bool) Expr {
    if trace {
        defer p.trace("pexpr")()
    }

    x := p.operand(keep_parens)

loop:
    for {
        pos := p.pos()
        switch p.tok {
        case _Dot:
            // '.' starts a selector, a type switch guard, or a type assertion.
            p.next()
            switch p.tok {
            case _Name: // field or method name
                // pexpr '.' sym
                t := new(SelectorExpr)
                t.pos = pos
                t.X = x
                t.Sel = p.name()
                x = t

            case _Lparen: // '('
                p.next()
                if p.got(_Type) { // x.(type): type switch guard
                    t := new(TypeSwitchGuard)
                    // t.Lhs is filled in by parser.simpleStmt
                    t.pos = pos
                    t.X = x
                    x = t
                } else { // x.(T): type assertion
                    t := new(AssertExpr)
                    t.pos = pos
                    t.X = x
                    t.Type = p.type_()
                    x = t
                }
                p.want(_Rparen)

            default:
                p.syntaxError("expecting name or (")
                p.advance(_Semi, _Rparen)
            }

        case _Lbrack:
            // '[' starts an index expression or a slice expression.
            p.next()
            p.xnest++

            var i Expr
            if p.tok != _Colon {
                i = p.expr()
                if p.got(_Rbrack) {
                    // x[i]
                    t := new(IndexExpr)
                    t.pos = pos
                    t.X = x
                    t.Index = i
                    x = t
                    p.xnest--
                    // This break leaves only the switch; the enclosing
                    // for loop continues with the next token.
                    break
                }
            }

            // x[i:...
            t := new(SliceExpr)
            t.pos = pos
            t.X = x
            t.Index[0] = i
            p.want(_Colon)
            if p.tok != _Colon && p.tok != _Rbrack {
                // x[i:j...
                t.Index[1] = p.expr()
            }
            if p.got(_Colon) {
                t.Full = true
                // x[i:j:...]
                if t.Index[1] == nil {
                    p.error("middle index required in 3-index slice")
                }
                if p.tok != _Rbrack {
                    // x[i:j:k...
                    t.Index[2] = p.expr()
                } else {
                    p.error("final index required in 3-index slice")
                }
            }
            p.want(_Rbrack)

            x = t
            p.xnest--

        case _Lparen:
            // '(' starts a call: x becomes the callee and the argument
            // list is parsed by argList.
            t := new(CallExpr)
            t.pos = pos
            t.Fun = x
            t.ArgList, t.HasDots = p.argList()
            x = t

        case _Lbrace:
            // '{' either opens a composite literal body or belongs to a
            // following block statement; decide based on what x is.
            // operand may have returned a parenthesized complit
            // type; accept it but complain if we have a complit
            t := unparen(x)
            // determine if '{' belongs to a composite literal or a block statement
            complit_ok := false
            switch t.(type) {
            case *Name, *SelectorExpr:
                // a plain or selected name is only treated as a composite
                // literal type when the nesting level is non-negative
                if p.xnest >= 0 {
                    // x is considered a composite literal type
                    complit_ok = true
                }
            case *ArrayType, *SliceType, *StructType, *MapType:
                // array, slice, struct and map types are always accepted
                // as composite literal types
                // x is a comptype
                complit_ok = true
            }
            if !complit_ok {
                break loop
            }
            if t != x {
                p.syntaxError("cannot parenthesize type in composite literal")
                // already progressed, no need to advance
            }
            n := p.complitexpr() // parse the brace-enclosed element list
            n.Type = x
            x = n

        default:
            break loop
        }
    }

    return x
}

获取到操作数后，

如果后续tok是.，则可能进行的操作有获取属性/方法x.sel、type switch的守卫x.(type)（常见形式为lhs := x.(type)）或类型断言x.(T)。
如果后续tok是[，则可能进行的操作有索引[index]、slice [i:j]或者带cap的slice [i:j:k]
如果后续tok是(，则说明是调用，需要解析参数列表。
如果后续tok是{，若{前的类型是Name或Selector（表达式嵌套级别>=0，这些是自定义的type）、Array、Slice、Struct、Map，则说明当前是复合类型的表达式，{}内的是复合类型的元素。
以上操作若成功，则会循环进行下一个tok的处理；否则，则结束。

operand - 操作数

operand获取表达式中的操作数，操作数可能是字面量Literal、操作数名称，方法表达式，括号表达式。

字面量Literal可分为基本数据类型、复合数据类型、func。

基础数据类型有int、float、imaginary（虚数）、rune、string

复合数据类型有array、slice、map、struct、interface、chan、pointer等。

操作数名称有标识符（如变量名）、限定标识符（带包名前缀的标识符，如pkg.Name）。

// Operand     = Literal | OperandName | MethodExpr | "(" Expression ")" .
// Literal     = BasicLit | CompositeLit | FunctionLit .
// BasicLit    = int_lit | float_lit | imaginary_lit | rune_lit | string_lit .
// OperandName = identifier | QualifiedIdent.
//
// operand parses a single operand, dispatching on the current token:
// a name, a basic literal, a parenthesized expression, a function type or
// function literal, or a type introduced by '[', chan, map, struct or
// interface. Any other token is reported as a syntax error and a bad
// expression is returned.
//
// keep_parens asks for parentheses to be preserved as a ParenExpr node;
// they are also preserved whenever the closing ')' is followed by '{'.
func (p *parser) operand(keep_parens bool) Expr {
    if trace {
        defer p.trace("operand " + p.tok.String())()
    }

    switch p.tok {
    case _Literal:
        return p.oliteral()

    case _Name:
        return p.name()

    case _Lbrack, _Chan, _Map, _Struct, _Interface:
        return p.type_() // othertype

    case _Func:
        start := p.pos()
        p.next()
        sig := p.funcType()
        if p.tok != _Lbrace {
            // No body follows: this is just a function type.
            return sig
        }

        // A body follows: build a function literal.
        p.xnest++
        lit := new(FuncLit)
        lit.pos = start
        lit.Type = sig
        lit.Body = p.funcBody()
        p.xnest--
        return lit

    case _Lparen:
        open := p.pos()
        p.next()
        p.xnest++
        inner := p.expr()
        p.xnest--
        p.want(_Rparen)

        // Optimization: the presence of ()'s is recorded only where it is
        // needed for error reporting; elsewhere it would just waste memory
        // and time.
        //
        // Parentheses are not permitted around T in a composite literal
        // T{}. If the next token is a {, assume the parenthesized
        // expression is a composite literal type T (it may not be, { could
        // be the opening brace of a block, but we don't know yet).
        //
        // Parentheses are also not permitted around the expression in a
        // go/defer statement. In that case, operand is called with
        // keep_parens set.
        if p.tok != _Lbrace && !keep_parens {
            return inner
        }
        paren := new(ParenExpr)
        paren.pos = open
        paren.X = inner
        return paren

    default:
        bad := p.badExpr()
        p.syntaxError("expecting expression")
        p.advance(_Rparen, _Rbrack, _Rbrace)
        return bad
    }

    // Syntactically, composite literals are operands. Because a complit
    // type may be a qualified identifier which is handled by pexpr
    // (together with selector expressions), complits are parsed there
    // as well (operand is only called from pexpr).
}

根据tok进行操作处理：

_Name直接获取name
_Literal会获取对应基本类型的表达式封装
_Lparen，则先处理()内的表达式，如果后续有{或调用方传入的keep_parens为true，则会保留当前的()（封装为ParenExpr）
_Func，则先获取funcType，如果后续有{，则会获取funcBody
'['、chan、map、struct、interface，则获取具体的类型
其他的则是非法的表达式

type_

type_获取表达式中操作数的类型，如果不符合相应类型的规则会报错。

type_可以获取类型名、具体类型、带括号的类型（需要进一步解析）

类型名可分为标识符（identifier）、限定标识符（QualifiedIdent，即带包名前缀的标识符，如pkg.Type）。

具体类型有array、struct、pointer、func、interface、slice、map、channel。

// type_ parses a type. Unlike typeOrNil it never returns nil: when no type
// is found it reports a syntax error, skips ahead to one of a fixed set of
// follow tokens, and returns a bad expression instead.
func (p *parser) type_() Expr {
    if trace {
        defer p.trace("type_")()
    }

    if found := p.typeOrNil(); found != nil {
        return found
    }

    bad := p.badExpr()
    p.syntaxError("expecting type")
    p.advance(_Comma, _Colon, _Semi, _Rparen, _Rbrack, _Rbrace)
    return bad
}

// typeOrNil is like type_ but it returns nil if there was no type
// instead of reporting an error.
//
// Type     = TypeName | TypeLit | "(" Type ")" .
// TypeName = identifier | QualifiedIdent .
// TypeLit  = ArrayType | StructType | PointerType | FunctionType | InterfaceType |
//           SliceType | MapType | Channel_Type .
func (p *parser) typeOrNil() Expr {
    if trace {
        defer p.trace("typeOrNil")()
    }

    start := p.pos()
    switch p.tok {
    case _Name:
        // A name, possibly continued by ".name" (handled by dotname).
        first := p.name()
        return p.dotname(first)

    case _Lparen:
        // Parenthesized type: parse the inner type and require ')'.
        p.next()
        inner := p.type_()
        p.want(_Rparen)
        return inner

    case _Star:
        // Pointer type.
        p.next()
        base := p.type_()
        return newIndirect(start, base)

    case _Func:
        // Function type.
        p.next()
        return p.funcType()

    case _Struct:
        return p.structType()

    case _Interface:
        return p.interfaceType()

    case _Map:
        // map '[' key type ']' value type
        p.next()
        p.want(_Lbrack)
        m := new(MapType)
        m.pos = start
        m.Key = p.type_()
        p.want(_Rbrack)
        m.Value = p.type_()
        return m

    case _Arrow:
        // Receive-only channel type: '<-' must be followed by chan.
        p.next()
        p.want(_Chan)
        recv := new(ChanType)
        recv.pos = start
        recv.Dir = RecvOnly
        recv.Elem = p.chanElem()
        return recv

    case _Chan:
        // Channel type that does not start with '<-'; a '<-' right after
        // chan makes it send-only, otherwise Dir keeps its zero value.
        p.next()
        ch := new(ChanType)
        ch.pos = start
        if p.got(_Arrow) {
            ch.Dir = SendOnly
        }
        ch.Elem = p.chanElem()
        return ch

    case _Lbrack:
        // '[' ']' T          slice type
        // '[' expr ']' T     array type
        // '[' '...' ']' T    array type with Len left nil
        p.next()
        p.xnest++
        if p.got(_Rbrack) {
            // []T
            p.xnest--
            st := new(SliceType)
            st.pos = start
            st.Elem = p.type_()
            return st
        }

        // [n]T
        at := new(ArrayType)
        at.pos = start
        if !p.got(_DotDotDot) {
            at.Len = p.expr()
        }
        p.want(_Rbrack)
        p.xnest--
        at.Elem = p.type_()
        return at

    default:
        // The current token cannot start a type.
        return nil
    }
}

complitexpr

获取Type { ElemList[0], ElemList[1], ... }内的elements。

// LiteralValue = "{" [ ElementList [ "," ] ] "}" .
//
// complitexpr parses the brace-enclosed element list of a composite literal
// and returns it as a CompositeLit. Each element is either a bare value or a
// "key: value" pair (stored as a KeyValueExpr and counted in NKeys). The
// literal's Type is not set here.
func (p *parser) complitexpr() *CompositeLit {
    if trace {
        defer p.trace("complitexpr")()
    }

    lit := new(CompositeLit)
    lit.pos = p.pos()

    // parseElem handles one list element; it always returns false to p.list.
    parseElem := func() bool {
        elem := p.bare_complitexpr()
        if p.tok == _Colon {
            // What was just parsed is the key; the value follows the ':'.
            kv := new(KeyValueExpr)
            kv.pos = p.pos()
            p.next()
            kv.Key = elem
            kv.Value = p.bare_complitexpr()
            elem = kv
            lit.NKeys++
        }
        lit.ElemList = append(lit.ElemList, elem)
        return false
    }

    p.xnest++
    lit.Rbrace = p.list(_Lbrace, _Comma, _Rbrace, parseElem)
    p.xnest--

    return lit
}