《用Go语言自制解释器》之第2章 语法分析
第2章 语法分析
2.1 语法分析器
词法单元转换为对应的数据结构(解析树、抽象语法树等),并检查语法是否正确。
语法树或抽象语法树(Abstract Syntax Tree,AST),表示源代码的数据结构。
2.4 解析let语句
let <标识符> = <表达式>;
表达式会产生值,语句不会。
语句 = 表达式 + ;
节点
//ast/ast.go
package ast
// 基础节点接口
type Node interface {
String() string
}
// 语句
type Statement interface {
Node
}
// 表达式
type Expression interface {
Node
}
Program实现
//ast/ast.go
// 程序
type Program struct {
Statements []Statement
}
func (p *Program) String() string {
var out bytes.Buffer
for _, s := range p.Statements {
out.WriteString(s.String())
}
return out.String()
}
let语句
//ast/ast.go
type LetStatement struct {
Name *Identifier // 标识符
Value Expression // 右侧表达式
}
func (ls *LetStatement) String() string {
var out bytes.Buffer
out.WriteString("let ")
out.WriteString(ls.Name.String())
out.WriteString(" = ")
out.WriteString(ls.Value.String())
out.WriteString(";")
out.WriteString("\n")
return out.String()
}
// 标识符
type Identifier struct {
Token token.Token // 词法单元
}
func (i *Identifier) String() string {
return i.Token.Literal
}
初版语法分析器
//parser/parser.go
package parser
import (
"monkey/ast"
"monkey/lexer"
"monkey/token"
)
type Parser struct {
l *lexer.Lexer
curToken token.Token
peekToken token.Token
}
func New(l *lexer.Lexer) *Parser {
p := &Parser{l: l}
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) nextToken() {
p.curToken = p.peekToken
p.peekToken = p.l.NextToken()
}
func (p *Parser) ParseProgram() *ast.Program {
return nil
}
程序解析
语法分析器的任务是,反复前移词法单元指针并检查当前词法单元,以决定下一步的操作。
//parser/parser.go
// ParseProgram is the parser entry point: it parses statements until EOF
// and collects them into an *ast.Program.
func (p *Parser) ParseProgram() *ast.Program {
	program := &ast.Program{}
	program.Statements = []ast.Statement{}
	for p.curToken.Type != token.EOF {
		if stmt := p.parseStatement(); stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
		// Fix: advance the token pointer on every iteration; without this
		// curToken never changes and the loop spins forever.
		p.nextToken()
	}
	return program
}
语句解析
//parser/parser.go
func (p *Parser) parseStatement() ast.Statement {
switch p.curToken.Type {
case token.LET:
return p.parseLetStatement()
default:
return nil
}
}
let语句解析
//parser/parser.go
func (p *Parser) parseLetStatement() *ast.LetStatement {
stmt := &ast.LetStatement{}
if !p.expectPeek(token.IDENT) {
return nil
}
stmt.Name = &ast.Identifier{Token: p.curToken}
if !p.expectPeek(token.ASSIGN) {
return nil
}
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
}
func (p *Parser) curTokenIs(t token.TokenType) bool {
return p.curToken.Type == t
}
func (p *Parser) peekTokenIs(t token.TokenType) bool {
return p.peekToken.Type == t
}
func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekTokenIs(t) {
p.nextToken()
return true
} else {
return false
}
}
错误处理
//parser/parser.go
type Parser struct {
l *lexer.Lexer
errors []string
curToken token.Token
peekToken token.Token
}
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) Errors() []string {
return p.errors
}
func (p *Parser) peekError(t token.TokenType) {
msg := fmt.Sprintf("expected next token to be %s, got %s instead",
t, p.peekToken.Type)
p.errors = append(p.errors, msg)
}
func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekTokenIs(t) {
p.nextToken()
return true
} else {
p.peekError(t)
return false
}
}
2.5 解析return语句
return <表达式>;
return语句
//ast/ast.go
// return语句
type ReturnStatement struct {
ReturnValue Expression //return右边表达式
}
func (rs *ReturnStatement) String() string {
var out bytes.Buffer
out.WriteString("return ")
out.WriteString(rs.ReturnValue.String())
out.WriteString(";")
out.WriteString("\n")
return out.String()
}
语句解析
func (p *Parser) parseStatement() ast.Statement {
switch p.curToken.Type {
case token.LET:
return p.parseLetStatement()
case token.RETURN:
return p.parseReturnStatement()
default:
return nil
}
}
return语句解析
func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
stmt := &ast.ReturnStatement{}
p.nextToken()
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
}
2.6 解析表达式
外层括号表示分组表达式,内层括号是调用表达式。
5*(add(2, 3)+10)
2.6.1 表达式
- 使用前缀运算符的表达式
-5
!true
!false
- 使用中缀(二元)运算符的表达式
5+5
5-5
5/5
5*5
- 比较运算符
foo == bar
foo != bar
foo < bar
foo > bar
- 括号对表达式进行分组
5*(5+5)
- 调用表达式
add(2, 3)
max(5, add(5, 5*5))
- 标识符表达式
foo*bar
add(foo, bar)
- 函数字面量表达式
let add = fn(x, y){return x+y;};
(fn(x, y){return x+y;}(5, 5)+10)*10
- if表达式
let result = if(10>5){true;}else{false;};
2.6.2 自上而下的运算符优先级分析
词法单元类型关联解析函数(前缀、中缀、后缀等)。
2.6.3 术语
- 前缀运算符是位于操作数(operand)前面的运算符。
--5
- 后缀运算符是位于操作数后面的运算符。
foo++
- 中缀运算符是位于两个操作数之间的运算符。
5*8
- 运算符优先级(运算顺序),也叫运算符黏性(运算符黏住了周围多少个操作数),表示不同运算符的重要程度。
5 + 5 * 10
2.6.4 准备AST
expression语句
//ast/ast.go
// expression语句
type ExpressionStatement struct {
Expression Expression
}
func (es *ExpressionStatement) String() string {
return es.Expression.String() + ";" + "\n"
}
2.6.5 实现普拉特语法分析器
词法单元类型关联解析函数(语义代码)。遇到某种词法单元类型时,调用相关联的解析函数来解析对应表达式,最后返回生成的AST节点。
解析函数
//parser/parser.go
type (
prefixParseFn func() ast.Expression //前缀解析函数
infixParseFn func(ast.Expression) ast.Expression //中缀解析函数
)
//词法单元类型与解析函数关联。
type Parser struct {
l *lexer.Lexer
errors []string
curToken token.Token
peekToken token.Token
prefixParseFns map[token.TokenType]prefixParseFn
infixParseFns map[token.TokenType]infixParseFn
}
//注册解析函数
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
p.prefixParseFns[tokenType] = fn
}
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
p.infixParseFns[tokenType] = fn
}
2.6.6 标识符
标识符是最简单的表达式类型。
语句解析
//parser/parser.go
//解析三种类型语句
func (p *Parser) parseStatement() ast.Statement {
switch p.curToken.Type {
case token.SEMICOLON:
return nil
case token.LET:
return p.parseLetStatement()
case token.RETURN:
return p.parseReturnStatement()
default:
return p.parseExpressionStatement()
}
}
expression语句解析
//parser/parser.go
func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
stmt := &ast.ExpressionStatement{}
stmt.Expression = p.parseExpression(LOWEST)
if p.peekTokenIs(token.SEMICOLON) {//表达式语句分号可选
p.nextToken() //存在分号,前移curToken指向这个分号
}
return stmt
}
优先级
//parser/parser.go
const (
_ int = iota
LOWEST
EQUALS // == !=
LESSGREATER // > or <
SUM // + -
PRODUCT // * /
PREFIX // -X or !X
CALL // myFunction(X)
)
表达式解析
//parser/parser.go
func (p *Parser) parseExpression(precedence int) ast.Expression {
prefix := p.prefixParseFns[p.curToken.Type]
if prefix == nil {
return nil
}
//调用前缀解析函数
leftExp := prefix()
return leftExp
}
注册标识符前缀解析函数
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseIdentifier() ast.Expression {
return &ast.Identifier{Token: p.curToken}
}
2.6.7 整型字面量
整型字面量也是表达式,其所产生的值就是整数本身。
所有表达式类型(整型字面量、标识符、调用表达式、分组表达式、函数字面量等)可以互换。
整数字面量
//ast/ast.go
// 整数字面量
type IntegerLiteral struct {
Token token.Token
Value int64
}
func (il *IntegerLiteral) String() string { return il.Token.Literal }
注册整型字面量前缀解析函数
//parser/parser.go
import {
"strconv"
}
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseIntegerLiteral() ast.Expression {
lit := &ast.IntegerLiteral{Token: p.curToken}
value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
if err != nil {
msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal)
p.errors = append(p.errors, msg)
return nil
}
lit.Value = value
return lit
}
2.6.8 前缀运算符
!与-
!foo;
5 + -10;
<前缀运算符><表达式>;
前缀表达式
//ast/ast.go
// 前缀表达式
type PrefixExpression struct {
Token token.Token // The prefix token, e.g. !
Right Expression
}
func (pe *PrefixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(pe.Token.Literal)
out.WriteString(pe.Right.String())
out.WriteString(")")
return out.String()
}
token无前缀解析函数时报错
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
msg := fmt.Sprintf("no prefix parse function for %s found", t)
p.errors = append(p.errors, msg)
}
func (p *Parser) parseExpression(precedence int) ast.Expression {
prefix := p.prefixParseFns[p.curToken.Type]
if prefix == nil {
p.noPrefixParseFnError(p.curToken.Type)
return nil
}
leftExp := prefix()
return leftExp
}
注册前缀解析函数
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.registerPrefix(token.BANG, p.parsePrefixExpression)
p.registerPrefix(token.MINUS, p.parsePrefixExpression)
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parsePrefixExpression() ast.Expression {
expression := &ast.PrefixExpression{
Token: p.curToken,
Operator: p.curToken.Literal,
}
p.nextToken()
expression.Right = p.parseExpression(PREFIX)
return expression
}
2.6.9 中缀运算符
<表达式> <中缀运算符> <表达式>
5 + 5;
5 - 5;
5 * 5;
5 / 5;
5 > 5;
5 < 5;
5 == 5;
5 != 5;
中缀表达式
// 中缀表达式
type InfixExpression struct {
Token token.Token // The operator token, e.g. +
Left Expression
Right Expression
}
func (ie *InfixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(ie.Left.String())
out.WriteString(" " + ie.Token.Literal + " ")
out.WriteString(ie.Right.String())
out.WriteString(")")
return out.String()
}
优先级表及辅助函数
var precedences = map[token.TokenType]int{
token.EQ: EQUALS,
token.NOT_EQ: EQUALS,
token.LT: LESSGREATER,
token.GT: LESSGREATER,
token.PLUS: SUM,
token.MINUS: SUM,
token.SLASH: PRODUCT,
token.ASTERISK: PRODUCT,
token.LPAREN: CALL,
}
// peekPrecedence returns the binding power of the upcoming token, or
// LOWEST when no infix precedence is registered for it.
func (p *Parser) peekPrecedence() int {
	// Use a distinct name for the map value: the original shadowed the
	// receiver `p` inside the if-statement, which is confusing to read.
	if prec, ok := precedences[p.peekToken.Type]; ok {
		return prec
	}
	return LOWEST
}
// curPrecedence returns the binding power of the current token, or
// LOWEST when no infix precedence is registered for it.
func (p *Parser) curPrecedence() int {
	// Avoid shadowing the receiver `p` with the map value.
	if prec, ok := precedences[p.curToken.Type]; ok {
		return prec
	}
	return LOWEST
}
注册中缀表达式解析函数
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.registerPrefix(token.BANG, p.parsePrefixExpression)
p.registerPrefix(token.MINUS, p.parsePrefixExpression)
p.infixParseFns = make(map[token.TokenType]infixParseFn)
p.registerInfix(token.PLUS, p.parseInfixExpression)
p.registerInfix(token.MINUS, p.parseInfixExpression)
p.registerInfix(token.SLASH, p.parseInfixExpression)
p.registerInfix(token.ASTERISK, p.parseInfixExpression)
p.registerInfix(token.EQ, p.parseInfixExpression)
p.registerInfix(token.NOT_EQ, p.parseInfixExpression)
p.registerInfix(token.LT, p.parseInfixExpression)
p.registerInfix(token.GT, p.parseInfixExpression)
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
expression := &ast.InfixExpression{
Token: p.curToken,
Left: left,
}
precedence := p.curPrecedence()
p.nextToken()
expression.Right = p.parseExpression(precedence)
return expression
}
解析表达式最终版本
// parseExpression is the core of the Pratt parser. `precedence` is the
// caller's right-binding power: parsing extends to the right only while
// the next operator binds more tightly than that.
func (p *Parser) parseExpression(precedence int) ast.Expression {
	prefix := p.prefixParseFns[p.curToken.Type]
	if prefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}
	// Every expression begins with a token in prefix position
	// (literal, identifier, prefix operator, '(', 'if', 'fn', ...).
	leftExp := prefix()
	// Fold infix operators into leftExp while the upcoming operator's
	// left-binding power exceeds our own right-binding power; this is
	// what makes `1 + 2 * 3` parse as `(1 + (2 * 3))`.
	for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
		infix := p.infixParseFns[p.peekToken.Type]
		if infix == nil {
			return leftExp
		}
		p.nextToken()
		leftExp = infix(leftExp)
	}
	return leftExp
}
2.7 普拉特解析的工作方式
表达式语句1+2+3
解析后得到AST,序列化字符串为
((1 + 2) + 3)
表达式解析过程:
1 当前token '1',优先级LOWEST
1.1 调用token.INT注册的前缀解析函数parseIntegerLiteral
得到ast.IntegerLiteral 1。
1.2 precedence{LOWEST}<p.peekPrecedence(){SUM}成立,调用中缀解析函数
传入ast.IntegerLiteral 1,作为left表达式。
2 当前token '2',优先级SUM
2.1 调用token.INT注册的前缀解析函数parseIntegerLiteral
得到*ast.IntegerLiteral 2。
2.2 precedence{SUM}<p.peekPrecedence(){SUM}不成立,返回中缀表达式
返回中缀表达式(1 + 2)。
2.3 表达式1+2解析结束,返回1.2。
1.3 precedence{LOWEST}<p.peekPrecedence(){SUM}成立,调用中缀解析函数
此时left为中缀表达式(1+2),right为3
1.4 precedence{LOWEST}<p.peekPrecedence(){LOWEST}不成立,跳出1.2for循环,解析结束
得到最终ast((1 + 2) + 3)。
- 使用优先级是为了让具有较高优先级的运算符表达式位于树中更高的位置,而较低优先级的运算符表达式位于树中较低的位置。
- precedence的值表示当前parseExpression调用中的右约束能力,当前表达式(包含后续的词法单元)右边“约束”词法单元、运算符、操作数的能力。
- peekPrecedence表示下一个运算符peekToken的左约束能力。
- precedence<p.peekPrecedence()表示下一个词法单元左约束能力强于当前词法单元的右约束能力。
//*左约束能力大于+右约束能力,所以
//(1 + (2 * 3))
1 + 2 * 3
2.8 扩展语法分析器
2.8.1 布尔字面量
布尔字面量
// ast/ast.go
// 布尔字面量
type Boolean struct {
Token token.Token
Value bool
}
func (b *Boolean) String() string { return b.Token.Literal }
布尔字面量解析函数
// parser/parser.go
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.TRUE, p.parseBoolean)
p.registerPrefix(token.FALSE, p.parseBoolean)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseBoolean() ast.Expression {
return &ast.Boolean{Token: p.curToken, Value: p.curTokenIs(token.TRUE)}
}
2.8.2 分组表达式
用括号给表达式分组以修改其优先级,从而影响表达式在上下文中的求值顺序。
分组表达式解析函数
// parser/parser.go
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
//遇到(调用分组解析函数
p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseGroupedExpression() ast.Expression {
p.nextToken()
exp := p.parseExpression(LOWEST)
if !p.expectPeek(token.RPAREN) {
return nil
}
return exp
}
2.8.3 if表达式
if (条件) <结果>
if (条件) <结果> else <可替代结果>
if表达式
// ast/ast.go
// if表达式
type IfExpression struct {
Condition Expression
Consequence *BlockStatement
Alternative *BlockStatement
}
func (ie *IfExpression) String() string {
var out bytes.Buffer
out.WriteString("if")
out.WriteString(ie.Condition.String())
out.WriteString(" ")
out.WriteString(ie.Consequence.String())
if ie.Alternative != nil {
out.WriteString(" else ")
out.WriteString(ie.Alternative.String())
}
return out.String()
}
块语句
// ast/ast.go
// block语句
type BlockStatement struct {
Statements []Statement
}
func (bs *BlockStatement) String() string {
var out bytes.Buffer
out.WriteString("{")
out.WriteString("\n")
for _, s := range bs.Statements {
out.WriteString("\t" + s.String())
}
out.WriteString("}")
return out.String()
}
if表达式解析函数
// parser/parser.go
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
//遇到if调用if解析函数
p.registerPrefix(token.IF, p.parseIfExpression)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
// parseIfExpression parses `if (<condition>) { ... } [else { ... }]`.
// On entry curToken is the IF token; on success curToken ends on the
// closing '}' of the consumed block.
func (p *Parser) parseIfExpression() ast.Expression {
	expression := &ast.IfExpression{}
	// The condition must be parenthesized: expect '(' and then step
	// onto the first token of the condition expression itself.
	if !p.expectPeek(token.LPAREN) {
		return nil
	}
	p.nextToken()
	expression.Condition = p.parseExpression(LOWEST)
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	if !p.expectPeek(token.LBRACE) {
		return nil
	}
	expression.Consequence = p.parseBlockStatement()
	// The else branch is optional and only consumed when present.
	if p.peekTokenIs(token.ELSE) {
		p.nextToken()
		if !p.expectPeek(token.LBRACE) {
			return nil
		}
		expression.Alternative = p.parseBlockStatement()
	}
	return expression
}
解析块语句
// parser/parser.go
func (p *Parser) parseBlockStatement() *ast.BlockStatement {
block := &ast.BlockStatement{}
block.Statements = []ast.Statement{}
p.nextToken()
for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
stmt := p.parseStatement()
if stmt != nil {
block.Statements = append(block.Statements, stmt)
}
p.nextToken()
}
return block
}
2.8.4 函数字面量
fn <参数列表> <块语句>
函数字面量
// ast/ast.go
// 函数字面量
type FunctionLiteral struct {
Parameters []*Identifier
Body *BlockStatement
}
func (fl *FunctionLiteral) String() string {
var out bytes.Buffer
params := []string{}
for _, p := range fl.Parameters {
params = append(params, p.String())
}
out.WriteString("fn")
out.WriteString("(")
out.WriteString(strings.Join(params, ", "))
out.WriteString(") ")
out.WriteString(fl.Body.String())
return out.String()
}
函数字面量解析函数
// parser/parser.go
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
//遇到fn调用函数字面量解析函数
p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseFunctionLiteral() ast.Expression {
lit := &ast.FunctionLiteral{}
if !p.expectPeek(token.LPAREN) {
return nil
}
lit.Parameters = p.parseFunctionParameters()
if !p.expectPeek(token.LBRACE) {
return nil
}
lit.Body = p.parseBlockStatement()
return lit
}
函数字面量参数解析函数
// parser/parser.go
func (p *Parser) parseFunctionParameters() []*ast.Identifier {
identifiers := []*ast.Identifier{}
if p.peekTokenIs(token.RPAREN) {
p.nextToken()
return identifiers
}
p.nextToken()
ident := &ast.Identifier{Token: p.curToken}
identifiers = append(identifiers, ident)
for p.peekTokenIs(token.COMMA) {
p.nextToken()
p.nextToken()
ident := &ast.Identifier{Token: p.curToken}
identifiers = append(identifiers, ident)
}
if !p.expectPeek(token.RPAREN) {
return nil
}
return identifiers
}
2.8.5 调用表达式
<表达式>(<以逗号分隔的表达式列表>)
调用表达式
// ast/ast.go
// 调用表达式
type CallExpression struct {
Function Expression // 标识符或函数字面量
Arguments []Expression
}
func (ce *CallExpression) String() string {
var out bytes.Buffer
args := []string{}
for _, a := range ce.Arguments {
args = append(args, a.String())
}
out.WriteString(ce.Function.String())
out.WriteString("(")
out.WriteString(strings.Join(args, ", "))
out.WriteString(")")
return out.String()
}
调用表达式中缀解析函数
// parser/parser.go
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.infixParseFns = make(map[token.TokenType]infixParseFn)
//标识符后面遇到(调用表达式中缀解析函数
p.registerInfix(token.LPAREN, p.parseCallExpression)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) parseCallExpression(function ast.Expression) ast.Expression {
exp := &ast.CallExpression{Function: function}
exp.Arguments = p.parseCallArguments()
return exp
}
解析调用表达式中传递的参数
// parser/parser.go
// parseCallArguments parses the comma-separated argument list of a call
// expression, leaving curToken on the closing ')'. It returns nil (not
// an empty slice) when the ')' is missing, signalling a parse error.
func (p *Parser) parseCallArguments() []ast.Expression {
	args := []ast.Expression{}
	// Empty argument list: `foo()`.
	if p.peekTokenIs(token.RPAREN) {
		p.nextToken()
		return args
	}
	p.nextToken()
	args = append(args, p.parseExpression(LOWEST))
	for p.peekTokenIs(token.COMMA) {
		p.nextToken() // onto the ','
		p.nextToken() // onto the first token of the next argument
		args = append(args, p.parseExpression(LOWEST))
	}
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	return args
}
2.8.6 删除TODO
// parser/parser.go
func (p *Parser) parseLetStatement() *ast.LetStatement {
stmt := &ast.LetStatement{}
if !p.expectPeek(token.IDENT) {
return nil
}
stmt.Name = &ast.Identifier{Token: p.curToken}
if !p.expectPeek(token.ASSIGN) {
return nil
}
p.nextToken()
stmt.Value = p.parseExpression(LOWEST)
if p.peekTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
}
func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
stmt := &ast.ReturnStatement{}
p.nextToken()
stmt.ReturnValue = p.parseExpression(LOWEST)
if p.peekTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
}
2.9 RPPL
Read-Parse-Print Loop(读取-语法分析-打印循环):此处的REPL已调用语法分析器并打印AST,而不只是词法分析。
// repl/repl.go
package repl
import (
"bufio"
"fmt"
"io"
"monkey/lexer"
"monkey/token"
)
const PROMPT = ">>"
func Start(in io.Reader, out io.Writer) {
scanner := bufio.NewScanner(in)
for {
fmt.Fprintf(out, PROMPT)
scanned := scanner.Scan()
if !scanned {
return
}
line := scanner.Text()
l := lexer.New(line)
p := parser.New(l)
program := p.ParseProgram()
if len(p.Errors()) != 0 {
printParserErrors(out, p.Errors())
continue
}
io.WriteString(out, program.String())
io.WriteString(out, "\n")
}
}
const MONKEY_FACE = ` __,__
.--. .-" "-. .--.
/ .. \/ .-. .-. \/ .. \
| | '| / Y \ |' | |
| \ \ \ 0 | 0 / / / |
\ '- ,\.-"""""""-./, -' /
''-' /_ ^ ^ _\ '-''
| \._ _./ |
\ \ '~' / /
'._ '-=-' _.'
'-----'
`
func printParserErrors(out io.Writer, errors []string) {
io.WriteString(out, MONKEY_FACE)
io.WriteString(out, "Woops! We ran into some monkey business here!\n")
io.WriteString(out, "parser errors:\n")
for _, msg := range errors {
io.WriteString(out, "\t" + msg + "\n")
}
}
2.10 完整工程
monkey/token/token.go
package token
type TokenType string
const (
ILLEGAL = "ILLEGAL"
EOF = "EOF"
// 标识符+字面量
IDENT = "IDENT" // add, foobar, x, y, ...
INT = "INT" // 1343456
// 运算符
ASSIGN = "="
PLUS = "+"
MINUS = "-"
BANG = "!"
ASTERISK = "*"
SLASH = "/"
LT = "<"
GT = ">"
EQ = "=="
NOT_EQ = "!="
// 分隔符
COMMA = ","
SEMICOLON = ";"
LPAREN = "("
RPAREN = ")"
LBRACE = "{"
RBRACE = "}"
// 关键字
FUNCTION = "FUNCTION"
LET = "LET"
TRUE = "TRUE"
FALSE = "FALSE"
IF = "IF"
ELSE = "ELSE"
RETURN = "RETURN"
)
type Token struct {
Type TokenType
Literal string
}
var keywords = map[string]TokenType{
"fn": FUNCTION,
"let": LET,
"true": TRUE,
"false": FALSE,
"if": IF,
"else": ELSE,
"return": RETURN,
}
func LookupIdent(ident string) TokenType {
if tok, ok := keywords[ident]; ok {
return tok
}
return IDENT
}
monkey/lexer/lexer.go
package lexer
import "monkey/token"
type Lexer struct {
input string
position int // 当前字符位置
readPosition int // 下一个字符位置
ch byte // 当前字符
}
func New(input string) *Lexer {
l := &Lexer{input: input}
l.readChar()
return l
}
func (l *Lexer) NextToken() token.Token {
var tok token.Token
l.skipWhitespace()
switch l.ch {
case '=':
if l.peekChar() == '=' {
ch := l.ch
l.readChar()
literal := string(ch) + string(l.ch)
tok = token.Token{Type: token.EQ, Literal: literal}
} else {
tok = newToken(token.ASSIGN, l.ch)
}
case '+':
tok = newToken(token.PLUS, l.ch)
case '-':
tok = newToken(token.MINUS, l.ch)
case '!':
if l.peekChar() == '=' {
ch := l.ch
l.readChar()
literal := string(ch) + string(l.ch)
tok = token.Token{Type: token.NOT_EQ, Literal: literal}
} else {
tok = newToken(token.BANG, l.ch)
}
case '/':
tok = newToken(token.SLASH, l.ch)
case '*':
tok = newToken(token.ASTERISK, l.ch)
case '<':
tok = newToken(token.LT, l.ch)
case '>':
tok = newToken(token.GT, l.ch)
case ';':
tok = newToken(token.SEMICOLON, l.ch)
case ',':
tok = newToken(token.COMMA, l.ch)
case '{':
tok = newToken(token.LBRACE, l.ch)
case '}':
tok = newToken(token.RBRACE, l.ch)
case '(':
tok = newToken(token.LPAREN, l.ch)
case ')':
tok = newToken(token.RPAREN, l.ch)
case 0:
tok.Literal = ""
tok.Type = token.EOF
default:
if isLetter(l.ch) {
tok.Literal = l.readIdentifier()
tok.Type = token.LookupIdent(tok.Literal)
return tok
} else if isDigit(l.ch) {
tok.Type = token.INT
tok.Literal = l.readNumber()
return tok
} else {
tok = newToken(token.ILLEGAL, l.ch)
}
}
l.readChar()
return tok
}
func (l *Lexer) skipWhitespace() {
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
l.readChar()
}
}
func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) {
l.ch = 0
} else {
l.ch = l.input[l.readPosition]
}
l.position = l.readPosition
l.readPosition += 1
}
func (l *Lexer) peekChar() byte {
if l.readPosition >= len(l.input) {
return 0
} else {
return l.input[l.readPosition]
}
}
func (l *Lexer) readIdentifier() string {
position := l.position
for isLetter(l.ch) {
l.readChar()
}
return l.input[position:l.position]
}
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) {
l.readChar()
}
return l.input[position:l.position]
}
// isLetter reports whether ch may appear in an identifier:
// an ASCII letter or the underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
func newToken(tokenType token.TokenType, ch byte) token.Token {
return token.Token{Type: tokenType, Literal: string(ch)}
}
monkey/lexer/lexer_test.go
package lexer
import (
"testing"
"monkey/token"
)
func TestNextToken(t *testing.T) {
input := `let five = 5;
let ten = 10;
let add = fn(x, y) {
x + y;
};
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if (5 < 10) {
return true;
} else {
return false;
}
10 == 10;
10 != 9;
`
tests := []struct {
expectedType token.TokenType
expectedLiteral string
}{
{token.LET, "let"},
{token.IDENT, "five"},
{token.ASSIGN, "="},
{token.INT, "5"},
{token.SEMICOLON, ";"},
{token.LET, "let"},
{token.IDENT, "ten"},
{token.ASSIGN, "="},
{token.INT, "10"},
{token.SEMICOLON, ";"},
{token.LET, "let"},
{token.IDENT, "add"},
{token.ASSIGN, "="},
{token.FUNCTION, "fn"},
{token.LPAREN, "("},
{token.IDENT, "x"},
{token.COMMA, ","},
{token.IDENT, "y"},
{token.RPAREN, ")"},
{token.LBRACE, "{"},
{token.IDENT, "x"},
{token.PLUS, "+"},
{token.IDENT, "y"},
{token.SEMICOLON, ";"},
{token.RBRACE, "}"},
{token.SEMICOLON, ";"},
{token.LET, "let"},
{token.IDENT, "result"},
{token.ASSIGN, "="},
{token.IDENT, "add"},
{token.LPAREN, "("},
{token.IDENT, "five"},
{token.COMMA, ","},
{token.IDENT, "ten"},
{token.RPAREN, ")"},
{token.SEMICOLON, ";"},
{token.BANG, "!"},
{token.MINUS, "-"},
{token.SLASH, "/"},
{token.ASTERISK, "*"},
{token.INT, "5"},
{token.SEMICOLON, ";"},
{token.INT, "5"},
{token.LT, "<"},
{token.INT, "10"},
{token.GT, ">"},
{token.INT, "5"},
{token.SEMICOLON, ";"},
{token.IF, "if"},
{token.LPAREN, "("},
{token.INT, "5"},
{token.LT, "<"},
{token.INT, "10"},
{token.RPAREN, ")"},
{token.LBRACE, "{"},
{token.RETURN, "return"},
{token.TRUE, "true"},
{token.SEMICOLON, ";"},
{token.RBRACE, "}"},
{token.ELSE, "else"},
{token.LBRACE, "{"},
{token.RETURN, "return"},
{token.FALSE, "false"},
{token.SEMICOLON, ";"},
{token.RBRACE, "}"},
{token.INT, "10"},
{token.EQ, "=="},
{token.INT, "10"},
{token.SEMICOLON, ";"},
{token.INT, "10"},
{token.NOT_EQ, "!="},
{token.INT, "9"},
{token.SEMICOLON, ";"},
{token.EOF, ""},
}
l := New(input)
for i, tt := range tests {
tok := l.NextToken()
if tok.Type != tt.expectedType {
t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q",
i, tt.expectedType, tok.Type)
}
if tok.Literal != tt.expectedLiteral {
t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
i, tt.expectedLiteral, tok.Literal)
}
}
}
monkey/ast/ast.go
package ast
import (
"bytes"
"monkey/token"
"strings"
)
// 基础节点接口
type Node interface {
String() string
}
// 语句
type Statement interface {
Node
}
// 表达式
type Expression interface {
Node
}
// 程序
type Program struct {
Statements []Statement
}
func (p *Program) String() string {
var out bytes.Buffer
for _, s := range p.Statements {
out.WriteString(s.String())
}
return out.String()
}
// A LetStatement binds the right-hand expression to the named
// identifier: `let <name> = <value>;`.
type LetStatement struct {
	Name  *Identifier // bound identifier
	Value Expression  // right-hand side expression
}

// String serializes the statement as `let <name> = <value>;\n`.
func (ls *LetStatement) String() string {
	var out bytes.Buffer
	out.WriteString("let ")
	out.WriteString(ls.Name.String())
	out.WriteString(" = ")
	// Value may be nil when parsing failed part-way through the
	// statement; skip it instead of panicking while printing a
	// partial AST.
	if ls.Value != nil {
		out.WriteString(ls.Value.String())
	}
	out.WriteString(";")
	out.WriteString("\n")
	return out.String()
}
// return语句
type ReturnStatement struct {
ReturnValue Expression //return右边表达式
}
func (rs *ReturnStatement) String() string {
var out bytes.Buffer
out.WriteString("return ")
out.WriteString(rs.ReturnValue.String())
out.WriteString(";")
out.WriteString("\n")
return out.String()
}
// An ExpressionStatement is a statement consisting of a single
// expression, e.g. `x + 10;`.
type ExpressionStatement struct {
	Expression Expression
}

// String serializes the statement as `<expr>;\n`.
func (es *ExpressionStatement) String() string {
	// Expression may be nil when parsing failed; return "" instead of
	// panicking while printing a partial AST.
	if es.Expression != nil {
		return es.Expression.String() + ";" + "\n"
	}
	return ""
}
// block语句
type BlockStatement struct {
Statements []Statement
}
func (bs *BlockStatement) String() string {
var out bytes.Buffer
out.WriteString("{")
out.WriteString("\n")
for _, s := range bs.Statements {
out.WriteString("\t" + s.String())
}
out.WriteString("}")
return out.String()
}
// 标识符
type Identifier struct {
Token token.Token // 词法单元
}
func (i *Identifier) String() string { return i.Token.Literal }
// 布尔字面量
type Boolean struct {
Token token.Token
Value bool
}
func (b *Boolean) String() string { return b.Token.Literal }
// 整数字面量
type IntegerLiteral struct {
Token token.Token
Value int64
}
func (il *IntegerLiteral) String() string { return il.Token.Literal }
// 前缀表达式
type PrefixExpression struct {
Token token.Token // The prefix token, e.g. !
Right Expression
}
func (pe *PrefixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(pe.Token.Literal)
out.WriteString(pe.Right.String())
out.WriteString(")")
return out.String()
}
// 中缀表达式
type InfixExpression struct {
Token token.Token // The operator token, e.g. +
Left Expression
Right Expression
}
func (ie *InfixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(ie.Left.String())
out.WriteString(" " + ie.Token.Literal + " ")
out.WriteString(ie.Right.String())
out.WriteString(")")
return out.String()
}
// if表达式
type IfExpression struct {
Condition Expression
Consequence *BlockStatement
Alternative *BlockStatement
}
func (ie *IfExpression) String() string {
var out bytes.Buffer
out.WriteString("if")
out.WriteString(ie.Condition.String())
out.WriteString(" ")
out.WriteString(ie.Consequence.String())
if ie.Alternative != nil {
out.WriteString(" else ")
out.WriteString(ie.Alternative.String())
}
return out.String()
}
// 函数字面量
type FunctionLiteral struct {
Parameters []*Identifier
Body *BlockStatement
}
func (fl *FunctionLiteral) String() string {
var out bytes.Buffer
params := []string{}
for _, p := range fl.Parameters {
params = append(params, p.String())
}
out.WriteString("fn")
out.WriteString("(")
out.WriteString(strings.Join(params, ", "))
out.WriteString(") ")
out.WriteString(fl.Body.String())
return out.String()
}
// 调用表达式
type CallExpression struct {
Function Expression // 标识符或函数字面量
Arguments []Expression
}
func (ce *CallExpression) String() string {
var out bytes.Buffer
args := []string{}
for _, a := range ce.Arguments {
args = append(args, a.String())
}
out.WriteString(ce.Function.String())
out.WriteString("(")
out.WriteString(strings.Join(args, ", "))
out.WriteString(")")
return out.String()
}
monkey/ast/ast_test.go
package ast
import (
"monkey/token"
"testing"
)
func TestString(t *testing.T) {
program := &Program{
Statements: []Statement{
&LetStatement{
Name: &Identifier{
Token: token.Token{Type: token.IDENT, Literal: "myVar"},
//Value: "myVar",
},
Value: &Identifier{
Token: token.Token{Type: token.IDENT, Literal: "anotherVar"},
//Value: "anotherVar",
},
},
},
}
if program.String() != "let myVar = anotherVar;\n" {
t.Errorf("program.String() wrong. got=%q", program.String())
}
}
monkey/parser/parser.go
package parser
import (
"fmt"
"monkey/ast"
"monkey/lexer"
"monkey/token"
"strconv"
)
const (
_ int = iota
LOWEST
EQUALS // == !=
LESSGREATER // > or <
SUM // + -
PRODUCT // * /
PREFIX // -X or !X
CALL // myFunction(X)
)
var precedences = map[token.TokenType]int{
token.EQ: EQUALS,
token.NOT_EQ: EQUALS,
token.LT: LESSGREATER,
token.GT: LESSGREATER,
token.PLUS: SUM,
token.MINUS: SUM,
token.SLASH: PRODUCT,
token.ASTERISK: PRODUCT,
token.LPAREN: CALL,
}
type (
prefixParseFn func() ast.Expression
infixParseFn func(ast.Expression) ast.Expression
)
type Parser struct {
l *lexer.Lexer
errors []string
curToken token.Token
peekToken token.Token
prefixParseFns map[token.TokenType]prefixParseFn
infixParseFns map[token.TokenType]infixParseFn
}
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.registerPrefix(token.BANG, p.parsePrefixExpression)
p.registerPrefix(token.MINUS, p.parsePrefixExpression)
p.registerPrefix(token.TRUE, p.parseBoolean)
p.registerPrefix(token.FALSE, p.parseBoolean)
p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
p.registerPrefix(token.IF, p.parseIfExpression)
p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral)
p.infixParseFns = make(map[token.TokenType]infixParseFn)
p.registerInfix(token.PLUS, p.parseInfixExpression)
p.registerInfix(token.MINUS, p.parseInfixExpression)
p.registerInfix(token.SLASH, p.parseInfixExpression)
p.registerInfix(token.ASTERISK, p.parseInfixExpression)
p.registerInfix(token.EQ, p.parseInfixExpression)
p.registerInfix(token.NOT_EQ, p.parseInfixExpression)
p.registerInfix(token.LT, p.parseInfixExpression)
p.registerInfix(token.GT, p.parseInfixExpression)
p.registerInfix(token.LPAREN, p.parseCallExpression)
// Read two tokens, so curToken and peekToken are both set
p.nextToken()
p.nextToken()
return p
}
func (p *Parser) nextToken() {
p.curToken = p.peekToken
p.peekToken = p.l.NextToken()
}
// curTokenIs reports whether the current token has type t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
	return t == p.curToken.Type
}
// peekTokenIs reports whether the upcoming token has type t.
func (p *Parser) peekTokenIs(t token.TokenType) bool {
	return t == p.peekToken.Type
}
// expectPeek is the parser's assertion helper: when the peek token has
// the expected type it advances and returns true; otherwise it records
// a peekError, leaves the position unchanged, and returns false.
//
// Idiom fix: the happy path returns inside the if, so the `else` branch
// is unnecessary — keep the error path left-aligned instead.
func (p *Parser) expectPeek(t token.TokenType) bool {
	if p.peekTokenIs(t) {
		p.nextToken()
		return true
	}
	p.peekError(t)
	return false
}
// Errors returns every parse error message collected so far.
func (p *Parser) Errors() []string {
	return p.errors
}
// peekError records an error stating that the peek token did not have
// the expected type t.
func (p *Parser) peekError(t token.TokenType) {
	p.errors = append(p.errors, fmt.Sprintf(
		"expected next token to be %s, got %s instead", t, p.peekToken.Type))
}
// noPrefixParseFnError records an error for a token type that cannot
// start an expression (no prefixParseFn registered for it).
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
	p.errors = append(p.errors, fmt.Sprintf("no prefix parse function for %s found", t))
}
// ParseProgram parses the whole token stream into an *ast.Program,
// collecting one statement per loop iteration until EOF.
func (p *Parser) ParseProgram() *ast.Program {
	program := &ast.Program{Statements: []ast.Statement{}}
	for !p.curTokenIs(token.EOF) {
		// nil statements (e.g. stray semicolons) are simply dropped.
		if stmt := p.parseStatement(); stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
		p.nextToken()
	}
	return program
}
// parseStatement dispatches on the current token type: let and return
// have dedicated parsers, everything else is an expression statement.
func (p *Parser) parseStatement() ast.Statement {
	switch p.curToken.Type {
	case token.SEMICOLON:
		// A lone semicolon is an empty statement; skip it.
		return nil
	case token.LET:
		return p.parseLetStatement()
	case token.RETURN:
		return p.parseReturnStatement()
	default:
		return p.parseExpressionStatement()
	}
}
// parseLetStatement parses `let <identifier> = <expression>;`.
// On entry curToken is token.LET. On a malformed statement it returns
// nil; expectPeek has already recorded the error.
func (p *Parser) parseLetStatement() *ast.LetStatement {
	stmt := &ast.LetStatement{}
	if !p.expectPeek(token.IDENT) {
		return nil
	}
	stmt.Name = &ast.Identifier{Token: p.curToken}
	if !p.expectPeek(token.ASSIGN) {
		return nil
	}
	// Step past '=' onto the first token of the value expression.
	p.nextToken()
	stmt.Value = p.parseExpression(LOWEST)
	// The trailing semicolon is optional (convenient in the REPL).
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}
// parseReturnStatement parses `return <expression>;`.
// On entry curToken is token.RETURN.
func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
	stmt := &ast.ReturnStatement{}
	// Step past 'return' onto the first token of the value expression.
	p.nextToken()
	stmt.ReturnValue = p.parseExpression(LOWEST)
	// The trailing semicolon is optional.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}
// parseExpressionStatement wraps a bare expression (e.g. `x + 10;`)
// in a statement node so it can appear at program/block level.
func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
	//defer untrace(trace("parseExpressionStatement"))
	stmt := &ast.ExpressionStatement{}
	stmt.Expression = p.parseExpression(LOWEST)
	// The trailing semicolon is optional.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}
// parseExpression is the heart of the Pratt parser: it parses the
// prefix position with the registered prefixParseFn, then keeps folding
// following infix operators into the left side for as long as they
// bind more tightly than the caller's precedence.
func (p *Parser) parseExpression(precedence int) ast.Expression {
	//defer untrace(trace("parseExpression"))
	prefix := p.prefixParseFns[p.curToken.Type]
	if prefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}
	leftExp := prefix()
	// `precedence < peekPrecedence()` is what makes `*` bind tighter
	// than `+`, and equal precedences left-associative.
	for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
		infix := p.infixParseFns[p.peekToken.Type]
		if infix == nil {
			return leftExp
		}
		p.nextToken()
		leftExp = infix(leftExp)
	}
	return leftExp
}
// peekPrecedence returns the binding power of the peek token, or
// LOWEST when it is not a registered infix operator.
//
// Fix: the original declared a local `p` that shadowed the receiver
// `p` inside the if — harmless here, but a shadowing trap that linters
// flag; use a distinct name.
func (p *Parser) peekPrecedence() int {
	if prec, ok := precedences[p.peekToken.Type]; ok {
		return prec
	}
	return LOWEST
}
// curPrecedence returns the binding power of the current token, or
// LOWEST when it is not a registered infix operator.
//
// Fix: avoid shadowing the receiver `p` with the map-lookup local.
func (p *Parser) curPrecedence() int {
	if prec, ok := precedences[p.curToken.Type]; ok {
		return prec
	}
	return LOWEST
}
// parseIdentifier builds an identifier node from the current token;
// the node's String() reads the name from Token.Literal.
func (p *Parser) parseIdentifier() ast.Expression {
	ident := &ast.Identifier{Token: p.curToken}
	return ident
}
// parseIntegerLiteral converts the current token's literal to int64.
// Base 0 lets strconv also accept 0x/0o/0b prefixed forms, not just
// decimal.
func (p *Parser) parseIntegerLiteral() ast.Expression {
	//defer untrace(trace("parseIntegerLiteral"))
	lit := &ast.IntegerLiteral{Token: p.curToken}
	value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
	if err != nil {
		msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal)
		p.errors = append(p.errors, msg)
		return nil
	}
	lit.Value = value
	return lit
}
// parsePrefixExpression parses `-X` / `!X`. The operand is parsed with
// PREFIX precedence so `-a * b` groups as `((-a) * b)`.
func (p *Parser) parsePrefixExpression() ast.Expression {
	//defer untrace(trace("parsePrefixExpression"))
	expression := &ast.PrefixExpression{
		Token: p.curToken, // the prefix operator token itself
	}
	p.nextToken()
	expression.Right = p.parseExpression(PREFIX)
	return expression
}
// parseInfixExpression builds `<left> <op> <right>`. Capturing the
// operator's precedence before advancing makes operators of equal
// precedence left-associative.
func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
	//defer untrace(trace("parseInfixExpression"))
	expression := &ast.InfixExpression{
		Token: p.curToken, // the infix operator token
		Left:  left,
	}
	precedence := p.curPrecedence()
	p.nextToken()
	expression.Right = p.parseExpression(precedence)
	return expression
}
// parseBoolean builds a boolean literal node; its value is true
// exactly when the current token is token.TRUE.
func (p *Parser) parseBoolean() ast.Expression {
	b := &ast.Boolean{Token: p.curToken}
	b.Value = p.curTokenIs(token.TRUE)
	return b
}
// parseGroupedExpression parses `( <expression> )`. Restarting at
// LOWEST inside the parentheses is what gives grouping its
// precedence-overriding effect; no extra AST node is needed.
func (p *Parser) parseGroupedExpression() ast.Expression {
	p.nextToken()
	exp := p.parseExpression(LOWEST)
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	return exp
}
// parseIfExpression parses `if (<cond>) { <consequence> }` with an
// optional `else { <alternative> }`. Alternative stays nil when no
// else branch is present.
func (p *Parser) parseIfExpression() ast.Expression {
	expression := &ast.IfExpression{}
	if !p.expectPeek(token.LPAREN) {
		return nil
	}
	p.nextToken()
	expression.Condition = p.parseExpression(LOWEST)
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	if !p.expectPeek(token.LBRACE) {
		return nil
	}
	expression.Consequence = p.parseBlockStatement()
	if p.peekTokenIs(token.ELSE) {
		// Consume 'else', then require '{' for the alternative block.
		p.nextToken()
		if !p.expectPeek(token.LBRACE) {
			return nil
		}
		expression.Alternative = p.parseBlockStatement()
	}
	return expression
}
// parseBlockStatement parses statements between `{` and `}`.
// On entry curToken is '{'; the EOF check prevents an infinite loop on
// an unterminated block.
func (p *Parser) parseBlockStatement() *ast.BlockStatement {
	block := &ast.BlockStatement{}
	block.Statements = []ast.Statement{}
	p.nextToken()
	for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			block.Statements = append(block.Statements, stmt)
		}
		p.nextToken()
	}
	return block
}
// parseFunctionLiteral parses `fn(<params>) { <body> }`.
// On entry curToken is token.FUNCTION.
func (p *Parser) parseFunctionLiteral() ast.Expression {
	lit := &ast.FunctionLiteral{}
	if !p.expectPeek(token.LPAREN) {
		return nil
	}
	lit.Parameters = p.parseFunctionParameters()
	if !p.expectPeek(token.LBRACE) {
		return nil
	}
	lit.Body = p.parseBlockStatement()
	return lit
}
// parseFunctionParameters parses the comma-separated identifier list
// of a function literal, leaving curToken on ')'. Returns nil (with a
// recorded error) when the closing ')' is missing.
func (p *Parser) parseFunctionParameters() []*ast.Identifier {
	identifiers := []*ast.Identifier{}
	// Empty parameter list: fn().
	if p.peekTokenIs(token.RPAREN) {
		p.nextToken()
		return identifiers
	}
	p.nextToken()
	ident := &ast.Identifier{Token: p.curToken}
	identifiers = append(identifiers, ident)
	for p.peekTokenIs(token.COMMA) {
		// Skip the comma, land on the next identifier.
		p.nextToken()
		p.nextToken()
		ident := &ast.Identifier{Token: p.curToken}
		identifiers = append(identifiers, ident)
	}
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	return identifiers
}
// parseCallExpression is the infix handler for '(': the expression to
// its left (identifier or function literal) is the call target.
func (p *Parser) parseCallExpression(function ast.Expression) ast.Expression {
	exp := &ast.CallExpression{Function: function}
	exp.Arguments = p.parseCallArguments()
	return exp
}
// parseCallArguments parses the comma-separated argument expressions
// of a call, leaving curToken on ')'. Mirrors parseFunctionParameters
// but parses full expressions rather than identifiers.
func (p *Parser) parseCallArguments() []ast.Expression {
	args := []ast.Expression{}
	// Empty argument list: f().
	if p.peekTokenIs(token.RPAREN) {
		p.nextToken()
		return args
	}
	p.nextToken()
	args = append(args, p.parseExpression(LOWEST))
	for p.peekTokenIs(token.COMMA) {
		// Skip the comma, land on the next argument.
		p.nextToken()
		p.nextToken()
		args = append(args, p.parseExpression(LOWEST))
	}
	if !p.expectPeek(token.RPAREN) {
		return nil
	}
	return args
}
// registerPrefix associates fn with tokens of type tokenType appearing
// in prefix position.
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
	p.prefixParseFns[tokenType] = fn
}
// registerInfix associates fn with tokens of type tokenType appearing
// in infix position.
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
	p.infixParseFns[tokenType] = fn
}
monkey/parser/parser_test.go
package parser
import (
"fmt"
"monkey/ast"
"monkey/lexer"
"testing"
)
// checkParserErrors aborts the test immediately if the parser recorded
// any errors, printing each message first.
func checkParserErrors(t *testing.T, p *Parser) {
	errors := p.Errors()
	if len(errors) == 0 {
		return
	}
	t.Errorf("parser has %d errors", len(errors))
	for _, msg := range errors {
		t.Errorf("parser error: %q", msg)
	}
	t.FailNow()
}
// testLetStatement verifies s is a *ast.LetStatement binding the given
// name; returns false (after reporting) on any mismatch.
func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
	letStmt, ok := s.(*ast.LetStatement)
	if !ok {
		t.Errorf("s not *ast.LetStatement. got=%T", s)
		return false
	}
	if letStmt.Name.String() != name {
		t.Errorf("letStmt.Name.String() not '%s'. got=%s", name, letStmt.Name.String())
		return false
	}
	if letStmt.Name.Token.Literal != name {
		t.Errorf("letStmt.Name.Token.Literal not '%s'. got=%s", name, letStmt.Name.Token.Literal)
		return false
	}
	return true
}
// testIntegerLiteral verifies il is a *ast.IntegerLiteral holding
// value, checking both the parsed Value and the String() rendering.
func testIntegerLiteral(t *testing.T, il ast.Expression, value int64) bool {
	integ, ok := il.(*ast.IntegerLiteral)
	if !ok {
		t.Errorf("il not *ast.IntegerLiteral. got=%T", il)
		return false
	}
	if integ.Value != value {
		t.Errorf("integ.Value not %d. got=%d", value, integ.Value)
		return false
	}
	if integ.String() != fmt.Sprintf("%d", value) {
		t.Errorf("integ.String() not %d. got=%s", value, integ.String())
		return false
	}
	return true
}
// testBooleanLiteral verifies exp is a *ast.Boolean holding value,
// checking both Value and the String() rendering.
func testBooleanLiteral(t *testing.T, exp ast.Expression, value bool) bool {
	bo, ok := exp.(*ast.Boolean)
	if !ok {
		t.Errorf("exp not *ast.Boolean. got=%T", exp)
		return false
	}
	if bo.Value != value {
		t.Errorf("bo.Value not %t. got=%t", value, bo.Value)
		return false
	}
	if bo.String() != fmt.Sprintf("%t", value) {
		t.Errorf("bo.String() not %t. got=%s", value, bo.String())
		return false
	}
	return true
}
// testIdentifier verifies exp is a *ast.Identifier whose String()
// renders as value.
func testIdentifier(t *testing.T, exp ast.Expression, value string) bool {
	ident, ok := exp.(*ast.Identifier)
	if !ok {
		t.Errorf("exp not *ast.Identifier. got=%T", exp)
		return false
	}
	if ident.String() != value {
		t.Errorf("ident.String() not %s. got=%s", value,
			ident.String())
		return false
	}
	return true
}
// testLiteralExpression dispatches on the dynamic type of expected
// (int/int64 -> integer literal, string -> identifier, bool -> boolean
// literal) so table-driven tests can mix expected value kinds.
func testLiteralExpression(
	t *testing.T,
	exp ast.Expression,
	expected interface{},
) bool {
	switch v := expected.(type) {
	case int:
		return testIntegerLiteral(t, exp, int64(v))
	case int64:
		return testIntegerLiteral(t, exp, v)
	case string:
		return testIdentifier(t, exp, v)
	case bool:
		return testBooleanLiteral(t, exp, v)
	}
	t.Errorf("type of exp not handled. got=%T", exp)
	return false
}
// testInfixExpression verifies exp is `<left> <operator> <right>`; the
// operator is compared against the node's Token.Literal in this
// adaptation (no separate Operator field).
func testInfixExpression(t *testing.T, exp ast.Expression, left interface{},
	operator string, right interface{}) bool {
	opExp, ok := exp.(*ast.InfixExpression)
	if !ok {
		t.Errorf("exp is not ast.InfixExpression. got=%T(%s)", exp, exp)
		return false
	}
	if !testLiteralExpression(t, opExp.Left, left) {
		return false
	}
	if opExp.Token.Literal != operator {
		t.Errorf("exp.Token.Literal is not '%s'. got=%q", operator, opExp.Token.Literal)
		return false
	}
	if !testLiteralExpression(t, opExp.Right, right) {
		return false
	}
	return true
}
// TestLetStatements checks that `let <ident> = <value>;` parses with
// the right binding name and value across value kinds.
func TestLetStatements(t *testing.T) {
	tests := []struct {
		input              string
		expectedIdentifier string
		expectedValue      interface{}
	}{
		{"let x = 5;", "x", 5},
		{"let y = true;", "y", true},
		{"let foobar = y;", "foobar", "y"},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		if len(program.Statements) != 1 {
			t.Fatalf("program.Statements does not contain 1 statements. got=%d",
				len(program.Statements))
		}
		stmt := program.Statements[0]
		if !testLetStatement(t, stmt, tt.expectedIdentifier) {
			return
		}
		val := stmt.(*ast.LetStatement).Value
		if !testLiteralExpression(t, val, tt.expectedValue) {
			return
		}
	}
}
// TestReturnStatements checks that `return <value>;` parses with the
// expected return value across value kinds.
//
// Bug fix: the final check was `if testLiteralExpression(...) { return }`
// — inverted. It exited the test after the FIRST passing case (so the
// later table entries were never exercised) and kept looping on a
// failure. The guard must bail out when the check FAILS, matching the
// pattern used by TestLetStatements.
func TestReturnStatements(t *testing.T) {
	tests := []struct {
		input         string
		expectedValue interface{}
	}{
		{"return 5;", 5},
		{"return true;", true},
		{"return foobar;", "foobar"},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		if len(program.Statements) != 1 {
			t.Fatalf("program.Statements does not contain 1 statements. got=%d",
				len(program.Statements))
		}
		stmt := program.Statements[0]
		returnStmt, ok := stmt.(*ast.ReturnStatement)
		if !ok {
			t.Fatalf("stmt not *ast.ReturnStatement. got=%T", stmt)
		}
		if !testLiteralExpression(t, returnStmt.ReturnValue, tt.expectedValue) {
			return
		}
	}
}
// TestIdentifierExpression checks that a bare identifier parses as an
// expression statement wrapping *ast.Identifier.
func TestIdentifierExpression(t *testing.T) {
	input := "foobar;"
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program has not enough statements. got=%d",
			len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	ident, ok := stmt.Expression.(*ast.Identifier)
	if !ok {
		t.Fatalf("exp not *ast.Identifier. got=%T", stmt.Expression)
	}
	if ident.String() != "foobar" {
		t.Errorf("ident.String() not %s. got=%s", "foobar",
			ident.String())
	}
}
// TestIntegerLiteralExpression checks that a bare integer parses as an
// expression statement wrapping *ast.IntegerLiteral.
func TestIntegerLiteralExpression(t *testing.T) {
	input := "5;"
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program has not enough statements. got=%d",
			len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	literal, ok := stmt.Expression.(*ast.IntegerLiteral)
	if !ok {
		t.Fatalf("exp not *ast.IntegerLiteral. got=%T", stmt.Expression)
	}
	if literal.Value != 5 {
		t.Errorf("literal.Value not %d. got=%d", 5, literal.Value)
	}
	if literal.String() != "5" {
		t.Errorf("literal.String() not %s. got=%s", "5",
			literal.String())
	}
}
// TestParsingPrefixExpressions table-tests `!X` and `-X` over integer,
// identifier, and boolean operands; the operator is checked via the
// node's Token.Literal.
func TestParsingPrefixExpressions(t *testing.T) {
	prefixTests := []struct {
		input    string
		operator string
		value    interface{}
	}{
		{"!5;", "!", 5},
		{"-15;", "-", 15},
		{"!foobar;", "!", "foobar"},
		{"-foobar;", "-", "foobar"},
		{"!true;", "!", true},
		{"!false;", "!", false},
	}
	for _, tt := range prefixTests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		if len(program.Statements) != 1 {
			t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
				1, len(program.Statements))
		}
		stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
		if !ok {
			t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
				program.Statements[0])
		}
		exp, ok := stmt.Expression.(*ast.PrefixExpression)
		if !ok {
			t.Fatalf("stmt is not ast.PrefixExpression. got=%T", stmt.Expression)
		}
		if exp.Token.Literal != tt.operator {
			t.Fatalf("exp.Token.Literal is not '%s'. got=%s",
				tt.operator, exp.Token.Literal)
		}
		if !testLiteralExpression(t, exp.Right, tt.value) {
			return
		}
	}
}
// TestParsingInfixExpressions table-tests every binary operator over
// integer, identifier, and boolean operands.
func TestParsingInfixExpressions(t *testing.T) {
	infixTests := []struct {
		input      string
		leftValue  interface{}
		operator   string
		rightValue interface{}
	}{
		{"5 + 5;", 5, "+", 5},
		{"5 - 5;", 5, "-", 5},
		{"5 * 5;", 5, "*", 5},
		{"5 / 5;", 5, "/", 5},
		{"5 > 5;", 5, ">", 5},
		{"5 < 5;", 5, "<", 5},
		{"5 == 5;", 5, "==", 5},
		{"5 != 5;", 5, "!=", 5},
		{"foobar + barfoo;", "foobar", "+", "barfoo"},
		{"foobar - barfoo;", "foobar", "-", "barfoo"},
		{"foobar * barfoo;", "foobar", "*", "barfoo"},
		{"foobar / barfoo;", "foobar", "/", "barfoo"},
		{"foobar > barfoo;", "foobar", ">", "barfoo"},
		{"foobar < barfoo;", "foobar", "<", "barfoo"},
		{"foobar == barfoo;", "foobar", "==", "barfoo"},
		{"foobar != barfoo;", "foobar", "!=", "barfoo"},
		{"true == true", true, "==", true},
		{"true != false", true, "!=", false},
		{"false == false", false, "==", false},
	}
	for _, tt := range infixTests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		if len(program.Statements) != 1 {
			t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
				1, len(program.Statements))
		}
		stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
		if !ok {
			t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
				program.Statements[0])
		}
		if !testInfixExpression(t, stmt.Expression, tt.leftValue,
			tt.operator, tt.rightValue) {
			return
		}
	}
}
// TestOperatorPrecedenceParsing checks precedence and associativity by
// comparing program.String() — which fully parenthesizes every
// expression and terminates each statement with ";\n" — against the
// expected rendering.
func TestOperatorPrecedenceParsing(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{
			"-a * b",
			"((-a) * b);\n",
		},
		{
			"!-a",
			"(!(-a));\n",
		},
		{
			"a + b + c",
			"((a + b) + c);\n",
		},
		{
			"a + b - c",
			"((a + b) - c);\n",
		},
		{
			"a * b * c",
			"((a * b) * c);\n",
		},
		{
			"a * b / c",
			"((a * b) / c);\n",
		},
		{
			"a + b / c",
			"(a + (b / c));\n",
		},
		{
			"a + b * c + d / e - f",
			"(((a + (b * c)) + (d / e)) - f);\n",
		},
		{
			"3 + 4; -5 * 5",
			"(3 + 4);\n((-5) * 5);\n",
		},
		{
			"5 > 4 == 3 < 4",
			"((5 > 4) == (3 < 4));\n",
		},
		{
			"5 < 4 != 3 > 4",
			"((5 < 4) != (3 > 4));\n",
		},
		{
			"3 + 4 * 5 == 3 * 1 + 4 * 5",
			"((3 + (4 * 5)) == ((3 * 1) + (4 * 5)));\n",
		},
		{
			"true",
			"true;\n",
		},
		{
			"false",
			"false;\n",
		},
		{
			"3 > 5 == false",
			"((3 > 5) == false);\n",
		},
		{
			"3 < 5 == true",
			"((3 < 5) == true);\n",
		},
		{
			"1 + (2 + 3) + 4",
			"((1 + (2 + 3)) + 4);\n",
		},
		{
			"(5 + 5) * 2",
			"((5 + 5) * 2);\n",
		},
		{
			"2 / (5 + 5)",
			"(2 / (5 + 5));\n",
		},
		{
			"(5 + 5) * 2 * (5 + 5)",
			"(((5 + 5) * 2) * (5 + 5));\n",
		},
		{
			"-(5 + 5)",
			"(-(5 + 5));\n",
		},
		{
			"!(true == true)",
			"(!(true == true));\n",
		},
		{
			"a + add(b * c) + d",
			"((a + add((b * c))) + d);\n",
		},
		{
			"add(a, b, 1, 2 * 3, 4 + 5, add(6, 7 * 8))",
			"add(a, b, 1, (2 * 3), (4 + 5), add(6, (7 * 8)));\n",
		},
		{
			"add(a + b + c * d / f + g)",
			"add((((a + b) + ((c * d) / f)) + g));\n",
		},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		actual := program.String()
		if actual != tt.expected {
			t.Errorf("expected=%q, got=%q", tt.expected, actual)
		}
	}
}
// TestBooleanExpression checks that `true;` / `false;` parse to
// *ast.Boolean with the matching Value.
func TestBooleanExpression(t *testing.T) {
	tests := []struct {
		input           string
		expectedBoolean bool
	}{
		{"true;", true},
		{"false;", false},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		if len(program.Statements) != 1 {
			t.Fatalf("program has not enough statements. got=%d",
				len(program.Statements))
		}
		stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
		if !ok {
			t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
				program.Statements[0])
		}
		boolean, ok := stmt.Expression.(*ast.Boolean)
		if !ok {
			t.Fatalf("exp not *ast.Boolean. got=%T", stmt.Expression)
		}
		if boolean.Value != tt.expectedBoolean {
			t.Errorf("boolean.Value not %t. got=%t", tt.expectedBoolean,
				boolean.Value)
		}
	}
}
// TestIfExpression checks an if with no else: condition, consequence,
// and a nil Alternative.
func TestIfExpression(t *testing.T) {
	input := `if (x < y) { x }`
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
			1, len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	exp, ok := stmt.Expression.(*ast.IfExpression)
	if !ok {
		t.Fatalf("stmt.Expression is not ast.IfExpression. got=%T",
			stmt.Expression)
	}
	if !testInfixExpression(t, exp.Condition, "x", "<", "y") {
		return
	}
	if len(exp.Consequence.Statements) != 1 {
		t.Errorf("consequence is not 1 statements. got=%d\n",
			len(exp.Consequence.Statements))
	}
	consequence, ok := exp.Consequence.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("Statements[0] is not ast.ExpressionStatement. got=%T",
			exp.Consequence.Statements[0])
	}
	if !testIdentifier(t, consequence.Expression, "x") {
		return
	}
	// No else branch in the input, so Alternative must stay nil.
	if exp.Alternative != nil {
		t.Errorf("exp.Alternative.Statements was not nil. got=%+v", exp.Alternative)
	}
}
// TestIfElseExpression checks an if/else: condition, consequence, and
// the alternative block's single identifier statement.
func TestIfElseExpression(t *testing.T) {
	input := `if (x < y) { x } else { y }`
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
			1, len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	exp, ok := stmt.Expression.(*ast.IfExpression)
	if !ok {
		t.Fatalf("stmt.Expression is not ast.IfExpression. got=%T", stmt.Expression)
	}
	if !testInfixExpression(t, exp.Condition, "x", "<", "y") {
		return
	}
	if len(exp.Consequence.Statements) != 1 {
		t.Errorf("consequence is not 1 statements. got=%d\n",
			len(exp.Consequence.Statements))
	}
	consequence, ok := exp.Consequence.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("Statements[0] is not ast.ExpressionStatement. got=%T",
			exp.Consequence.Statements[0])
	}
	if !testIdentifier(t, consequence.Expression, "x") {
		return
	}
	if len(exp.Alternative.Statements) != 1 {
		t.Errorf("exp.Alternative.Statements does not contain 1 statements. got=%d\n",
			len(exp.Alternative.Statements))
	}
	alternative, ok := exp.Alternative.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("Statements[0] is not ast.ExpressionStatement. got=%T",
			exp.Alternative.Statements[0])
	}
	if !testIdentifier(t, alternative.Expression, "y") {
		return
	}
}
// TestFunctionLiteralParsing checks `fn(x, y) { x + y; }`: parameter
// list and the single infix statement in the body.
func TestFunctionLiteralParsing(t *testing.T) {
	input := `fn(x, y) { x + y; }`
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
			1, len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	function, ok := stmt.Expression.(*ast.FunctionLiteral)
	if !ok {
		t.Fatalf("stmt.Expression is not ast.FunctionLiteral. got=%T",
			stmt.Expression)
	}
	if len(function.Parameters) != 2 {
		t.Fatalf("function literal parameters wrong. want 2, got=%d\n",
			len(function.Parameters))
	}
	testLiteralExpression(t, function.Parameters[0], "x")
	testLiteralExpression(t, function.Parameters[1], "y")
	if len(function.Body.Statements) != 1 {
		t.Fatalf("function.Body.Statements has not 1 statements. got=%d\n",
			len(function.Body.Statements))
	}
	bodyStmt, ok := function.Body.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("function body stmt is not ast.ExpressionStatement. got=%T",
			function.Body.Statements[0])
	}
	testInfixExpression(t, bodyStmt.Expression, "x", "+", "y")
}
// TestFunctionParameterParsing table-tests parameter lists of length
// 0, 1, and 3.
func TestFunctionParameterParsing(t *testing.T) {
	tests := []struct {
		input          string
		expectedParams []string
	}{
		{input: "fn() {};", expectedParams: []string{}},
		{input: "fn(x) {};", expectedParams: []string{"x"}},
		{input: "fn(x, y, z) {};", expectedParams: []string{"x", "y", "z"}},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		stmt := program.Statements[0].(*ast.ExpressionStatement)
		function := stmt.Expression.(*ast.FunctionLiteral)
		if len(function.Parameters) != len(tt.expectedParams) {
			t.Errorf("length parameters wrong. want %d, got=%d\n",
				len(tt.expectedParams), len(function.Parameters))
		}
		for i, ident := range tt.expectedParams {
			testLiteralExpression(t, function.Parameters[i], ident)
		}
	}
}
// TestCallExpressionParsing checks `add(1, 2 * 3, 4 + 5);`: the callee
// identifier and each of the three argument expressions.
func TestCallExpressionParsing(t *testing.T) {
	input := "add(1, 2 * 3, 4 + 5);"
	l := lexer.New(input)
	p := New(l)
	program := p.ParseProgram()
	checkParserErrors(t, p)
	if len(program.Statements) != 1 {
		t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
			1, len(program.Statements))
	}
	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
	if !ok {
		t.Fatalf("stmt is not ast.ExpressionStatement. got=%T",
			program.Statements[0])
	}
	exp, ok := stmt.Expression.(*ast.CallExpression)
	if !ok {
		t.Fatalf("stmt.Expression is not ast.CallExpression. got=%T",
			stmt.Expression)
	}
	if !testIdentifier(t, exp.Function, "add") {
		return
	}
	if len(exp.Arguments) != 3 {
		t.Fatalf("wrong length of arguments. got=%d", len(exp.Arguments))
	}
	testLiteralExpression(t, exp.Arguments[0], 1)
	testInfixExpression(t, exp.Arguments[1], 2, "*", 3)
	testInfixExpression(t, exp.Arguments[2], 4, "+", 5)
}
// TestCallExpressionParameterParsing table-tests argument lists by
// comparing each argument's String() rendering.
func TestCallExpressionParameterParsing(t *testing.T) {
	tests := []struct {
		input         string
		expectedIdent string
		expectedArgs  []string
	}{
		{
			input:         "add();",
			expectedIdent: "add",
			expectedArgs:  []string{},
		},
		{
			input:         "add(1);",
			expectedIdent: "add",
			expectedArgs:  []string{"1"},
		},
		{
			input:         "add(1, 2 * 3, 4 + 5);",
			expectedIdent: "add",
			expectedArgs:  []string{"1", "(2 * 3)", "(4 + 5)"},
		},
	}
	for _, tt := range tests {
		l := lexer.New(tt.input)
		p := New(l)
		program := p.ParseProgram()
		checkParserErrors(t, p)
		stmt := program.Statements[0].(*ast.ExpressionStatement)
		exp, ok := stmt.Expression.(*ast.CallExpression)
		if !ok {
			t.Fatalf("stmt.Expression is not ast.CallExpression. got=%T",
				stmt.Expression)
		}
		if !testIdentifier(t, exp.Function, tt.expectedIdent) {
			return
		}
		if len(exp.Arguments) != len(tt.expectedArgs) {
			t.Fatalf("wrong number of arguments. want=%d, got=%d",
				len(tt.expectedArgs), len(exp.Arguments))
		}
		for i, arg := range tt.expectedArgs {
			if exp.Arguments[i].String() != arg {
				t.Errorf("argument %d wrong. want=%q, got=%q", i,
					arg, exp.Arguments[i].String())
			}
		}
	}
}
monkey/parser/parser_tracing.go
package parser
import (
"fmt"
"strings"
)
// traceIdentPlaceholder is the string repeated once per trace nesting level.
const traceIdentPlaceholder string = "\t"

// traceLevel tracks the current trace nesting depth.
var traceLevel int = 0
// identLevel returns the indentation prefix for the current depth;
// traceLevel-1 keeps the outermost trace flush-left.
func identLevel() string {
	return strings.Repeat(traceIdentPlaceholder, traceLevel-1)
}
// tracePrint prints fs indented to the current trace depth.
func tracePrint(fs string) {
	fmt.Printf("%s%s\n", identLevel(), fs)
}
// incIdent increases the trace nesting depth by one.
func incIdent() { traceLevel += 1 }
// decIdent decreases the trace nesting depth by one.
func decIdent() { traceLevel -= 1 }
// trace logs "BEGIN msg" at an increased depth and returns msg so it
// can be paired with untrace via `defer untrace(trace("..."))`.
func trace(msg string) string {
	incIdent()
	tracePrint("BEGIN " + msg)
	return msg
}
// untrace logs "END msg" and restores the previous trace depth.
func untrace(msg string) {
	tracePrint("END " + msg)
	decIdent()
}
monkey/repl/repl.go
package repl
import (
"bufio"
"fmt"
"io"
"monkey/lexer"
"monkey/parser"
)
// PROMPT is printed before each line of REPL input.
const PROMPT = ">>"
// Start runs the read-parse-print loop: one line per iteration, parsed
// into a program whose String() rendering is echoed back; parse errors
// are reported instead. Returns when the input is exhausted.
func Start(in io.Reader, out io.Writer) {
	scanner := bufio.NewScanner(in)
	for {
		// Fprint, not Fprintf: PROMPT is data, not a format string
		// (a '%' in it would be misinterpreted; go vet flags this).
		fmt.Fprint(out, PROMPT)
		scanned := scanner.Scan()
		if !scanned {
			// EOF or read error: leave the REPL.
			return
		}
		line := scanner.Text()
		l := lexer.New(line)
		p := parser.New(l)
		program := p.ParseProgram()
		if len(p.Errors()) != 0 {
			printParserErrors(out, p.Errors())
			continue
		}
		io.WriteString(out, program.String())
	}
}
// MONKEY_FACE is the ASCII-art banner shown before parser errors.
// Raw string: every space and newline below is part of the output.
const MONKEY_FACE = ` __,__
.--. .-" "-. .--.
/ .. \/ .-. .-. \/ .. \
| | '| / Y \ |' | |
| \ \ \ 0 | 0 / / / |
\ '- ,\.-"""""""-./, -' /
''-' /_ ^ ^ _\ '-''
| \._ _./ |
\ \ '~' / /
'._ '-=-' _.'
'-----'
`
// printParserErrors writes the monkey-face banner, a short apology,
// and then every parser error message on its own indented line.
func printParserErrors(out io.Writer, errors []string) {
	io.WriteString(out, MONKEY_FACE)
	io.WriteString(out, "Woops! We ran into some monkey business here!\n")
	io.WriteString(out, "parser errors:\n")
	for _, msg := range errors {
		fmt.Fprintf(out, "\t%s\n", msg)
	}
}
monkey/main.go
package main
import (
"fmt"
"monkey/repl"
"os"
"os/user"
)
// main greets the current OS user and hands stdin/stdout to the REPL.
func main() {
	// Named `u` rather than `user` to avoid shadowing the imported
	// os/user package.
	u, err := user.Current()
	if err != nil {
		panic(err)
	}
	fmt.Printf("Hello %s! This is the Monkey programming language!\n", u.Username)
	fmt.Printf("Feel free to type in commands\n")
	repl.Start(os.Stdin, os.Stdout)
}
monkey/run.txt
cd monkey
go mod init monkey
go test .\lexer -count=1
go test .\ast -count=1
go test .\parser -count=1
//parser/parser.go文件中
//parseExpressionStatement(),首行添加defer untrace(trace("parseExpressionStatement"))
//parseExpression(),首行添加defer untrace(trace("parseExpression"))
//parseIdentifier(),首行添加defer untrace(trace("parseIdentifier"))
//parseIntegerLiteral(),首行添加defer untrace(trace("parseIntegerLiteral"))
//parsePrefixExpression(),首行添加defer untrace(trace("parsePrefixExpression"))
//parseInfixExpression(),首行添加defer untrace(trace("parseInfixExpression"))
//......
go test -v -run TestOperatorPrecedenceParsing .\parser -count=1
go run main.go