上文中,我们已经实现了一个能够进行整数加减乘除括号运算的计算器,代码中有两个类,Lexer 和 Interpreter
其中 Lexer 实现字符流到标记流(token)的转换,Interpreter完成标记流的运算
本文中进一步对代码重构,增加一个中间过程 Parser类,将Lexer输出的标记流转换成抽象语法树(AST),AST 实际上定义好了元素之间的运算顺序,最后由 Interpreter 对AST完成运算
抽象语法树与解析树的区别
整数计算器 Version 11
AST
将运算节点与叶子节点区分
import json
class AST:
    """Common base class for every node of the abstract syntax tree."""
class BiOp(AST):
    """Binary-operation node: an operator type with a left and a right operand."""

    def __init__(self, node_type, left_node, right_node):
        """Store the operator type and the two operand nodes."""
        self.type = node_type
        self.left_node, self.right_node = left_node, right_node

    def __repr__(self):
        """Return the node, including both operands, as JSON-style text."""
        fields = [
            f'"type":"{self.type}"',
            f'"left":{self.left_node}',
            f'"right":{self.right_node}',
        ]
        return '{' + ', '.join(fields) + '}'

    # str() and repr() produce the same text.
    __str__ = __repr__
class Leaf(AST):
    """Leaf node of the tree: a node type together with its value."""

    def __init__(self, node_type, value):
        """Store the node type and the value it carries."""
        self.type, self.value = node_type, value

    def __repr__(self):
        """Return the node as JSON-style text; the value is inserted unquoted."""
        body = f'"type":"{self.type}", "value":{self.value}'
        return '{' + body + '}'

    # str() and repr() produce the same text.
    __str__ = __repr__
Lexer 和 Parser
"""
expr: term((PLUS|MINUS)term)*
term: factor((MUL|DIV)factor)*
factor: NUMBER| LPAR expr RPAR
"""
# Token type identifiers shared by the lexer, the parser and the interpreter.
NUMBER = 'NUMBER'
PLUS = 'PLUS'
MINUS = 'MINUS'
MUL = 'MUL'
DIV = 'DIV'
LPAR = '('
RPAR = ')'
EOF = 'EOF'
class Token:
    """A lexical token: a token type paired with the value it carries."""

    def __init__(self, token_type, value):
        """Store the token type and its value."""
        self.type, self.value = token_type, value

    def __repr__(self):
        """Return ``Token(<type>, <value>)`` for debugging and error messages."""
        return 'Token({}, {})'.format(self.type, self.value)
class Lexer:
    """Turn an arithmetic expression string into a stream of tokens.

    Recognised input: runs of decimal digits, the operators ``+ - * /``,
    parentheses, and space characters (which are skipped).
    """

    def __init__(self, text):
        """Prepare to scan *text* starting at its first character."""
        self.text = text
        # Index of the next character to examine.
        self.pos = 0
        # Character at ``pos``, or None once the input is exhausted.  The
        # guard keeps an empty string from raising IndexError here.
        self.current_char = self.text[self.pos] if self.text else None

    def error(self,):
        """Raise the generic lexer error."""
        raise Exception('Lexer Error!')

    def _advance(self):
        """Move one character forward and refresh ``current_char``."""
        self.pos += 1
        if self.pos < len(self.text):
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def get_next_token(self, ):
        """Return the next token, or an ``EOF`` token at the end of the input.

        Raises:
            Exception: via ``error()`` when the current character does not
                start any known token.
        """
        # Re-read the character at ``pos`` so the scan always starts from a
        # consistent state.
        if self.pos < len(self.text):
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

        # Spaces separate tokens but produce none themselves.
        while self.current_char == ' ':
            self._advance()

        if self.current_char is None:
            return Token(EOF, None)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() cannot convert.
        if self.current_char.isdecimal():
            digits = []
            while self.current_char is not None and self.current_char.isdecimal():
                digits.append(self.current_char)
                self._advance()
            return Token(NUMBER, int(''.join(digits)))

        single_char_types = {
            '+': PLUS,
            '-': MINUS,
            '*': MUL,
            '/': DIV,
            '(': LPAR,
            ')': RPAR,
        }
        token_type = single_char_types.get(self.current_char)
        if token_type is None:
            self.error()
        token = Token(token_type, self.current_char)
        self._advance()
        return token
class Parser:
    """Recursive-descent parser that builds an AST from the lexer's tokens.

    Grammar::

        expr:   term((PLUS|MINUS)term)*
        term:   factor((MUL|DIV)factor)*
        factor: NUMBER | LPAR expr RPAR
    """

    def __init__(self, lexer):
        """Keep *lexer* and load its first token into ``current_token``."""
        self.lexer = lexer
        self.current_token = self.lexer.get_next_token()

    def error(self, msg = ''):
        """Raise the generic parse error with *msg* appended."""
        raise Exception(f'Parse Error! {msg}')

    def eat(self, token_type):
        """Consume the current token if its type is acceptable.

        Args:
            token_type: a single token type (``str``) or a collection of
                acceptable token types (list, tuple, set, ...).

        Raises:
            Exception: via ``error()`` when the current token's type is not
                acceptable; the current token is left in place.
        """
        # A str is one exact type; anything else is treated as a collection,
        # so a tuple or set is validated instead of being accepted unchecked.
        if isinstance(token_type, str):
            if self.current_token.type != token_type:
                self.error(f'current token not match, expect {token_type}, got {self.current_token}.')
        elif self.current_token.type not in token_type:
            self.error(f'current token not in list, expect {token_type}, got {self.current_token}.')
        self.current_token = self.lexer.get_next_token()

    def factor(self,):
        """
        factor: NUMBER | LPAR expr RPAR
        """
        f = self.current_token
        if f.type == NUMBER:
            self.eat(NUMBER)
            return Leaf(NUMBER, f.value)
        elif f.type == LPAR:
            self.eat(LPAR)
            # Parentheses only group: the inner expression's node is returned
            # directly, no extra node is created for them.
            expr = self.expr()
            self.eat(RPAR)
            return expr
        self.error()

    def term(self,):
        """
        term: factor((MUL|DIV)factor)*
        """
        node = self.factor()
        while self.current_token.type in [MUL, DIV]:
            op = self.current_token
            self.eat([MUL, DIV])
            # The tree built so far becomes the left operand, which makes the
            # operators left-associative.
            node = BiOp(op.type, node, self.factor())
        return node

    def expr(self,):
        """
        expr: term((PLUS|MINUS)term)*
        """
        node = self.term()
        while self.current_token.type in [PLUS, MINUS]:
            op = self.current_token
            self.eat([PLUS, MINUS])
            # Same left-associative folding as in term().
            node = BiOp(op.type, node, self.term())
        return node
def main():
    """Run the interactive prompt: read an expression, print its AST as JSON.

    The loop ends as soon as any exception occurs (lexer or parse errors
    included); the exception's message is printed first.
    """
    while True:
        try:
            text = input('cal> ')
            lexer = Lexer(text)
            parser = Parser(lexer)
            ast = parser.expr()
            # expr() stops at the first token it cannot use.  Anything other
            # than EOF here means part of the input was never parsed, e.g.
            # "1 2" or "1 + 2 )", so report it instead of printing a partial AST.
            if parser.current_token.type != EOF:
                parser.error(f'unexpected token {parser.current_token}.')
            # The nodes' string form is JSON text; round-trip it through the
            # json module to pretty-print it.
            print(json.dumps(json.loads(str(ast)), indent=4))
        except Exception as e:
            print(e)
            break


main()
运行结果如下,用json的形式将AST展示出来
cal> 1 + 2
{
"type": "PLUS",
"left": {
"type": "NUMBER",
"value": 1
},
"right": {
"type": "NUMBER",
"value": 2
}
}
cal> 1 + ((2 + 3) * 4 ) / (5 - 6)
{
"type": "PLUS",
"left": {
"type": "NUMBER",
"value": 1
},
"right": {
"type": "DIV",
"left": {
"type": "MUL",
"left": {
"type": "PLUS",
"left": {
"type": "NUMBER",
"value": 2
},
"right": {
"type": "NUMBER",
"value": 3
}
},
"right": {
"type": "NUMBER",
"value": 4
}
},
"right": {
"type": "MINUS",
"left": {
"type": "NUMBER",
"value": 5
},
"right": {
"type": "NUMBER",
"value": 6
}
}
}
}
Interpreter
根据Parser给出的AST,遍历直接运算
class Interpreter:
    """Evaluate an AST by recursively visiting its nodes."""

    def __init__(self, ast):
        """Keep the root node of the tree to evaluate."""
        self.ast = ast

    def compute(self):
        """Return the value of the whole tree."""
        return self.visit_node(self.ast)

    def visit_node(self, node):
        """Return the value of the subtree rooted at *node*.

        Raises:
            Exception: when the node's type is not one the interpreter knows,
                instead of silently returning None.
        """
        if node.type == NUMBER:
            return node.value
        if node.type not in (PLUS, MINUS, MUL, DIV):
            raise Exception(f'Interpreter Error! unknown node type {node.type}')

        # Operands are evaluated left first, then right.
        left = self.visit_node(node.left_node)
        right = self.visit_node(node.right_node)
        if node.type == PLUS:
            return left + right
        if node.type == MINUS:
            return left - right
        if node.type == MUL:
            return left * right
        # DIV uses true division, so the result is a float.
        return left / right