python 抽象语法树_Python随笔(四)抽象语法树AST

本文介绍了Python的抽象语法树(AST),它是源代码抽象语法结构的树状表现形式。文章通过实例展示了如何构建和使用AST,包括使用AST编辑器观察AST、Python执行过程中的AST转化、以及表达式树的实现。同时,文中还提供了栈数据结构和二叉树的Python实现,并通过表达式树解析四则运算表达式。此外,文章讨论了AST在IDE中的应用,如语法检查、代码补全等,并简述了Python虚拟机的执行框架。
摘要由CSDN通过智能技术生成

什么是抽象语法树嘞?

在计算机科学中,抽象语法和抽象语法树其实是源代码的抽象语法结构的树状表现形式

我们可以用一个在线的AST编辑器来观察AST的构建

Python语言的执行过程就是通过将Python字节码转化为抽象语法树来进行下一步的分析等其他操作,所以将Python转化为抽象语法树更利于程序的分析

一般来说,我们早期的学习当中固然会用到一种叫做表达式树的东西,我们用Python来实现一下表达式树

class StackEmptyException(Exception): pass

class StackFullException(Exception): pass

class Node:

def __init__(self, val=None, nxt=None):

self.value = val

self.next = nxt

def __str__(self):

return str(self.value)

class Stack:

def __init__(self, max=0):

self._top = None

self._max = 0

self.max = max

@property

def max(self):

return self._max

@max.setter

def max(self, m):

m = int(m)

if m < self.length:

raise Exception('Resize stack failed, please pop some elements first.')

self._max = m

if self._max < 0:

self._max = 0

def init(self, iterable=()):

if not iterable:

return

self._top = Node(iterable[0])

for i in iterable[1:]:

node = self._top

self._top = Node(i)

self._top.next = node

def show(self):

def _traversal(self):

node = self._top

while node and node.next:

yield node

node = node.next

yield node

print('\n'.join(map(lambda x: '|{:^7}|'.format(str(x)), _traversal(self))) + '\n ' + 7 * '-')

@property

def length(self):

if self._top is None:

return 0

node = self._top

i = 1

while node.next:

node = node.next

i += 1

return i

@property

def is_empty(self):

return self._top is None

@property

def is_full(self):

return bool(self._max and self.length == self._max)

def push(self, item):

if self.is_full:

raise StackFullException('Error: trying to push element into a full stack!')

if not self._top:

self._top = Node(item)

return

node = self._top

self._top = Node(item)

self._top.next = node

def pop(self):

if self.is_empty:

raise StackEmptyException('Error: trying to pop element from an empty stack!')

node = self._top

self._top = self._top.next

return node.value

def top(self):

return self._top.value if self._top else self._top

def clear(self):

while self._top:

self.pop()

def test(stack):

print('\nShow stack:')

stack.show()

print('\nInit linked list:')

stack.init([1, 2, 3, 4, 5])

stack.show()

print('\nPush element to stack:')

stack.push(6)

stack.push(7)

stack.push('like')

stack.show()

print('\nCheck top element:')

print(stack.top())

print('\nPop element from stack:')

e = stack.pop()

print('Element %s popped,' % e)

stack.show()

print('\nSet stack max size:')

try:

stack.max = 1

except Exception as e:

print(e)

print('\nSet stack max size:')

stack.max = 7

print(stack.max)

print('\nPush full stack:')

try:

stack.push(7)

except StackFullException as e:

print(e)

print('\nClear stack:')

stack.clear()

stack.show()

print('\nStack is empty:')

print(stack.is_empty)

print('\nPop empty stack:')

try:

stack.pop()

except StackEmptyException as e:

print(e)

class TreeNode:

def __init__(self, val=None, lef=None, rgt=None):

self.value = val

self.left = lef

self.right = rgt

def __str__(self):

return str(self.value)

class BinaryTree:

def __init__(self, root=None):

self._root = root

def __str__(self):

return '\n'.join(map(lambda x: x[1]*4*' '+str(x[0]), self.pre_traversal()))

def pre_traversal(self, root=None):

if not root:

root = self._root

x = []

depth = -1

def _traversal(node):

nonlocal depth

depth += 1

x.append((node, depth))

if node and node.left is not None:

_traversal(node.left)

if node and node.right is not None:

_traversal(node.right)

depth -= 1

return x

return _traversal(root)

def in_traversal(self, root=None):

if not root:

root = self._root

x = []

depth = -1

def _traversal(node):

nonlocal depth

depth += 1

if node and node.left is not None:

_traversal(node.left)

x.append((node, depth))

if node and node.right is not None:

_traversal(node.right)

depth -= 1

return x

return _traversal(root)

def post_traversal(self, root=None):

if not root:

root = self._root

x = []

depth = -1

def _traversal(node):

nonlocal depth

depth += 1

if node and node.left is not None:

_traversal(node.left)

if node and node.right is not None:

_traversal(node.right)

x.append((node, depth))

depth -= 1

return x

return _traversal(root)

@property

def max_depth(self):

return sorted(self.pre_traversal(), key=lambda x: x[1])[-1][1]

def show(self, tl=None):

if not tl:

tl = self.pre_traversal()

print('\n'.join(map(lambda x: x[1]*4*' '+str(x[0]), tl)))

def make_empty(self):

self.__init__()

def insert(self, item):

if self._root is None:

self._root = TreeNode(item)

return

def _insert(item, node):

if not node:

return TreeNode(item)

if node.left is None:

node.left = _insert(item, node.left)

elif node.right is None:

node.right = _insert(item, node.right)

else:

if len(self.pre_traversal(node.left)) <= len(self.pre_traversal(node.right)):

node.left = _insert(item, node.left)

else:

node.right = _insert(item, node.right)

return node

self._root = _insert(item, self._root)

class ExpressionTree(BinaryTree):

SIGN = {'+': 1, '-': 1, '*': 2, '/': 2, '(': 3}

def gene_tree_by_postfix(self, expr):

s =Stack()

for i in expr:

if i in self.SIGN.keys():

right = s.pop()

left = s.pop()

node = TreeNode(i, left, right)

s.push(node)

else:

s.push(TreeNode(i))

self._root = s.pop()

class ExpressionTree(BinaryTree):

SIGN = {'+': 1, '-': 1, '*': 2, '/': 2, '(': 3}

def gene_tree_by_postfix(self, expr):

s = Stack()

for i in expr:

if i in self.SIGN.keys():

right = s.pop()

left = s.pop()

node = TreeNode(i, left, right)

s.push(node)

else:

s.push(TreeNode(i))

self._root = s.pop()

def test_expression_tree(ep):

t = ExpressionTree()

t.gene_tree_by_postfix(ep)

print('\n------先序遍历-------')

print(t)

print('\n------后序遍历------')

t.show(t.post_traversal())

print('\n-------中序遍历-------')

t.show(t.in_traversal())

if __name__ == '__main__':

ep = 'a b + c d e + * *'

test_expression_tree(ep.split(' '))

输出:

0169eb0dcce808a1f6a4cac37ceffeaf.png

回到AST

AST主要作用有三步:

1. 解析(PARSE):将代码字符串解析成抽象语法树。

2. 转换(TRANSFORM):对抽象语法树进行转换操作。

3. 生成(GENERATE): 根据变换后的抽象语法树再生成代码字符串。

Python官方对于CPython解释器对python源码的处理过程如下:

1. Parse source code into a parse tree (Parser/pgen.c)

2. Transform parse tree into an Abstract Syntax Tree (Python/ast.c)

3. Transform AST into a Control Flow Graph (Python/compile.c)

4. Emit bytecode based on the Control Flow Graph (Python/compile.c)

但是只知道上面还不够我们去理解,因为在Python中,以控制台为例,我们的输入都是些字符串例如a=2、b=[1,2,3,4,5]之类我们要如何让计算机去理解并且执行这些东西呢?

这就是解释器的解释过程,负责把关键字,变量,空格,特殊字符进行处理处理的过程大概有下面两个步骤

1. 将整个代码字符串分割成 语法单元数组。

2. 在分词结果的基础之上分析 语法单元之间的关系。

一个抽象语法树的基本构成

type:描述该语句的类型 --变量声明语句

kind:变量声明的关键字 -- var

declaration: 声明的内容数组,里面的每一项也是一个对象

type: 描述该语句的类型

id: 描述变量名称的对象

type:定义

name: 是变量的名字

init: 初始化变量值得对象

type: 类型

value: 值 "is tree" 不带引号

row: "\"is tree"\" 带引号

一般来说我们在可以Python的pythonrun里面找到

PyObject *type;

定义了语法树的类型

一般来说,研究抽象语法树有哪些用途呢?

在一种语言的IDE中,语法的检查、风格的检查、格式化、高亮、错误提示,代码自动补全等等

通过搭建一个Python的语法树去理解表达式是如何被解析的,我们来看一个(3+2-5*0)/3的例子:

#首先定义四则运算

Num = lambda env, n: n

Var = lambda env, x: env[x]

Add = lambda env, a, b:_eval(env, a) + _eval(env, b)

Mul = lambda env, a, b:_eval(env, a) * _eval(env, b)

Sub = lambda env, a, b:_eval(env, a) - _eval(env, b)

Div = lambda env, a, b:_eval(env, a) / _eval(env, b)

#定义表达式计算

_eval = lambda env, expr:expr[0](env, *expr[1:])

#定义环境中的自变量

env = {'i':5, 'j':2, 'k':3}

#定义语法树结构(我寻思这玩意怎么那么像Clojure呢。。。。。)

tree=(Div,(Sub,(Add,(Var,'k'),(Var,'j')),(Mul,(Var,'i'),(Num,0))),(Var,'k'))

print(_eval(env, tree))

输出:

4b3e268bd2de21b7d837629ed6161cfd.png

承接前一篇虚拟机的运行机制,我们来看看Python的AST解析过程

首先来看Python虚拟机的循环执行框架

位于pythonrun.c文件中

PyObject *

PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)

{

......

// 获取当前活动线程的线程状态对象(PyThreadState)

PyThreadState *tstate = PyThreadState_GET();

// 设置线程状态对象中的frame

tstate->frame = f;

co = f->f_code;

names = co->co_names;

consts = co->co_consts;

why = WHY_NOT;

......

for (;;) {

fast_next_opcode:

f->f_lasti = INSTR_OFFSET();

// 获取字节码指令

opcode = NEXTOP();

oparg = 0;

// 如果指令有参数,获取参数

if (HAS_ARG(opcode))

oparg = NEXTARG();

dispatch_opcode:

......

}

}

现在来调试一下PYVM

3a2a12dd07e256644d7909739085a50c.png

33d4e845f5ca7500a22c706804c10552.png

在我们对PYVM进行调试的过程中可以看到Py把stdin的字符串一个个“吃掉”了吃的过程是为了把字符串转换和解释为字节码,通过字节码构建抽象语法树,字节码的遍历是通过几个宏来实现:

#define INSTR_OFFSET() ((int)(next_instr - first_instr))

#define NEXTOP() (*next_instr++)

#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])

#define PEEKARG() ((next_instr[2]<<8) + next_instr[1])

#define JUMPTO(x) (next_instr = first_instr + (x))

#define JUMPBY(x) (next_instr += (x))

在程序内部通过PyTokenizer_Get来获取输入字符串中是否存在关键字,构建好语法树以后通过PyRun_InteractiveOneObjectEx执行。

Python中AST的节点定义

pythoncore/Parser/node.c

PyNode_New(int type)

{

node *n = (node *) PyObject_MALLOC(1 * sizeof(node));

if (n == NULL)

return NULL;

n->n_type = type;

n->n_str = NULL;

n->n_lineno = 0;

n->n_nchildren = 0;

n->n_child = NULL;

return n;

}

下面给出Python自带的AST例子,去观察构建出来的树

import ast

Monster ="""

class Monster:

def __init__(self):

self.level=0

self.hp=1000

self.boom=[x for x in range(10)]

def eat(self,frut):

self.hp+=1

def howl(self):

print("Ao uuuuuuuuuuuuuu")

monster=Monster()

monster.howl()

"""

if __name__=="__main__":

# cm = compile(Monster, '', 'exec')

# exec (cm)

r_node = ast.parse(Monster)

print(ast.dump(r_node))

通过compile我们可以编译Python字符串执行字串的内容

da67796a6a5bb38b1c8b2a902e075a42.png

同时,我们也可以用Python自带的AST库解析我们的字符串为语法树

475c6e305e45c9585c89f90bfdf1cf62.png

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值