一、缘由
听了朋友吹Lua的闭包实现,感觉Lua的闭包实现很优雅,之前也在网上看到一些关于python闭包的源码分析,但是好像很复杂的样子没有细细深究。所以现在好好分析一下。
二、开始
以这段代码开始(基于Python 3.4.4,CPython):
def f(x):
    def add(value):
        return x+value  # x is not local to add: it is f's parameter, captured by the closure
    return add  # the inner function is returned to the caller, still able to read x
首先,我们要知道.py文件会被编译成.pyc文件(字节码),然后交给Python虚拟机执行,而Python虚拟机是基于栈帧的。那么这段代码是以什么形式存在的呢?它会以PyCodeObject的数据结构存在。Python会为每个Code Block产生一个PyCodeObject。Code Block是指会引入一个新名字空间的代码段,函数、被import的模块等都会产生Code Block。
以下是PyCodeObject的定义,位于头文件Include/code.h中:
/* Bytecode object */
typedef struct {
PyObject_HEAD
int co_argcount; /* #arguments, except *args */
int co_kwonlyargcount; /* #keyword only arguments */
int co_nlocals; /* #local variables */
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash or comparisons */
unsigned char *co_cell2arg; /* Maps cell vars which are arguments. */
PyObject *co_filename; /* unicode (where it was loaded from) */
PyObject *co_name; /* unicode (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See
Objects/lnotab_notes.txt for details. */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
PyObject *co_weakreflist; /* to support weakrefs to code objects */
} PyCodeObject;
我们关心两个变量:co_freevars 和 co_cellvars。
co_freevars是嵌套函数保存它所引用的外层作用域变量名的集合,co_cellvars是外层函数保存被嵌套函数使用的变量名的集合。而且这两者在编译期就已经确定下来了。
def f(x):
    def add(value):
        return x+value  # x comes from the enclosing call of f
    # These two prints run every time f is called, because add only exists inside f.
    print("add co_cellvars:"+"".join(add.__code__.co_cellvars))  # add co_cellvars:
    print("add co_freevars:"+"".join(add.__code__.co_freevars))  # add co_freevars:x
    return add
# These two prints run once at module level and inspect f's own code object.
print("f co_cellvars:"+"".join(f.__code__.co_cellvars))  # f co_cellvars:x
print("f co_freevars:"+"".join(f.__code__.co_freevars))  # f co_freevars:
但是只是保存了变量名有什么用呢?关键是值是怎么传递进去的,即x的值是怎么传递到嵌套函数add里面,还有嵌套函数add是如何取得x的值。
三、从f(5)继续出发,
当执行f(5)进行函数调用的时候,是调用指令CALL_FUNCTION(ceval.c里面)->执行call_function(&sp, oparg, &intr0, &intr1)->fast_function(func, pp_stack, n, na, nk)。fast_function函数里面我们关注这段代码:
return PyEval_EvalCodeEx((PyObject*)co, globals,
(PyObject *)NULL, (*pp_stack)-n, na,
(*pp_stack)-2*nk, nk, d, nd, kwdefs,
PyFunction_GET_CLOSURE(func));
PyEval_EvalCodeEx函数中我们关注这几段代码:
f = PyFrame_New(tstate, co, globals, locals);
.....
for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {
PyObject *c;
int arg;
/* Possibly account for the cell variable being an argument. */
if (co->co_cell2arg != NULL &&
(arg = co->co_cell2arg[i]) != CO_CELL_NOT_AN_ARG) {
c = PyCell_New(GETLOCAL(arg));
/* Clear the local copy. */
SETLOCAL(arg, NULL);
}
else {
c = PyCell_New(NULL);//用来占位置,因为如果cellvars不是参数,python用STORE_DEREF指令在外部进行赋值
}
if (c == NULL)
goto fail;
SETLOCAL(co->co_nlocals + i, c);//保存在f->localsplus中
}
......
retval = PyEval_EvalFrameEx(f,0);
这两段代码做了两件事:第一,生成一个PyFrameObject对象f;第二,当发现co_cell2arg不为空、且该cellvar同时是函数参数时,将参数arg的值(即5)用PyCellObject包起来,然后放到f->f_localsplus中。co_cell2arg顾名思义,记录的是哪些cellvars同时也是参数(以及对应的参数下标)。接下来看看PyFrameObject(在frameobject.h中定义):
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
.....
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
Python虚拟机的原理就是模拟可执行程序在X86机器上的运行,而这个数据结构就是Python虚拟机模拟栈帧操作的关键,X86的运行时栈帧如下图:
接下来就是执行PyEval_EvalFrameEx函数,这个函数顾名思义就是求PyFrameObject的值,这里需要opcode帮忙,我们可以通过dis.dis(f)来获得
dis.dis(f)
'''
3 0 LOAD_CLOSURE 0 (x)
3 BUILD_TUPLE 1
6 LOAD_CONST 1 (<code object add at 0x023AB7A0, file "test.py", line 3>)
9 LOAD_CONST 2 ('f.<locals>.add')
12 MAKE_CLOSURE 0
15 STORE_FAST 1 (add)
5 18 LOAD_FAST 1 (add)
21 RETURN_VALUE
'''
接下来就是一个一个慢慢分析这个指令了。
LOAD_CLOSURE指令是将我们在上面包装好的PyCellObject压栈
freevars = f->f_localsplus + co->co_nlocals;
......
TARGET(LOAD_CLOSURE) {
PyObject *cell = freevars[oparg];
Py_INCREF(cell);
PUSH(cell);
DISPATCH();
}
BUILD_TUPLE指令是将栈顶的oparg个PyCellObject(这里是1个)弹出并打包成tuple对象,然后入栈
TARGET(BUILD_TUPLE) {
PyObject *tup = PyTuple_New(oparg);
if (tup == NULL)
goto error;
while (--oparg >= 0) {
PyObject *item = POP();
PyTuple_SET_ITEM(tup, oparg, item);
}
PUSH(tup);
DISPATCH();
}
接下来的两个LOAD_CONST就是压入add函数的code对象,还有add函数的名字
MAKE_CLOSURE指令先弹出add函数的名字(qualname)和add函数的code对象,new一个PyFunctionObject func,然后弹出tuple对象,交给PyFunction_SetClosure:
TARGET_WITH_IMPL(MAKE_CLOSURE, _make_function)
TARGET(MAKE_FUNCTION)
_make_function: {
int posdefaults = oparg & 0xff;
int kwdefaults = (oparg>>8) & 0xff;
int num_annotations = (oparg >> 16) & 0x7fff;
PyObject *qualname = POP(); /* qualname */
PyObject *code = POP(); /* code object */
PyObject *func = PyFunction_NewWithQualName(code, f->f_globals, qualname);
Py_DECREF(code);
Py_DECREF(qualname);
if (func == NULL)
goto error;
if (opcode == MAKE_CLOSURE) {
PyObject *closure = POP();
if (PyFunction_SetClosure(func, closure) != 0) {
/* Can't happen unless bytecode is corrupt. */
Py_DECREF(func);
Py_DECREF(closure);
goto error;
}
Py_DECREF(closure);
}
然后 PyFunction_SetClosure函数就是将add函数的FunctionObject的func_closure指向这个tuple
PyFunction_SetClosure(PyObject *op, PyObject *closure)
{
......
Py_XDECREF(((PyFunctionObject *) op) -> func_closure);
((PyFunctionObject *) op) -> func_closure = closure;
return 0;
}
到这里外层函数的cellvars的值已经固定到嵌套函数add里面了!!
四、有始有终
嵌套函数add被调用的时候,同样经过刚才的过程:call_function->fast_function->PyEval_EvalCodeEx->PyEval_EvalFrameEx
一步一步来
import dis
def f(x):
    def add(value):
        return x+value  # line 4 of the script, matching the line number in the dis listing
    dis.dis(add)  # print add's bytecode each time f is called
    return add
a = f(5)  # a is the inner add function with x bound to 5
a(4)  # evaluates 5 + 4; the result is discarded
首先是fast_function的这段:
return PyEval_EvalCodeEx((PyObject*)co, globals,
(PyObject *)NULL, (*pp_stack)-n, na,
(*pp_stack)-2*nk, nk, d, nd, kwdefs,
PyFunction_GET_CLOSURE(func));
我们关注PyFunction_GET_CLOSURE(func),没有错,它就是取出刚才set进去的func_closure,即那个tuple对象而已。
然后是PyEval_EvalCodeEx函数里面,因为我们已经进入嵌套函数add里面,add它的co_freevars是不为空的,将tuple对象里面每个值即cellObject保存到f->f_localsplus里面
f = PyFrame_New(tstate, co, globals, locals);
.....
freevars = f->f_localsplus + co->co_nlocals;
.....
for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { PyObject *o = PyTuple_GET_ITEM(closure, i); Py_INCREF(o); freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; }
.....
retval = PyEval_EvalFrameEx(f,0);
下面就是进入PyEval_EvalFrameEx函数里面,需要opcode帮忙。
4 0 LOAD_DEREF 0 (x)
3 LOAD_FAST 0 (value)
6 BINARY_ADD
7 RETURN_VALUE
LOAD_DEREF指令就是把上一个步骤保存在f->f_localsplus中的cellObject取出来,再用PyCell_GET取出cell里面包着的值,然后把这个值压栈
freevars = f->f_localsplus + co->co_nlocals;
.....
TARGET(LOAD_DEREF) {
PyObject *cell = freevars[oparg];
PyObject *value = PyCell_GET(cell);
if (value == NULL) {
format_exc_unbound(co, oparg);
goto error;
}
Py_INCREF(value);
PUSH(value);
DISPATCH();
}
LOAD_FAST指令就是取出add局部变量,然后压栈
BINARY_ADD指令很简单明了,取出两个操作数执行PyNumber_Add操作(两个操作数都是str时则走unicode_concatenate)
TARGET(BINARY_ADD) {
PyObject *right = POP();
PyObject *left = TOP();
PyObject *sum;
if (PyUnicode_CheckExact(left) &&
PyUnicode_CheckExact(right)) {
sum = unicode_concatenate(left, right, f, next_instr);
/* unicode_concatenate consumed the ref to v */
}
else {
sum = PyNumber_Add(left, right);
Py_DECREF(left);
}
Py_DECREF(right);
SET_TOP(sum);
if (sum == NULL)
goto error;
DISPATCH();
}
五、结语
python就是这样让基于栈帧的C也支持了闭包(C原生是不支持的),这个过程是(毕竟包了一层)比较绕的。而Lua的闭包实现相当优雅(毕竟Lua不是基于栈帧的,是基于寄存器的虚拟机,当然python也有stackless python)。
最后问题来了:f的FrameObject是什么时候被GC的呢?是add的FrameObject被GC之后,然后再GC?如果是这样的话,只要add一天还在,f的FrameObject就不能被释放了。大家有什么看法呢?
def f(x):
    def add(value):
        return x+value  # x is captured from the call f(5) below
    return add
a = f(5)  # f has already returned here, yet a can still read x
a(4)  # evaluates 5 + 4; the result is discarded