起步
引用计数与垃圾收集
三色标记模型
寻找根对象(root object)的集合, 所谓的root object即是一些全局引用和函数栈中的引用. 这些引用所指向的对象是不可被删除的. 而这个root object集合也是垃圾检测动作的起点.
从root object集合出发, 沿着root object集合中的每一个引用, 如果能到达某个对象A, 则称A为可达的 (reachable) , 可达的对象也不可被删除. 这个阶段就是垃圾检测阶段.
当垃圾检测阶段结束后, 所有的对象分为了可达与不可达两部分. 所有可达的对象都必须给予保留, 而所有不可达的对象所占用的内存将被回收, 这就是垃圾回收阶段.
python 中的垃圾收集
可收集对象链表
[objimpl.h]
/* Per-object garbage-collector header.
 * gc_next/gc_prev link headers into a doubly linked list; gc_refs is a
 * Py_ssize_t slot used by the collector.  The `dummy` member exists only to
 * force worst-case alignment of the union. */
typedef union _gc_head {
struct {
union _gc_head *gc_next; /* next header in the list */
union _gc_head *gc_prev; /* previous header in the list */
Py_ssize_t gc_refs;
} gc;
double dummy; /* force worst-case alignment */
} PyGC_Head;
[gcmodule.c]
/* Allocate a GC-capable object of type `tp` and initialise it.
 *
 * The storage comes from _PyObject_GC_Malloc(_PyObject_SIZE(tp)).
 * Returns the result of PyObject_INIT on success, or NULL when the
 * allocation failed. */
PyObject * _PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *obj = _PyObject_GC_Malloc(_PyObject_SIZE(tp));

    if (obj == NULL) {
        return obj;
    }
    obj = PyObject_INIT(obj, tp);
    return obj;
}
[gcmodule.c]
/* Sentinel gc_refs value; _PyObject_GC_Alloc stores it in the header of a
 * newly allocated object. */
#define _PyGC_REFS_UNTRACKED (-2)
/* Short alias for _PyGC_REFS_UNTRACKED. */
#define GC_UNTRACKED _PyGC_REFS_UNTRACKED
/* Allocate `basicsize` bytes for a GC-capable object.
 *
 * Thin wrapper over _PyObject_GC_Alloc with use_calloc set to 0, which
 * selects the PyObject_Malloc path there.  Returns whatever
 * _PyObject_GC_Alloc returns. */
PyObject * _PyObject_GC_Malloc(size_t basicsize)
{
    const int use_calloc = 0;

    return _PyObject_GC_Alloc(use_calloc, basicsize);
}
/* Allocate a GC header plus `basicsize` bytes and return the object part.
 *
 * use_calloc: non-zero allocates with PyObject_Calloc, zero with
 *             PyObject_Malloc.
 * basicsize:  size of the object itself, excluding the PyGC_Head.
 *
 * Returns the pointer just past the header (FROM_GC), or the result of
 * PyErr_NoMemory() when the size would overflow or the allocation fails.
 * Side effects: bumps generations[0].count and may run
 * collect_generations(). */
static PyObject * _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
{
    PyGC_Head *head;
    size_t total;

    /* Adding the header must not push the size past PY_SSIZE_T_MAX. */
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
        return PyErr_NoMemory();
    }

    /* One allocation holds the PyGC_Head followed by the object itself. */
    total = basicsize + sizeof(PyGC_Head);
    head = (PyGC_Head *)(use_calloc ? PyObject_Calloc(1, total)
                                    : PyObject_Malloc(total));
    if (head == NULL) {
        return PyErr_NoMemory();
    }

    /* Zero the raw field first, then store the untracked marker. */
    head->gc.gc_refs = 0;
    _PyGCHead_SET_REFS(head, GC_UNTRACKED);

    generations[0].count++; /* number of allocated GC objects */

    /* Collect only when the threshold is exceeded, collection is enabled,
     * the threshold is non-zero, no collection is already in progress and
     * no error is pending. */
    if (generations[0].count > generations[0].threshold
        && enabled
        && generations[0].threshold
        && !collecting
        && !PyErr_Occurred())
    {
        collecting = 1;
        collect_generations();
        collecting = 0;
    }

    return FROM_GC(head);
}
PyGC_Head
PyObject_HEAD
container obj
[gcmodule.c]
/* AS_GC(o): from an object pointer, step back one PyGC_Head to reach the
 * header stored immediately before the object. */
#define AS_GC(o) ((PyGC_Head *)(o)-1)
/* FROM_GC(g): from a header pointer, step forward one PyGC_Head to reach the
 * object stored immediately after it.  The argument is parenthesised so that
 * a compound expression (e.g. `base + offset`) is cast as a whole before the
 * pointer arithmetic is applied. */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)(g))+1))
[objimpl.h]
/* Step back from an object pointer to the PyGC_Head located immediately
 * before it (cast to PyGC_Head *, then subtract one element). */
#define _Py_AS_GC(o) ((PyGC_Head *)(o)-1)
分代的垃圾收集
[gcmodule.c]
/* Bookkeeping for one GC generation: a list head plus the counter and
 * threshold that collect_generations() compares. */
struct gc_generation {
PyGC_Head head; /* list head (sentinel) for this generation's linked list */
int threshold; /* collection threshold */
int count; /* count of allocations or collections of younger
generations */
};
[gcmodule.c]
/* Number of entries in the `generations` table. */
#define NUM_GENERATIONS 3
/* Address of the list head of generation n. */
#define GEN_HEAD(n) (&generations[n].head)
/* Initial state: every head has gc_next and gc_prev pointing at itself (the
 * same state gc_list_init produces) with gc_refs 0; the thresholds are
 * 700, 10 and 10, and every count starts at 0. */
static struct gc_generation generations[NUM_GENERATIONS] = {
/* PyGC_Head, threshold, count */
{{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
{{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
{{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
};
/* Exported pointer to the list head of generation 0. */
PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
[gcmodule.c]
/* Pick the oldest generation whose count exceeds its threshold and collect it.
 *
 * Generations are examined from index NUM_GENERATIONS-1 down to 0.  The
 * last generation is skipped while long_lived_pending is below a quarter
 * of long_lived_total.  Returns the value of collect_with_callback() for
 * the chosen generation, or 0 when no generation qualified. */
static Py_ssize_t collect_generations(void)
{
    int gen;

    for (gen = NUM_GENERATIONS - 1; gen >= 0; gen--) {
        const int is_oldest = (gen == NUM_GENERATIONS - 1);

        if (generations[gen].count <= generations[gen].threshold) {
            continue;
        }
        if (is_oldest && long_lived_pending < long_lived_total / 4) {
            continue;
        }
        return collect_with_callback(gen);
    }
    return 0;
}
python 中的标记--清除方法
[gcmodule.c]
/* Append every entry of list `from` to the end of list `to`.
 *
 * `from` and `to` must be different list heads.  Afterwards `from` is
 * reset to the empty state via gc_list_init(). */
static void gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    assert(from != to);

    if (!gc_list_is_empty(from)) {
        PyGC_Head *to_last = to->gc.gc_prev;
        PyGC_Head *from_first = from->gc.gc_next;
        PyGC_Head *from_last = from->gc.gc_prev;

        /* Join the old tail of `to` with the first entry of `from`. */
        to_last->gc.gc_next = from_first;
        from_first->gc.gc_prev = to_last;

        /* Close the ring: the last entry of `from` now precedes `to`. */
        to->gc.gc_prev = from_last;
        from_last->gc.gc_next = to;
    }
    gc_list_init(from);
}
/* Make `list` an empty list: both of its links point back at the head. */
static void gc_list_init(PyGC_Head *list)
{
    list->gc.gc_next = list->gc.gc_prev = list;
}
寻找 root Object 集合
[gcmodule.c]
/* For every entry in the `containers` list, copy the object's current
 * reference count (Py_REFCNT) into the gc_refs slot of its header. */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *node = containers->gc.gc_next;

    while (node != containers) {
        _PyGCHead_SET_REFS(node, Py_REFCNT(FROM_GC(node)));
        node = node->gc.gc_next;
    }
}
[gcmodule.c]
/* Run each listed object's tp_traverse with visit_decref as the visitor.
 *
 * The traversal's return value is deliberately ignored, and NULL is passed
 * as the visitor's data argument. */
static void subtract_refs(PyGC_Head *containers)
{
    PyGC_Head *node;

    for (node = containers->gc.gc_next; node != containers;
         node = node->gc.gc_next) {
        PyObject *obj = FROM_GC(node);
        traverseproc traverse = Py_TYPE(obj)->tp_traverse;

        (void) traverse(obj, (visitproc)visit_decref, NULL);
    }
}
[dictobject.c]
/* Excerpt of the dict type object; the `...` lines stand for slots omitted
 * here.  The tp_traverse slot is filled with dict_traverse. */
PyTypeObject PyDict_Type = {
...
dict_traverse, /* tp_traverse */
...
}
/* tp_traverse implementation for dicts: apply `visit` to contained objects.
 *
 * Three layouts are handled:
 *   - dk_lookup == lookdict: for each entry with a non-NULL value, visit
 *     the value and then the key;
 *   - otherwise, ma_values != NULL: visit each ma_values[i];
 *   - otherwise: visit each entry's value only.
 * Py_VISIT is used for every visit; 0 is returned once all loops finish. */
static int dict_traverse(PyObject *op, visitproc visit, void *arg)
{
    PyDictObject *dict = (PyDictObject *)op;
    PyDictKeysObject *keys = dict->ma_keys;
    PyDictKeyEntry *slots = DK_ENTRIES(keys);
    Py_ssize_t count = keys->dk_nentries;
    Py_ssize_t idx;

    if (keys->dk_lookup == lookdict) {
        for (idx = 0; idx < count; idx++) {
            if (slots[idx].me_value == NULL) {
                continue;
            }
            Py_VISIT(slots[idx].me_value);
            Py_VISIT(slots[idx].me_key);
        }
    }
    else if (dict->ma_values != NULL) {
        for (idx = 0; idx < count; idx++) {
            Py_VISIT(dict->ma_values[idx]);
        }
    }
    else {
        for (idx = 0; idx < count; idx++) {
            Py_VISIT(slots[idx].me_value);
        }
    }
    return 0;
}
[gcmodule.c]
/* Visitor used by subtract_refs.
 *
 * For a GC-capable object whose header gc_refs is positive, decrement that
 * gc_refs by one; other objects are left untouched.  `data` is unused.
 * Always returns 0. */
static int visit_decref(PyObject *op, void *data)
{
    PyGC_Head *head;

    assert(op != NULL);
    if (!PyObject_IS_GC(op)) {
        return 0;
    }

    head = AS_GC(op);
    if (_PyGCHead_REFS(head) > 0) {
        _PyGCHead_DECREF(head);
    }
    return 0;
}
垃圾标记
[gcmodule.c]
/* Walk the `young` list and split it in place.
 *
 * Entries whose gc_refs is non-zero are marked GC_REACHABLE and traversed
 * with visit_reachable; entries whose gc_refs is zero are moved to
 * `unreachable` and marked GC_TENTATIVELY_UNREACHABLE. */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
PyGC_Head *gc = young->gc.gc_next;
while (gc != young) {
PyGC_Head *next;
// Root object: set its gc_refs to the GC_REACHABLE flag.
if (_PyGCHead_REFS(gc)) {
PyObject *op = FROM_GC(gc);
traverseproc traverse = Py_TYPE(op)->tp_traverse;
assert(_PyGCHead_REFS(gc) > 0);
_PyGCHead_SET_REFS(gc, GC_REACHABLE);
// `young` is handed to the visitor as its data argument.
(void) traverse(op,
(visitproc)visit_reachable,
(void *)young);
// The successor is read only after the traversal above.
// NOTE(review): presumably visit_reachable can relink entries - confirm.
next = gc->gc.gc_next;
if (PyTuple_CheckExact(op)) {
_PyTuple_MaybeUntrack(op);
}
}
else {// Non-root object: move it to the unreachable list.
// Save the successor before this entry is moved to another list.
next = gc->gc.gc_next;
gc_list_move(gc, unreachable);
_PyGCHead_SET_REFS(gc, GC_TENTATIVELY_UNREACHABLE);
}
gc = next;
}
}
垃圾回收
[gcmodule.c]
/* Return non-zero when `list` has no entries, i.e. the head's gc_next
 * points back at the head itself. */
static int gc_list_is_empty(PyGC_Head *list)
{
    PyGC_Head *first = list->gc.gc_next;

    return (first == list);
}
/* Drain the `collectable` list.
 *
 * For the entry at the front of the list:
 *   - with DEBUG_SAVEALL set, append the object to `garbage`;
 *   - otherwise call its tp_clear (if any) while holding an extra reference.
 * If the same entry is still at the front afterwards, move it to `old` and
 * mark it GC_REACHABLE.  Repeats until `collectable` is empty. */
static void delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
{
    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *head = collectable->gc.gc_next;
        PyObject *obj = FROM_GC(head);

        if (debug & DEBUG_SAVEALL) {
            PyList_Append(garbage, obj);
        }
        else {
            inquiry clear = Py_TYPE(obj)->tp_clear;

            if (clear != NULL) {
                /* Keep the object referenced for the duration of tp_clear. */
                Py_INCREF(obj);
                clear(obj);
                Py_DECREF(obj);
            }
        }

        if (collectable->gc.gc_next != head) {
            /* The entry is no longer at the front of the list. */
            continue;
        }
        gc_list_move(head, old);
        _PyGCHead_SET_REFS(head, GC_REACHABLE);
    }
}
垃圾收集全景
python中的 gc 模块
# Baseline example: two live instances with no references between them,
# followed by a forced collection with debug statistics enabled.
import gc


class A(object):
    pass


class B(object):
    pass


gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK)
a = A()
b = B()
gc.collect()
######### output:
gc: collecting generation 2...
gc: objects in each generation: 299 1527 4971
gc: done, 0.0000s elapsed
gc: collecting generation 2...
gc: objects in each generation: 1 0 6670
gc: done, 0.0000s elapsed
# Cycle example: the two instances reference each other, then both names
# are deleted before a forced collection with debug statistics enabled.
import gc


class A(object):
    pass


class B(object):
    pass


gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK)
a = A()
b = B()
# Build the reference cycle a -> b -> a.
a.b = b
b.a = a
del a
del b
gc.collect()
######output:
gc: collecting generation 2...
gc: objects in each generation: 301 1527 4971
gc: collectable
gc: collectable
gc: collectable
gc: collectable
gc: done, 4 unreachable, 0 uncollectable, 0.0000s elapsed
# Cycle example with finalizers: same reference cycle as before, but both
# classes define __del__.
import gc


class A(object):
    def __del__(self):
        pass


class B(object):
    def __del__(self):
        pass


gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK)
a = A()
b = B()
# Build the reference cycle a -> b -> a.
a.b = b
b.a = a
del a
del b
gc.collect()
#### output:
gc: collecting generation 2...
gc: objects in each generation: 307 1527 4971
gc: collectable
gc: collectable
gc: collectable
gc: collectable
gc: done, 4 unreachable, 0 uncollectable, 0.0000s elapsed
gc触发时机
当计数器从(699,3,0)增加到(700,3,0),gc模块就会执行 gc.collect(0),即检查一代对象的垃圾,并重置计数器为(0,4,0)
当计数器从(699,9,0)增加到(700,9,0),gc模块就会执行 gc.collect(1),即检查一、二代对象的垃圾,并重置计数器为(0,0,1)
当计数器从(699,9,9)增加到(700,9,9),gc模块就会执行 gc.collect(2),即检查一、二、三代对象的垃圾,并重置计数器为(0,0,0)