这段时间试着实现了一个简易的C语言垃圾收集器,代码不多,但对我的经验而言,确实费了不少心思。
这里感谢云风兄的开源:云风:blog.codingnow.com/2008/06/gc_for_c.html
以及LOGOS兄对yfgc的解析和他实现的一个概念版本:LOGOS:www.cppblog.com/darkdestiny
话说我知道自己的实现里面,有很多考虑不足的地方。但还是决定贴出来,讲述我的实现思想,有不足的地方大家指出来也好纠正。
大家知道垃圾收集技术主要分为以下几种,他们有各自的优点与缺点:
1、引用计数(Reference Counting)
2、标记清除(Mark-Sweep)
3、标记整理/紧缩(Mark-Compact)
4、结点复制(Copying)
5、分代收集(Generational Collecting)
这里我就不详述各种思想了,google上有。我的需求是:要求垃圾收集能自然的解决循环引用的问题,占用的空间不要太多,轻量级的实现,解决内存碎片的问题。
这里我选用的是标记清除算法,内存分配使用内存池。
首先还是给出内存池的接口,实现在我之前的文章中有讲,就不贴出来了。内存池的实现借鉴了sgi stl的实现思路。
/* mem_pool.h - interface of the size-aware memory pool used by the collector.
 *
 * The include guard deliberately avoids a leading underscore followed by an
 * upper-case letter: such identifiers are reserved for the implementation.
 */
#ifndef MEM_POOL_H
#define MEM_POOL_H

#include <stddef.h>

/* Allocate a block of n bytes. */
void *mem_malloc(size_t n);

/* Release block p; n is the size of that block, which the caller must supply
 * (presumably the size it was allocated with -- confirm in the pool source). */
void mem_free(void *p, size_t n);

/* Resize block ptr from old_sz bytes to new_sz bytes and return the
 * (possibly different) block address. */
void *mem_realloc(void *ptr, size_t new_sz, size_t old_sz);

#endif /* MEM_POOL_H */
然后看gc的接口:
/* gc.h - public interface of the mark-sweep garbage collector. */
#ifndef GARBAGE_COLLECTOR_H
#define GARBAGE_COLLECTOR_H

#include <stddef.h>

/* Allocator back end: the collector allocates through the memory pool. */
#define my_malloc mem_malloc
#define my_free mem_free
#define my_realloc mem_realloc

/* Lifetime level of a node. */
#define NODE_ROOT_LEVEL 0 /* the node is a global variable or lives in main() */
#define NODE_NOW_LEVEL 1  /* the node is only used in the present function */
#define NODE_PRE_LEVEL 2  /* the node is referenced by the calling function */

/* Set up / tear down the collector.  gc_exit() releases every block that is
 * still tracked. */
void gc_init(void);
void gc_exit(void);

/* Reset the level of each listed block to NODE_NOW_LEVEL.  The argument list
 * must be terminated by a null pointer. */
void func_end(void *p, ...);

/* Set the lifetime level of block p to one of the NODE_*_LEVEL values. */
void gc_link(void *p, int level);

/* gc will mark the memory allocated between gc_enter/gc_leave so that a later
 * gc_collect() can release it. */
void gc_enter(void);
void gc_leave(void);
void gc_collect(void);

/* Allocate sz tracked bytes; finalizer (may be NULL) is called with the block
 * address right before the block is released. */
void *gc_malloc(size_t sz, void (*finalizer)(void *));

/* Resize tracked block p to sz bytes; a NULL p behaves like gc_malloc(sz, NULL). */
void *gc_realloc(void *p, size_t sz);

#endif /* GARBAGE_COLLECTOR_H */
用到的数据结构:
/* One tracked allocation. */
struct node {
    int mark;   /* collection state of the node (a MARK_* value) */
    int level;  /* lifetime level of the node (a NODE_*_LEVEL value) */
    struct {
        void *mem;                  /* address of the tracked memory block */
        size_t size;                /* size of the block in bytes; size_t so
                                       the value given to gc_malloc is never
                                       truncated before it reaches my_free */
        void (*finalizer)(void *);  /* called on mem right before it is freed */
    } n;
};

/* Collector state (one instance per program). */
static struct {
    struct node *pool;    /* array that stores the nodes */
    int size;             /* number of slots in pool */
    int free;             /* index of the next free slot in pool */
    struct stack *stack;  /* stack of the pointers allocated per function */
} E;
这里 level 取值为:NODE_ROOT_LEVEL、NODE_NOW_LEVEL、NODE_PRE_LEVEL。 基于这样的考虑:我们知道动态分配一块内存,如果要延长其生命期,要么通过函数返回值传回,要么通过多级指针,或者直接挂到全局变量上。所以这个gc基于这样的策略:首先用户分配的内存块所在的结点 level 值初始化为NODE_NOW_LEVEL,如果用户需要延长其生命期到上一级函数或全局变量,那么调用 gc_link 并传入相应的 level 值。仅在其生命期需要延长至上一级函数时需要在函数结尾处(通过返回值传递动态内存时需要在 return 前)调用 func_end。func_end的作用是将该内存块的 level 值设置为NODE_NOW_LEVEL。
知道了结点的生命期,标记就简单了。gc_leave负责将当前函数栈中 level 为NODE_NOW_LEVEL的结点标记为MARK_COLLECT,从而在 gc_collect 中回收。这里需要说的是main()函数中分配的内存和挂到全局变量的内存会在gc_exit中释放。
大致过程知道了,下面就是具体实现了:
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "stack.h"
#include "mem_pool.h"
#include "gc.h"
#define POOL_INITIAL_NUMBER 1024 /* the initial number of nodes in the pool */
#define POOL_MAX_NUMBER 4096     /* the max number of nodes in the pool */
#define STACK_SECTION_TAG NULL   /* the tag that separates stack sections */

/* Node marks.  The negative value is parenthesised so the macro stays a
 * single operand wherever it is expanded. */
#define MARK_INITIAL (-1) /* the slot is initialised / unused */
#define MARK_RESERVE 0    /* the node is marked to be kept */
#define MARK_COLLECT 1    /* the node is marked to be collected */
/* One tracked allocation. */
struct node {
    int mark;   /* collection state of the node (a MARK_* value) */
    int level;  /* lifetime level of the node (a NODE_*_LEVEL value) */
    struct {
        void *mem;                  /* address of the tracked memory block */
        size_t size;                /* size of the block in bytes; size_t so
                                       the value given to gc_malloc is never
                                       truncated before it reaches my_free */
        void (*finalizer)(void *);  /* called on mem right before it is freed */
    } n;
};

/* Collector state (one instance per program). */
static struct {
    struct node *pool;    /* array that stores the nodes */
    int size;             /* number of slots in pool */
    int free;             /* index of the next free slot in pool */
    struct stack *stack;  /* stack of the pointers allocated per function */
} E;
- static bool pool_compact()
- {
- int i, j;
- struct node temp;
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].mark == MARK_INITIAL) {
- temp = E.pool[i];
- for (j = E.free; j > i; j--) {
- if (E.pool[j].mark != MARK_INITIAL) {
- E.pool[i] = E.pool[j];
- E.pool[j] = temp;
- break;
- }
- }
- }
- }
- for (i = 0; i < E.size; i++) {
- if (E.pool[i].mark == MARK_INITIAL) {
- E.free = i;
- break;
- }
- }
- return E.free >= E.size ? true : false;
- }
- static void node_init()
- {
- int i;
- for (i = E.free; i < E.size; i++) {
- E.pool[i].mark = MARK_INITIAL;
- E.pool[i].level = NODE_NOW_LEVEL;
- E.pool[i].n.mem = NULL;
- E.pool[i].n.finalizer = NULL;
- }
- }
- static void pool_expand()
- {
- int expand_size;
- bool expand = false;
- expand_size = E.size * 2;
- if (expand_size >= POOL_MAX_NUMBER * sizeof(struct node)) {
- expand = pool_compact();
- }
- if (expand) {
- E.pool = (struct node *)my_realloc(E.pool, expand_size * sizeof(struct node),
- E.size * sizeof(struct node));
- E.free = E.size;
- E.size = expand_size;
- /* init the node */
- node_init();
- }
- }
- static void node_alloc(void *p, size_t sz, void (*finalizer)(void *))
- {
- if (E.free >= E.size) {
- pool_expand();
- }
- E.pool[E.free].mark = MARK_RESERVE;
- E.pool[E.free].level = NODE_NOW_LEVEL;
- E.pool[E.free].n.mem = p;
- E.pool[E.free].n.size = sz; // for mem_free
- E.pool[E.free].n.finalizer = finalizer;
- E.free++;
- }
- static void pool_init()
- {
- E.pool = (struct node *)my_malloc(POOL_INITIAL_NUMBER * sizeof(struct node));
- E.free = 0;
- E.size = POOL_INITIAL_NUMBER;
- /* init the node */
- node_init();
- }
- void gc_init()
- {
- E.pool = NULL;
- E.size = 0;
- E.free = -1;
- E.stack = init_stack();
- pool_init();
- }
- void gc_link(void *p, int level)
- {
- int i;
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].n.mem == p) {
- E.pool[i].level = level;
- break;
- }
- }
- }
- void gc_enter()
- {
- push(E.stack, STACK_SECTION_TAG);
- }
- /* accordind to the level of nodes, mark nodes. if in present stack section
- * of function, there are some nodes' life extend father function's life
- * which callthe present function, then push these nodes in stack section
- * of father's function.
- */
- void gc_leave()
- {
- void *p;
- struct stack *stack_temp;
- stack_temp = init_stack();
- while ((p = top(E.stack)) != STACK_SECTION_TAG) {
- int i;
- /* whether mark for gc collect or not by searching for the node
- whose mem element equals p. */
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].n.mem == p ) {
- if (E.pool[i].level == NODE_NOW_LEVEL) {
- E.pool[i].mark = MARK_COLLECT;
- } else if (E.pool[i].level == NODE_PRE_LEVEL) {
- push(stack_temp, p);
- }
- break;
- }
- }
- pop(E.stack);
- }
- pop(E.stack); /* pop the STACK_SECTION_TAG */
- while (! stack_empty(stack_temp)) {
- p = top(stack_temp);
- push(E.stack, p);
- pop(stack_temp);
- }
- destory_stack(stack_temp);
- }
- void gc_collect()
- {
- int i;
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].mark == MARK_COLLECT) {
- void *p = E.pool[i].n.mem;
- int sz = E.pool[i].n.size;
- if (E.pool[i].n.finalizer != NULL) {
- E.pool[i].n.finalizer(p);
- }
- my_free(p, sz); // for mem_free(p, size);
- E.pool[i].mark = MARK_INITIAL;
- }
- }
- }
- void *gc_malloc(size_t sz, void (* finalizer)(void *))
- {
- void *result = my_malloc(sz);
- node_alloc(result, sz, finalizer);
- push(E.stack, result);
- return result;
- }
- void *gc_realloc(void *p, size_t sz)
- {
- void *result;
- assert(sz > 0);
- if (p == NULL) {
- return gc_malloc(sz, NULL);
- }
- /* find the node contain p */
- int i;
- int old_sz;
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].n.mem == p) {
- old_sz = E.pool[i].n.size;
- break;
- }
- }
- result = my_realloc(p, sz, old_sz);
- /* if new memory address is not change, just update size and return */
- if (result == p) {
- E.pool[i].n.size = sz;
- return result;
- } else {
- /* update size and mem */
- E.pool[i].n.size = sz;
- E.pool[i].n.mem = result;
- /* update the stack infomation */
- void *temp;
- struct stack *stack_temp = init_stack();
- while ((! stack_empty(E.stack)) && ((temp = top(E.stack)) != p)) {
- push(stack_temp, temp);
- pop(E.stack);
- }
- /* if the stack is not empty, pop the old address p and push
- the new address result */
- if (! stack_empty(E.stack)) {
- pop(E.stack);
- push(E.stack, result);
- }
- /* push the former data */
- while (! stack_empty(stack_temp)) {
- temp = top(stack_temp);
- push(E.stack, temp);
- pop(stack_temp);
- }
- destory_stack(stack_temp);
- }
- return result;
- }
- void gc_exit()
- {
- /* the rest nodes (normally are root nodes) will free in this function */
- int i;
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].mark != MARK_INITIAL) {
- E.pool[i].mark = MARK_COLLECT;
- }
- }
- gc_collect();
- my_free(E.pool, E.size * sizeof(struct node));
- destory_stack(E.stack);
- }
- /* this remand the last paramater must be NULL */
- void func_end(void *p, ...)
- {
- va_list ap;
- void *temp = p;
- va_start(ap, p);
- int i;
- while (temp != NULL) {
- for (i = 0; i < E.free; i++) {
- if (E.pool[i].n.mem == temp) {
- E.pool[i].level = NODE_NOW_LEVEL;
- break;
- }
- }
- temp = va_arg(ap, void *);
- }
- va_end(ap);
- }
这里给出测试代码:
- #include <stdio.h>
- #include "gc.h"
/* Finalizer used by the demo: report the address of the block that is about
 * to be released, one block per line. */
static void log_ptr(void *p)
{
    printf("free %p\n", p);
}
- int *foo1()
- {
- printf("in function foo1:/n");
- int *p3 = gc_malloc(3, log_ptr);
- int *p4 = gc_malloc(6, log_ptr);
- int *p5 = gc_realloc(p4, 1024);
- gc_link(p3, NODE_PRE_LEVEL);
- func_end(p3, NULL);
- return p3;
- }
- void foo()
- {
- gc_enter();
- int *p1 = foo1();
- int *p2 = gc_malloc(4, log_ptr);
- gc_link(p2, NODE_ROOT_LEVEL);
- gc_leave();
- printf("in foo:/n");
- gc_collect();
- }
- int main()
- {
- gc_init();
- gc_enter();
- void *p = gc_malloc(5, log_ptr);
- foo();
- gc_leave();
- printf("in fuction main:/n");
- gc_collect();
- gc_exit();
- return 0;
- }
运行结果: