本文介绍garbageCollectForReal方法中的第二步–markNonRootObjects.
该方法的功能是扫描堆中存活对象,将其所引用的对象标记为存活.
该方法通过markChildren以尾递归的方式扫描整个堆。同时,为了防止递归层次过深,设置了阈值:最大递归深度为MAX_GC_DEPTH(4),超过该深度的对象会被暂存到deferredObjectTable中延后处理;如果该表溢出,则重新扫描整个堆。这样,不管对象有多少,都一定能处理完毕。
markNonRootObjects的代码如下:
/*
 * Walk the heap from CurrentHeap up to CurrentHeapEnd and, for every
 * object whose header is already marked, mark the objects it refers to
 * by calling markChildren().
 *
 * Each pass starts by emptying the weak-pointer and weak-reference lists
 * and resetting the deferred-object table.  The whole heap is walked
 * again for as long as a pass finishes with deferredObjectTableOverflow
 * set.  When profiling is enabled, the number of extra passes is added
 * to GarbageCollectionRescans.
 */
static void
markNonRootObjects(void) {
    cell *heapLimit = CurrentHeapEnd;
    int passCount = 0;

    for (;;) {
        cell *cursor;

        /* Every pass rebuilds these lists from scratch. */
        WeakPointers = NULL;
        WeakReferences = NULL;
        initializeDeferredObjectTable();

        cursor = CurrentHeap;
        while (cursor < heapLimit) {
            if (ISMARKED(*cursor)) {
                /* The object body starts one cell after its header. */
                cell *live = cursor + 1;
                /*
                 * The object itself is passed as the limit: see
                 * markChildren() for the meaning of the arguments.
                 */
                markChildren(live, live, MAX_GC_DEPTH);
            }
            /* Step over this header and the object that follows it. */
            cursor += SIZE(*cursor) + HEADERSIZE;
        }

        if (ENABLEPROFILING) {
            passCount++;
        }

        /* A clean pass (no table overflow) means marking is complete. */
        if (!deferredObjectTableOverflow) {
            break;
        }
    }
#if ENABLEPROFILING
    GarbageCollectionRescans += (passCount - 1);
#endif
}
其中initializeDeferredObjectTable方法如下:
/* Number of slots in the deferred-object table. */
#define DEFERRED_OBJECT_TABLE_SIZE 40

/* Backing storage for the deferred-object table. */
static cell *deferredObjectTable[DEFERRED_OBJECT_TABLE_SIZE];

/* Address one past the last slot of deferredObjectTable. */
#define endDeferredObjectTable (deferredObjectTable + DEFERRED_OBJECT_TABLE_SIZE)

/*
 * Cursors into deferredObjectTable.  Both are reset to the start of the
 * table by initializeDeferredObjectTable().
 */
static cell **startDeferredObjects;
static cell **endDeferredObjects;

/*
 * Cleared by initializeDeferredObjectTable(); markNonRootObjects()
 * repeats its heap scan while this flag is set.
 */
static int deferredObjectTableOverflow;

/*
 * Reset to zero by initializeDeferredObjectTable(); markChildren() pulls
 * deferred objects only while this is greater than zero.
 */
static int deferredObjectCount;
/*
 * Put the deferred-object table back into its empty state: no overflow
 * recorded, zero entries, and both cursors pointing at the first slot.
 */
static void initializeDeferredObjectTable(void) {
    deferredObjectTableOverflow = FALSE;
    deferredObjectCount = 0;
    startDeferredObjects = deferredObjectTable;
    endDeferredObjects = deferredObjectTable;
}
markChildren使用了尾递归,此处使用了如下技巧:
- 它只递归地标记堆中地址小于“limit”值的对象。对于地址不小于“limit”的对象,它只做标记而不递归,因为markNonRootObjects()的扫描最终会到达这些对象。
- 变量nextObject用于实现尾递归:当我们认为(或希望)只需要继续处理当前节点的一个子节点时,就把该子节点保存在nextObject中,留到循环的下一次迭代处理,而不必发起递归调用。
- 递归只允许达到有限的深度,即MAX_GC_DEPTH。如果深度超过了这个值,就把子对象保存到deferredObjectTable中;等回到最外层调用(即remainingDepth等于MAX_GC_DEPTH - 1)时,再处理deferredObjectTable中的对象。
- 如果deferredObjectTable已满,则重新进行扫描。(即回到markNonRootObjects中,重新进行扫描)
其代码如下:
/*
 * Mark the objects referenced by "object" and keep following references
 * from there, using a mix of bounded recursion and an in-place loop.
 *
 * object:         pointer to the body of an object; its header is read
 *                 from object - HEADERSIZE.
 * limit:          a child located below this address is traversed from
 *                 here; a child at or above it only has its mark bit set.
 *                 markNonRootObjects() passes the object itself as the
 *                 limit and scans the heap in ascending address order.
 * remainingDepth: recursion budget.  It is decremented once on entry;
 *                 when the decremented value is negative, children are
 *                 handed to putDeferredObject() instead of being recursed
 *                 into.
 *
 * Only the invocation whose decremented depth equals MAX_GC_DEPTH - 1
 * (i.e. a call made with MAX_GC_DEPTH, as markNonRootObjects() does)
 * pulls objects back out with getDeferredObject().
 *
 * Calls fatalVMError() if the header carries an unrecognised GC type.
 */
static void
markChildren(cell* object, cell* limit, int remainingDepth)
{
/* NOTE(review): not referenced directly below; presumably read by the
 * inHeapSpaceFast() macro -- confirm against its definition. */
cell *heapSpace = CurrentHeap;
cell *heapSpaceEnd = CurrentHeapEnd;
/*
 * If child is in the heap and not yet "kept", set its mark bit; if it
 * also lies below limit, RECURSE into it immediately.
 * Not used in the visible body of this function.
 */
#define MARK_AND_RECURSE(child) \
if (inHeapSpaceFast(child)) { \
cell _tmp_ = OBJECT_HEADER(child); \
if (!ISKEPT(_tmp_)) { \
OBJECT_HEADER(child) = _tmp_ | MARKBIT; \
if ((cell*)child < limit) { \
RECURSE((cell *)child); \
} \
} \
}
/*
 * Same marking as above, but a child below limit is stored in nextObject
 * so the main loop can continue with it.  If nextObject is already
 * occupied, that earlier object is passed to RECURSE first.
 */
#define MARK_AND_TAIL_RECURSE(child) \
if (inHeapSpaceFast(child)) { \
cell _tmp_ = OBJECT_HEADER(child); \
if (!ISKEPT(_tmp_)) { \
OBJECT_HEADER(child) = _tmp_ | MARKBIT; \
if ((cell*)child < limit) { \
if (nextObject != NULL) { \
RECURSE(nextObject); \
} \
nextObject = (cell *)(child); \
} \
} \
}
/*
 * Variant that overwrites nextObject without first handling any object
 * already stored there.
 * Not used in the visible body of this function.
 */
#define MARK_AND_TAIL_RECURSEX(child) \
if (inHeapSpaceFast(child)) { \
cell _tmp_ = OBJECT_HEADER(child); \
if (!ISKEPT(_tmp_)) { \
OBJECT_HEADER(child) = _tmp_ | MARKBIT; \
if ((cell*)child < limit) { \
nextObject = (cell *)(child); \
} \
} \
}
/*
 * Descend into child with a real recursive call while depth budget
 * remains; otherwise hand it to putDeferredObject().
 */
#define RECURSE(child) \
if (remainingDepth < 0) { \
putDeferredObject(child); \
} else { \
markChildren(child, limit, remainingDepth); \
}
/*
 * When non-NULL, holds the object to process on the next iteration of
 * the loop below.  This is how tail recursion is implemented.
 */
cell *nextObject = NULL;
remainingDepth -= 1; /* remaining depth for any subcalls */
for (;;) {
cell *header = object - HEADERSIZE; // locate the header of this object
GCT_ObjectType gctype = TYPE(*header);
#if INCLUDEDEBUGCODE
if (tracegarbagecollectionverbose) {
fprintf(stdout, "GC Mark: ");
printObject(object);
}
#endif
switch (gctype) {
/* Shared by the POINTERLIST and OBJECTARRAY arms, which both continue
 * at the markArray label. */
int length;
cell **ptr;
case GCT_INSTANCE: {
/* The object is a Java object instance. Mark pointer fields */
INSTANCE instance = (INSTANCE)object;
INSTANCE_CLASS clazz = instance->ofClass;
/* Mark the possible monitor object alive */
checkMonitorAndMark((OBJECT)instance);
/* Visit the fields declared by the class and by each superclass. */
while (clazz) {
/* This is the tough part: walk through all the fields */
/* of the object and see if they contain pointers; only pointer */
/* fields are processed */
FOR_EACH_FIELD(thisField, clazz->fieldTable)
/* Is this a non-static pointer field? */
if ((thisField->accessFlags & (ACC_POINTER | ACC_STATIC))
== ACC_POINTER) {
int offset = thisField->u.offset;
cell* subobject = instance->data[offset].cellp;
MARK_AND_TAIL_RECURSE(subobject);
}
END_FOR_EACH_FIELD
clazz = clazz->superClass;
}
break;
}
case GCT_ARRAY:
/* The object is a Java array with primitive values. */
checkMonitorAndMark((OBJECT)object);
break;
case GCT_POINTERLIST: {
POINTERLIST list = (POINTERLIST)object;
length = list->length;
ptr = &list->data[0].cellp;
goto markArray;
}
case GCT_WEAKPOINTERLIST:
// Push this list onto the WeakPointers chain; per the original note it
// is processed in step 3 of garbageCollectForReal.
((WEAKPOINTERLIST)object)->gcReserved = WeakPointers;
WeakPointers = (WEAKPOINTERLIST)object;
break;
case GCT_OBJECTARRAY:
/* The object is a Java array with object references. */
checkMonitorAndMark((OBJECT)object);
length = ((ARRAY)object)->length;
ptr = &((ARRAY)object)->data[0].cellp;
/* FALL THROUGH */
markArray:
/* Keep objects in the array alive. */
while (--length >= 0) {
cell *subobject = *ptr++;
MARK_AND_TAIL_RECURSE(subobject);
}
break;
/* Added for java.lang.ref.WeakReference support in CLDC 1.1 */
case GCT_WEAKREFERENCE: {
/* Mark the possible monitor object alive */
checkMonitorAndMark((OBJECT)object);
// Push this weak-reference object onto the WeakReferences chain; per the
// original note it is processed in step 4 of garbageCollectForReal.
((WEAKREFERENCE)object)->gcReserved = WeakReferences;
WeakReferences = (WEAKREFERENCE)object;
break;
}
case GCT_METHODTABLE:
// Method table: mark the code (and the handlers, when non-NULL) of each
// non-native method.  Per the original note, this type only occurs
// when the static area is in use.
FOR_EACH_METHOD(thisMethod, ((METHODTABLE)object))
if ((thisMethod->accessFlags & ACC_NATIVE) == 0) {
MARK_OBJECT(thisMethod->u.java.code);
MARK_OBJECT_IF_NON_NULL(thisMethod->u.java.handlers);
}
END_FOR_EACH_METHOD
break;
case GCT_MONITOR:
// Objects of this type have already been handled elsewhere.
break;
case GCT_THREAD:
// Per the original note: already handled in step 1 of
// garbageCollectForReal.
break;
case GCT_NOPOINTERS:
// Objects of this type contain no pointers; nothing to do.
break;
case GCT_EXECSTACK:
// Per the original note: already handled in step 1 of
// garbageCollectForReal.
break;
default:
// Any other type is treated as heap corruption: report a fatal VM error.
fatalVMError(KVM_MSG_BAD_DYNAMIC_HEAP_OBJECTS_FOUND);
} /* End of switch statement */
if (nextObject != NULL) {
/* Tail recursion: loop again with the saved child as the new object. */
object = nextObject;
nextObject = NULL;
/* continue */
} else if (remainingDepth == MAX_GC_DEPTH - 1
&& deferredObjectCount > 0) {
/* Only a call entered with MAX_GC_DEPTH takes objects back out of the
 * deferred table. */
object = getDeferredObject();
/* continue */
} else {
break; /* finish "for" loop. */
}
} /* end of for(ever) loop */
}
在该方法内部定义了4个宏(MARK_AND_RECURSE、MARK_AND_TAIL_RECURSE、MARK_AND_TAIL_RECURSEX、RECURSE),但方法体中实际只直接使用了MARK_AND_TAIL_RECURSE,以及由它间接调用的RECURSE。
此处有个问题: 如果递归深度过深导致deferredObjectTable溢出,则会在markNonRootObjects中重新扫描整个堆,这样不会存在重复标记的现象吗?
答案是:在markChildren方法中,标记之前已经判断了对象是否已被标记,只有尚未标记的对象才会被标记并继续递归,因此不存在重复标记的现象。
通过看这个代码,可以看到,此算法是比较耗时的.但是由于kvm是运行在小内存设备上,最大支持到64M,因此也就不是特别慢(该算法用在jdk上,可以说是很慢了…)。