通用数据结构:TValue
// Payload shared by every Lua value: one storage slot that is read through
// whichever member matches the kind of value currently held.
union Value {
GCObject *gc; /* collectable objects */
void *p; /* light userdata */
int b; /* booleans */
lua_CFunction f; /* light C functions */
lua_Integer i; /* integer numbers */
lua_Number n; /* float numbers */
};
// Tagged values: the basic representation of values in Lua.
// TValuefields expands to two members -- a Value union (value_) and an
// integer type tag (tt_) -- which together make up one basic Lua value.
#define TValuefields Value value_; int tt_
struct lua_TValue {
TValuefields;
};
typedef struct lua_TValue TValue;
从上面的定义可以看出, TValue是lua中的基本数据结构;
基本类型定义:basic types
下面是最基本的类型定义宏:
/*
** basic types
** (LUA_TNONE is the only negative tag; the remaining tags are numbered
** consecutively from 0 to 8)
*/
#define LUA_TNONE (-1)
#define LUA_TNIL 0
#define LUA_TBOOLEAN 1
#define LUA_TLIGHTUSERDATA 2
#define LUA_TNUMBER 3
#define LUA_TSTRING 4
#define LUA_TTABLE 5
#define LUA_TFUNCTION 6
#define LUA_TUSERDATA 7
#define LUA_TTHREAD 8
函数在lua中是第一类值(first-class value), 其类型标签定义如下:
类型标签中从第4位起的高位代表类型的变体, 低4位代表基本类型
/*
** LUA_TFUNCTION variants:
** 0 - Lua function
** 1 - light C function
** 2 - regular C function (closure)
*/
/* Variant tags for functions: the basic tag LUA_TFUNCTION OR-ed with the
   variant number shifted left by 4 bits */
#define LUA_TLCL (LUA_TFUNCTION | (0 << 4)) /* Lua closure */
#define LUA_TLCF (LUA_TFUNCTION | (1 << 4)) /* light C function */
#define LUA_TCCL (LUA_TFUNCTION | (2 << 4)) /* C closure */
Table数据结构
lua的哈希表有一个高效的实现, 几乎可以认为操作哈希表的时间复杂度为常数; 下面是lua源码中对table的介绍:
/*
** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep its elements in two parts: an array part and a hash part.
** Non-negative integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
** Hash uses a mix of chained scatter table with Brent's variation.
** A main invariant of these tables is that, if an element is not
** in its main position (i.e. the 'original' position that its hash gives
** to it), then the colliding element is in its own main position.
** Hence even when the load factor reaches 100%, performance remains good.
*/
下面就是其论文“The Implementation of Lua 5.0”中给出的table结构示意图:
总体意思就是: 以整数为键的pair优先存储在数组中, table根据内容自动并且动态地对这两部分进行适当的分配; 图中以string为键的项存储在hash部分中;
table的数据结构如下:
// Key of a node in the hash part.
// 'nk' lays out the TValue fields followed by the chaining offset 'next';
// 'tvk' views the same storage as a plain TValue.
typedef union TKey {
struct {
TValuefields;
int next; /* for chaining (offset for next node) */
} nk;
TValue tvk;
} TKey;
// One node of the hash part: a value together with its key
// (the key also carries the offset to the next node of its chain).
typedef struct Node {
TValue i_val;
TKey i_key;
} Node;
// A table has two parts: the hash nodes 'node' (size given by lsizenode)
// and the array 'array' (length sizearray).
typedef struct Table {
CommonHeader; // common header
lu_byte flags; /* 1<<p means tagmethod(p) is not present */
lu_byte lsizenode; /* log2 of size of 'node' array */
unsigned int sizearray; /* size of 'array' array */
TValue *array; /* array part */
Node *node; /* hash part */
Node *lastfree; /* any free position is before this position */
struct Table *metatable;
GCObject *gclist;
} Table;
table读取
论文说明:
即非负整数键都有可能存储在array部分, hash部分使用了chained scatter table(链状发散表)与Brent变体(Brent's variation)相结合的结构; (链状发散表是指: 主位置(main position)相同的键通过next偏移串成一条链, 查找时从主位置节点开始沿链遍历)
表读取函数如下, 会根据具体不同的类型调用不同的哈希查找方法,比如int则是优先在array中查找:
/*
** Main search function.
** Dispatches on the type of 'key': short strings and integers use their
** specialized lookups, nil is never present, and a float that converts
** to an integer is looked up as that integer. Every other key is
** searched for along the collision chain that starts at its main
** position. Returns luaO_nilobject when the key is not found.
*/
const TValue *luaH_get (Table *t, const TValue *key) {
  Node *node;
  switch (ttype(key)) {
    case LUA_TNIL:
      return luaO_nilobject;
    case LUA_TSHRSTR:
      return luaH_getstr(t, tsvalue(key));
    case LUA_TNUMINT:
      return luaH_getint(t, ivalue(key));
    case LUA_TNUMFLT: {
      lua_Integer as_int;
      if (luaV_tointeger(key, &as_int, 0))  /* index is int? */
        return luaH_getint(t, as_int);  /* use specialized version */
      break;  /* not an integral float: generic lookup below */
    }
    default:
      break;  /* generic lookup below */
  }
  /* walk the chain until the key is found or the chain ends */
  node = mainposition(t, key);
  while (!luaV_rawequalobj(gkey(node), key)) {
    int step = gnext(node);
    if (step == 0)
      return luaO_nilobject;  /* end of chain: key is absent */
    node += step;
  }
  return gval(node);  /* that's it */
}
这里有分short string, int, nil, double几种查找,如下面是short string:
/*
** Search function for short strings.
** Starts at the node selected by the key's hash and follows the collision
** chain. Returns the value stored under 'key', or luaO_nilobject when no
** node of the chain holds that key.
*/
const TValue *luaH_getshortstr (Table *t, TString *key) {
  Node *n = hashstr(t, key);  /* head node located through the key */
  lua_assert(key->tt == LUA_TSHRSTR);
  for (;;) {  /* check whether 'key' is somewhere in the chain */
    const TValue *k = gkey(n);
    if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
      return gval(n);  /* that's it */
    else {
      int nx = gnext(n);
      if (nx == 0)
        break;  /* end of chain */
      n += nx;  /* 'next' is a relative offset between nodes */
    }
  }
  return luaO_nilobject;
}
下面是int的获取方式, 可以看出, 当超出数组范围时就会查找hash表:
/*
** Search function for integers.
** Keys in the range 1..sizearray are served directly from the array part;
** any other integer key is searched for in the hash part by following the
** collision chain. Returns luaO_nilobject when the key is not found.
*/
const TValue *luaH_getint (Table *t, lua_Integer key) {
  /* (1 <= key && key <= t->sizearray)
  ** The subtraction is done after the cast to unsigned, so it cannot
  ** overflow when 'key' is the minimum integer; key == 0 wraps to the
  ** largest unsigned value and therefore fails the test as intended. */
  if (l_castS2U(key) - 1 < t->sizearray)
    return &t->array[key - 1];
  else {
    Node *n = hashint(t, key);
    for (;;) {  /* check whether 'key' is somewhere in the chain */
      if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
        return gval(n);  /* that's it */
      else {
        int nx = gnext(n);
        if (nx == 0)
          break;  /* end of chain */
        n += nx;
      }
    }
    return luaO_nilobject;
  }
}
在论文中经常提到main position(主位置), 它是指键KEY按其哈希值在hash部分(node数组)中对应的位置, 即该键所在冲突链的头节点;
/*
** Returns the 'main' position of an element in a table (that is, the index
** of its hash value).
** The body is elided in this excerpt.
*/
static Node *mainposition (const Table *t, const TValue *key) { /*...*/ }
table写入
/*
** Returns the slot for 'key' in table 't', creating a new key when the
** lookup does not find one.
** Beware: when using this function you probably need to check a GC
** barrier and invalidate the TM cache.
*/
TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
  const TValue *slot = luaH_get(t, key);
  return (slot == luaO_nilobject) ? luaH_newkey(L, t, key)
                                  : cast(TValue *, slot);
}
重点在luaH_newkey函数里,
/*
** Inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** In short: if the node occupying the main position does not belong there,
** it is relocated and the new key takes the main position; if it does
** belong there, the new key is written to a newly obtained free node.
** (Author's note: this step is not fully understood yet.)
**
** Raises a Lua error for a nil key or a NaN key. A float key with an exact
** integer value is inserted as that integer.
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Integer k;
    if (luaV_tointeger(key, &k, 0)) {  /* index is int? */
      /* convert the float key to an integer key */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
    else if (luai_numisnan(fltvalue(key)))
      luaG_runerror(L, "table index is NaN");
  }
  mp = mainposition(t, key);
  if (!ttisnil(gval(mp)) || isdummy(mp)) {  /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t);  /* get a free place (searched via 'lastfree') */
    if (f == NULL) {  /* cannot find a free place? */
      rehash(L, t, key);  /* grow table (rehash) */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key);  /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));
    /* ... remainder of the function is elided in this excerpt ... */
}
看rehash过程:
/*
** Recomputes the sizes of both parts of table 't', counting the extra key
** 'ek' that is about to be inserted, and resizes the table accordingly.
**
** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int asize;  /* optimal size for array part */
  unsigned int na;  /* number of keys in the array part */
  unsigned int nums[MAXABITS + 1];
  int i;
  int totaluse;
  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
  /* numusearray counts the keys per slice (2^(i-1), 2^i]. Not every
  ** integer key ends up in the array part: only a prefix in which more
  ** than half of the slots are used is kept as an array. */
  na = numusearray(t, nums);  /* count keys in array part */
  totaluse = na;  /* all those keys are integer keys */
  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
  /* count extra key */
  na += countint(ek, nums);
  totaluse++;
  /* compute new size for array part; per the note above, the chosen size
  ** keeps more than half of the array slots in use */
  asize = computesizes(nums, &na);
  /* resize the table to new computed sizes */
  luaH_resize(L, t, asize, totaluse - na);
}
// 注: 数组只会增大, 而hash会增大或减小
TString
短字符串会被内部化(intern), 存放于全局hash表里; 存入短字符串时也可能需要将该哈希表扩大;
/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
** This is only the header of a string: the character data comes right
** after it.
*/
typedef struct TString {
  CommonHeader;
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;
  union {
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;
} TString;
UserData
存储形式上和字符串相同(数据紧跟在头部之后), 但不追加结尾的'\0'
/*
** Header for userdata; memory area follows the end of this structure
** (aligned according to 'UUdata'; see next).
*/
typedef struct Udata {
  CommonHeader;
  lu_byte ttuv_;  /* user value's tag */
  struct Table *metatable;
  size_t len;  /* number of bytes */
  union Value user_;  /* user value */
} Udata;
栈和调用链
lua线程数据结构如下, 每个线程里都有一个指向全局的共享lua状态:
/*
** 'per thread' state
** (excerpt: some fields are elided)
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  global_State *l_G;  /* global state shared by all threads: the actual Lua VM */
  /* ... fields elided ... */
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  UpVal *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;  /* used by garbage collection */
  /* ... fields elided ... */
};
lua_State的所有的lua C API都是围绕这个状态机来改变状态的, 独立在线程栈里操作;
而全局共享的真正虚拟机是如下说明的:
/*
** 'global state', shared by all threads of this state
** (excerpt: all fields are elided)
*/
typedef struct global_State {
  /* ... fields elided ... */
} global_State;
状态机的栈信息数据结构StkId
看到下面的定义可以知道, StkId就是指向TValue的指针, 而TValue是由TValuefields宏定义的结构, 该结构包含Value value_;int tt_
两部分, value_是联合值,Value类型, tt_是说明联合对象的类型; 由上面Value的结构可知, 它是一个由{可回收对象指针gc; void*的light userdata; booleans; light C functions; integer; number;} 这些成员组成的联合类型, 所以需要一个tt_来说明当前的TValue到底是什么类型;
typedef TValue *StkId;  /* index to stack elements */

/* stack size (expands to twice LUA_MINSTACK at the point of use) */
#define BASIC_STACK_SIZE        (2*LUA_MINSTACK)

/* minimum Lua stack available to a C function */
#define LUA_MINSTACK    20
栈的初始化
数据栈和调用栈构成了lua的线程, 同一个虚拟机中不同线程共享了global_State;
// 栈这边代码的还没有仔细研究 static void stack_init (lua_State *L1, lua_State *L) { int i; CallInfo *ci; // CallInfo是当前函数的调用栈, 以双向链表的形式存在与线程对象里 /* initialize stack array */ L1->stack = luaM_newvector(L, BASIC_STACK_SIZE, TValue); // 初始化长度 L1->stacksize = BASIC_STACK_SIZE; for (i = 0; i < BASIC_STACK_SIZE; i++) setnilvalue(L1->stack + i); /* erase new stack */ L1->top = L1->stack; L1->stack_last = L1->stack + L1->stacksize - EXTRA_STACK; /* initialize first ci */ ci = &L1->base_ci; ci->next = ci->previous = NULL; ci->callstatus = 0; ci->func = L1->top; setnilvalue(L1->top++); /* 'function' entry for this 'ci' */ ci->top = L1->top + LUA_MINSTACK; L1->ci = ci; }
线程
数据栈和调用栈构成了lua的线程, 同一个虚拟机中不同线程共享了global_State
参考lua_newthread的创建过程:
// lua_newstate创建的是lua虚拟机 LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) { /***.....**/ } // lua_newthread是线程 LUA_API lua_State *lua_newthread (lua_State *L) { global_State *g = G(L); lua_State *L1; lua_lock(L); luaC_checkGC(L); /* create new thread */ // LX: thread state + extra space L1 = &cast(LX *, luaM_newobject(L, LUA_TTHREAD, sizeof(LX)))->l; L1->marked = luaC_white(g); L1->tt = LUA_TTHREAD; // 类型 /* link it on list 'allgc' */ // 挂到垃圾回收上 L1->next = g->allgc; g->allgc = obj2gco(L1); /* anchor it on L stack */ setthvalue(L, L->top, L1); api_incr_top(L); preinit_thread(L1, g); L1->hookmask = L->hookmask; L1->basehookcount = L->basehookcount; L1->hook = L->hook; resethookcount(L1); /* initialize L1 extra space */ memcpy(lua_getextraspace(L1), lua_getextraspace(g->mainthread), LUA_EXTRASPACE); luai_userstatethread(L, L1); stack_init(L1, L); /* init stack */ lua_unlock(L); return L1; }
lua C API
一般的如lua_pushstring之类的理解不难, 现在看一个lua_pushvalue的代码:
LUA_API void lua_pushvalue (lua_State *L, int idx) { lua_lock(L); setobj2s(L, L->top, index2addr(L, idx)); // 顶部压值 api_incr_top(L); lua_unlock(L); } // index2addr的实现 static TValue *index2addr (lua_State *L, int idx) { CallInfo *ci = L->ci; // 调用栈 if (idx > 0) { // 正索引 TValue *o = ci->func + idx; // 被调用函数的栈底+idx索引找到对应的值 api_check(L, idx <= ci->top - (ci->func + 1), "unacceptable index"); if (o >= L->top) return NONVALIDVALUE; else return o; } else if (!ispseudo(idx)) { /* negative index */ // 负索引 api_check(L, idx != 0 && -idx <= L->top - (ci->func + 1), "invalid index"); return L->top + idx; } else if (idx == LUA_REGISTRYINDEX) // 全局 return &G(L)->l_registry; else { /* upvalues */ idx = LUA_REGISTRYINDEX - idx; api_check(L, idx <= MAXUPVAL + 1, "upvalue index too large"); if (ttislcf(ci->func)) /* light C function? */ return NONVALIDVALUE; /* it has no upvalues */ else { CClosure *func = clCvalue(ci->func); return (idx <= func->nupvalues) ? &func->upvalue[idx-1] : NONVALIDVALUE; } } }