lua5.3.1 源码阅读记录(基础)

最新推荐文章于 2022-03-19 20:40:37 发布

呵离

最新推荐文章于 2022-03-19 20:40:37 发布

阅读量2.7k

点赞数

分类专栏： Lua 文章标签： lua 源码

本文链接：https://blog.csdn.net/bbhe_work/article/details/50908192

版权

Lua 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

通用数据结构:Tvalue

/*
** Union holding the payload of a Lua value. All members share the same
** storage, so only one is meaningful at a time; which one is valid is not
** recorded here but in the 'tt_' tag that TValuefields declares next to it.
*/
union Value {
  GCObject *gc;    /* collectable objects */
  void *p;         /* light userdata */
  int b;           /* booleans */
  lua_CFunction f; /* light C functions */
  lua_Integer i;   /* integer numbers */
  lua_Number n;    /* float numbers */
};
/*
** Tagged Values. This is the basic representation of values in Lua:
** a Value union ('value_') plus an int type tag ('tt_'). The macro
** TValuefields expands to exactly these two fields, so the pair can be
** embedded in other structures as well as in TValue itself.
*/
#define TValuefields    Value value_; int tt_
struct lua_TValue {
  TValuefields;
};
typedef struct lua_TValue TValue;

由上面的定义可以看出, TValue 就是 lua 中表示值的基本数据结构;

基本类型定义:basic types

下面是最基本的类型定义宏:

/*
** basic types
** Integer codes identifying each basic Lua type. LUA_TNONE is the only
** negative code; the others run consecutively from 0 (nil) to 8 (thread).
*/
#define LUA_TNONE       (-1)
#define LUA_TNIL        0
#define LUA_TBOOLEAN        1
#define LUA_TLIGHTUSERDATA  2
#define LUA_TNUMBER     3
#define LUA_TSTRING     4
#define LUA_TTABLE      5
#define LUA_TFUNCTION       6
#define LUA_TUSERDATA       7
#define LUA_TTHREAD     8

函数在lua中是第一类值(first-class value), 其类型标签定义如下:
类型标签的低4位表示基本类型, 第4位及以上的高位表示该类型的变体

/*
** LUA_TFUNCTION variants:
** 0 - Lua function
** 1 - light C function
** 2 - regular C function (closure)
*/
/*
** Variant tags for functions. The variant number is shifted left by 4 and
** OR-ed with LUA_TFUNCTION, so the low four bits of each tag still hold
** the basic type code and the bits above them hold the variant.
*/
#define LUA_TLCL    (LUA_TFUNCTION | (0 << 4))  /* Lua closure */
#define LUA_TLCF    (LUA_TFUNCTION | (1 << 4))  /* light C function */
#define LUA_TCCL    (LUA_TFUNCTION | (2 << 4))  /* C closure */

Table数据结构

lua的哈希表有一个高效的实现, 几乎可以认为操作哈希表的时间复杂度为常数; 下面是lua源码中对table的介绍:

/*
** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep its elements in two parts: an array part and a hash part.
** Non-negative integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
** Hash uses a mix of chained scatter table with Brent's variation.
** A main invariant of these tables is that, if an element is not
** in its main position (i.e. the 'original' position that its hash gives
** to it), then the colliding element is in its own main position.
** Hence even when the load factor reaches 100%, performance remains good.
*/

下面就是其论文 "The Implementation of Lua 5.0" 中给出的table结构示意图:
这里写图片描述

总体意思就是: 以整数为键的pair优先存储在数组部分, table会根据内容自动并且动态地对这两部分进行适当的分配, 图中以string为键的pair则存储在hash部分中;

table的数据结构如下:

/*
** Key of a hash-table node. The two members overlay the same storage:
** 'nk' lays out the tagged-value fields followed by the chaining field
** 'next', while 'tvk' views the leading fields as a plain TValue.
*/
typedef union TKey {
  struct {
    TValuefields;
    int next;  /* for chaining (offset for next node) */
  } nk;
  TValue tvk;
} TKey;

/*
** One node of the hash part: a value together with its key (the key also
** carries the 'next' offset used to chain colliding nodes).
*/
typedef struct Node {
  TValue i_val;
  TKey i_key;
} Node;
/*
** A table has two parts: the hash part 'node' (whose size is stored as a
** base-2 logarithm in 'lsizenode') and the array part 'array' (holding
** 'sizearray' elements).
*/
typedef struct Table {
  CommonHeader;  /* common header (macro defined elsewhere) */
  lu_byte flags;  /* 1<<p means tagmethod(p) is not present */
  lu_byte lsizenode;  /* log2 of size of 'node' array */
  unsigned int sizearray;  /* size of 'array' array */
  TValue *array;  /* array part */
  Node *node;  /* hash part */
  Node *lastfree;  /* any free position is before this position */
  struct Table *metatable;
  GCObject *gclist;
} Table;

table读取

论文说明:
这里写图片描述
即非负整数键都有可能存储在array部分, hash部分使用了chained scatter table(链式散列表)结合Brent变体(Brent's variation)的结构; (链式散列表是指: 发生哈希冲突的节点通过next域串成一条链, 链上的节点都存放在同一个node数组中; 链上各节点的键并不相同, 只是发生了冲突)
表读取函数如下, 会根据具体不同的类型调用不同的哈希查找方法,比如int则是优先在array中查找:

/*
** Generic table lookup, dispatching on the type tag of 'key'.
** Short strings and integers are handed to their specialised lookups and
** a nil key never matches. A float key that luaV_tointeger can convert
** (mode 0) is looked up as that integer; otherwise it falls through to
** the generic search, which walks the collision chain starting at the
** key's main position. Returns luaO_nilobject when the key is absent.
*/
const TValue *luaH_get (Table *t, const TValue *key) {
  switch (ttype(key)) {
    case LUA_TNIL:
      return luaO_nilobject;
    case LUA_TSHRSTR:
      return luaH_getstr(t, tsvalue(key));
    case LUA_TNUMINT:
      return luaH_getint(t, ivalue(key));
    case LUA_TNUMFLT: {
      lua_Integer k;
      if (luaV_tointeger(key, &k, 0))  /* float with an integer value? */
        return luaH_getint(t, k);  /* treat it as an integer key */
      /* not convertible: search it as a generic key */
    }  /* FALLTHROUGH */
    default: {
      Node *n = mainposition(t, key);
      int step;  /* relative offset to the next node in the chain */
      do {
        if (luaV_rawequalobj(gkey(n), key))
          return gval(n);  /* found */
        step = gnext(n);
        n += step;  /* a zero offset leaves 'n' unchanged */
      } while (step != 0);
      return luaO_nilobject;  /* chain exhausted */
    }
  }
}

这里分为short string, int, nil, 浮点数(double)几种查找, 如下面是short string的查找 (注意: 上面luaH_get中调用的函数名是luaH_getstr, 而下面列出的函数名是luaH_getshortstr, 两段代码可能摘自不同版本的源码):


/*
** Look up a short-string key in the hash part of table 't'.
** The search starts at the node selected by hashstr() and follows the
** chain of relative 'next' offsets; luaO_nilobject is returned when the
** chain ends without a match.
*/
const TValue *luaH_getshortstr (Table *t, TString *key) {
  Node *n = hashstr(t, key);  /* first node of the chain for this key */
  int step;  /* relative offset to the next node in the chain */
  lua_assert(key->tt == LUA_TSHRSTR);
  do {
    const TValue *k = gkey(n);
    if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
      return gval(n);  /* found */
    step = gnext(n);
    n += step;  /* a zero offset leaves 'n' unchanged */
  } while (step != 0);
  return luaO_nilobject;  /* chain exhausted */
}

下面是int的获取方式, 可以看出, 当超出数组范围时就会查找hash表:

/*
** Look up an integer key. Keys inside the array part are answered
** directly from 'array'; any other key is searched for in the hash part
** along the chain that starts at hashint(). Returns luaO_nilobject when
** the key is not present in the hash part.
*/
const TValue *luaH_getint (Table *t, lua_Integer key) {
  /* the unsigned comparison is false exactly when 1 <= key <= sizearray */
  if (l_castS2U(key - 1) >= t->sizearray) {
    Node *n = hashint(t, key);
    int step;  /* relative offset to the next node in the chain */
    do {
      if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
        return gval(n);  /* found */
      step = gnext(n);
      n += step;  /* a zero offset leaves 'n' unchanged */
    } while (step != 0);
    return luaO_nilobject;  /* chain exhausted */
  }
  return &t->array[key - 1];  /* key lives in the array part */
}

在论文中经常提到main position, 这是指键的哈希值所对应的hash表节点位置(即查找该键时冲突链的起始节点), 由下面的mainposition函数计算得到;

/*
** returns the 'main' position of an element in a table (that is, the index
** of its hash value)
** NOTE: the body of this function is elided in this excerpt.
*/
static Node *mainposition (const Table *t, const TValue *key) {
/*...*/
}

table写入

/*
** Return the slot for 'key' in table 't': the existing slot when
** luaH_get finds one, otherwise a slot created by luaH_newkey.
** Callers beware: after using this function you probably need to check
** a GC barrier and invalidate the TM cache.
*/
TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
  const TValue *p = luaH_get(t, key);
  return (p == luaO_nilobject) ? luaH_newkey(L, t, key)
                               : cast(TValue *, p);
}

重点在luaH_newkey函数里,

/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** In short: when the main position is occupied, the node occupying it is
** examined. If that node is not in its own main position it is moved to a
** free slot and the new key takes over the main position; if it is in its
** own main position, the new key is stored in the free slot instead.
** NOTE: this excerpt is truncated; the relocation/insertion code after
** the 'othern' computation is elided.
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Integer k;
    if (luaV_tointeger(key, &k, 0)) {  /* index is int? */  /* float key converted to an integer */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
    else if (luai_numisnan(fltvalue(key)))
      luaG_runerror(L, "table index is NaN");
  }
  mp = mainposition(t, key);
  if (!ttisnil(gval(mp)) || isdummy(mp)) {  /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t);  /* get a free place */ /* presumably found via the table's 'lastfree' field; getfreepos is not shown here */
    if (f == NULL) {  /* cannot find a free place? */
      rehash(L, t, key);  /* grow table */    /* recompute sizes and resize (see rehash) */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key);  /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));
    /*.......*/
}

看rehash过程:

/*
** Recompute the sizes of the array and hash parts of table 't' and resize
** it accordingly. The extra key 'ek' (the key about to be inserted) is
** included in the counts.
** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int asize;  /* optimal size for array part */
  unsigned int na;  /* number of keys in the array part */
  unsigned int nums[MAXABITS + 1];
  int i;
  int totaluse;
  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
  /* numusearray tallies the array-part keys into the 'nums' slices
     described above (its body is not shown here). Per the table notes,
     not every integer key ends up in the array part: the array size is
     chosen so that more than half of its slots are in use. */
  na = numusearray(t, nums);  /* count keys in array part */
  totaluse = na;  /* all those keys are integer keys */
  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
  /* count extra key */
  na += countint(ek, nums);
  totaluse++;
  /* compute new size for array part */
  /* computesizes (not shown) presumably enforces the "more than half the
     slots in use" rule; it also updates 'na' through the pointer */
  asize = computesizes(nums, &na);
  /* resize the table to new computed sizes */
  luaH_resize(L, t, asize, totaluse - na);
}

// 注: rehash时会根据统计结果重新计算两部分的大小, 数组部分和hash部分都可能增大或减小

TString

短字符串会被内部化(intern)并存放于全局的字符串hash表里, 存放时也可能需要将该哈希表扩大;

/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
** This struct is only the header of a string: the character data is
** stored immediately after it. The union 'u' is shared between the two
** string kinds: 'lnglen' for long strings, 'hnext' for hash-table chaining.
*/
typedef struct TString {
  CommonHeader;
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;
  union {
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;
} TString;

UserData

存储形式上和字符串相同(数据紧跟在头部结构之后), 但末尾不追加'\0'

/*
** Header for userdata; memory area follows the end of this structure
** (aligned according to 'UUdata'; see next).
** The attached "user value" is stored split across two fields: its
** payload in 'user_' and its type tag in 'ttuv_'.
*/
typedef struct Udata {
  CommonHeader;
  lu_byte ttuv_;  /* user value's tag */
  struct Table *metatable;
  size_t len;  /* number of bytes */
  union Value user_;  /* user value */
} Udata;

栈和调用链

lua线程数据结构如下, 每个线程里都有一个指向全局的共享lua状态:
/*
** 'per thread' state
** NOTE: several fields are elided in this excerpt (marked below).
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  global_State *l_G;   /* global state shared by all threads (the VM proper, per the original note) */
 /**....**/
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  UpVal *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;    /* garbage-collection link (per the original note) */
 /**....**/
};
lua_State的所有的lua C API都是围绕这个状态机来改变状态的, 独立在线程栈里操作;
而全局共享的真正虚拟机是如下说明的:
/*
** 'global state', shared by all threads of this state
** NOTE: the fields are elided in this excerpt. The typedef is completed
** with its declarator so that 'global_State' is usable as a type name,
** as the lua_State field 'l_G' requires.
*/
typedef struct global_State {
/**.....**/
} global_State;
状态机的栈信息数据结构StkId
由下面的定义可以知道, StkId就是指向TValue的指针(用作栈元素的索引); TValue由TValuefields宏定义, 包含Value value_; int tt_ 两部分, value_是联合体(Value类型), tt_说明联合体中当前存放的值的类型; 由上面Value的结构可知, 它是由{可回收对象指针; void*的light userdata; booleans; light C functions; integer; number} 这些类型组成的联合体, 所以需要一个tt_来说明当前的TValue到底是什么类型;
typedef TValue *StkId;  /* index to stack elements */

#define BASIC_STACK_SIZE        (2*LUA_MINSTACK)  /* stack size used when a stack is first allocated: twice LUA_MINSTACK slots */

/* minimum Lua stack available to a C function */
 #define LUA_MINSTACK   20

栈的初始化

数据栈和调用栈构成了lua的线程, 同一个虚拟机中不同线程共享了global_State;

/*
** Allocate and initialise the stack of thread 'L1' (the allocation is made
** through 'L') and set up its first CallInfo record, 'base_ci'.
** NOTE (article author): the stack code has not been studied in detail yet.
*/
static void stack_init (lua_State *L1, lua_State *L) {
  int i; CallInfo *ci;   /* 'ci' is a call record; records are doubly linked (next/previous) and the first one lives inside the thread object */
  /* initialize stack array */
  L1->stack = luaM_newvector(L, BASIC_STACK_SIZE, TValue);  /* BASIC_STACK_SIZE slots */
  L1->stacksize = BASIC_STACK_SIZE;
  for (i = 0; i < BASIC_STACK_SIZE; i++)
    setnilvalue(L1->stack + i);  /* erase new stack */
  L1->top = L1->stack;
  /* keep EXTRA_STACK slots in reserve beyond 'stack_last' */
  L1->stack_last = L1->stack + L1->stacksize - EXTRA_STACK;
  /* initialize first ci */
  ci = &L1->base_ci;
  ci->next = ci->previous = NULL;
  ci->callstatus = 0;
  ci->func = L1->top;
  setnilvalue(L1->top++);  /* 'function' entry for this 'ci' */
  ci->top = L1->top + LUA_MINSTACK;
  L1->ci = ci;
}

线程

数据栈和调用栈构成了lua的线程, 同一个虚拟机中不同线程共享了global_State
参考lua_newthread的创建过程:

/*
** lua_newstate creates a Lua virtual machine (a whole new state), as
** opposed to lua_newthread, which creates a thread inside an existing one.
** NOTE: the body is elided in this excerpt.
*/
LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
  /***.....**/
}
/*
** lua_newthread creates a new thread that is initialised against the same
** global_State as 'L'. The new thread is linked into the 'allgc' list,
** pushed on L's stack to anchor it, inherits L's hook settings, receives a
** copy of the main thread's extra space, and gets its own stack.
** Returns the new thread.
*/
LUA_API lua_State *lua_newthread (lua_State *L) {
  global_State *g = G(L);
  lua_State *L1;
  lua_lock(L);
  luaC_checkGC(L);
  /* create new thread */
  /* LX: thread state + extra space */
  L1 = &cast(LX *, luaM_newobject(L, LUA_TTHREAD, sizeof(LX)))->l;
  L1->marked = luaC_white(g);
  L1->tt = LUA_TTHREAD;  /* type tag */
  /* link it on list 'allgc' */  /* so the garbage collector tracks it (per the original note) */
  L1->next = g->allgc;
  g->allgc = obj2gco(L1);
  /* anchor it on L stack */
  setthvalue(L, L->top, L1);
  api_incr_top(L);
  preinit_thread(L1, g);
  /* copy the hook configuration from the creating thread */
  L1->hookmask = L->hookmask;
  L1->basehookcount = L->basehookcount;
  L1->hook = L->hook;
  resethookcount(L1);
  /* initialize L1 extra space */
  memcpy(lua_getextraspace(L1), lua_getextraspace(g->mainthread),
         LUA_EXTRASPACE);
  luai_userstatethread(L, L1);
  stack_init(L1, L);  /* init stack */
  lua_unlock(L);
  return L1;
}

lua C API

一般的如lua_pushstring之类的理解不难, 现在看一个lua_pushvalue的代码:

/*
** Push a copy of the value at index 'idx' onto the top of the stack.
** The index is resolved by index2addr; the copy and the top increment
** happen between lua_lock and lua_unlock.
*/
LUA_API void lua_pushvalue (lua_State *L, int idx) {
  lua_lock(L);
  setobj2s(L, L->top, index2addr(L, idx)); /* copy the value into the slot at the top */
  api_incr_top(L);
  lua_unlock(L);
}
/*
** Translate an API index into the address of a value.
**   - positive index: counted from the slot of the running function
**     ('ci->func'); yields NONVALIDVALUE when it lies at or above the top;
**   - negative, non-pseudo index: counted back from the stack top;
**   - LUA_REGISTRYINDEX: the registry stored in the global state;
**   - any other pseudo-index: an upvalue of the running C closure, or
**     NONVALIDVALUE for a light C function or an out-of-range upvalue.
*/
static TValue *index2addr (lua_State *L, int idx) {
  CallInfo *ci = L->ci;  /* current call record */
  if (idx > 0) {  /* positive index */
    TValue *o = ci->func + idx;  /* function slot + idx */
    api_check(L, idx <= ci->top - (ci->func + 1), "unacceptable index");
    return (o >= L->top) ? NONVALIDVALUE : o;
  }
  if (!ispseudo(idx)) {  /* negative index */
    api_check(L, idx != 0 && -idx <= L->top - (ci->func + 1), "invalid index");
    return L->top + idx;
  }
  if (idx == LUA_REGISTRYINDEX)  /* registry */
    return &G(L)->l_registry;
  /* upvalues */
  idx = LUA_REGISTRYINDEX - idx;
  api_check(L, idx <= MAXUPVAL + 1, "upvalue index too large");
  if (ttislcf(ci->func))  /* light C function? */
    return NONVALIDVALUE;  /* it has no upvalues */
  else {
    CClosure *func = clCvalue(ci->func);
    return (idx <= func->nupvalues) ? &func->upvalue[idx-1] : NONVALIDVALUE;
  }
}