Lua5.2.3源码阅读--Table(ipairs,pairs)

最新推荐文章于 2023-02-11 21:48:49 发布

yyjd8088

最新推荐文章于 2023-02-11 21:48:49 发布

阅读量690

点赞数

分类专栏： lua 文章标签： lua 阅读源码

lua 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

这篇文章不错，解决了困扰我的问题，转过来纪念一下

“#”lua表长度计算

 -- Demo 1: fill integer keys 1..4 and 7 one assignment at a time.
 -- Keys 5 and 6 are never set, so the table is not a contiguous sequence.
 local a = {};
    a[1] = 10;
    a[2] = 20;
    a[3] = 30;
    a[4] = 40;
    --a[6] = 60;   -- intentionally disabled in this variant: key 6 stays nil
    a[7] = 70;

    -- '#' reports a border (a[n] non-nil, a[n+1] nil); here the article observes 4,
    -- i.e. the entry at key 7 is not reflected in the result.
    print(#a);  -->4

  -- Demo 2: same as the previous demo, but key 6 is also assigned.
  -- Key 5 is still nil, so the table still has a hole.
  local a = {};
    a[1] = 10;
    a[2] = 20;
    a[3] = 30;
    a[4] = 40;
    a[6] = 60;
    a[7] = 70;

    -- Despite the nil at key 5, the article observes 7 here: with holes present,
    -- the border returned by '#' depends on how the table is laid out internally.
    print(#a);  -->7

这里就比较奇怪了，得到的不是table的长度。Lua文档中说，如果是一个连续的序列，那么才可以用#计算table的长度，如果中间有nil值，计算将会有问题。那就到源码中找找答案。#计算长度调用的是lua_rawlen函数，该函数如下。

/*
** Raw length of the value at stack index 'idx':
**   - string:   the 'len' field of its string header;
**   - userdata: the 'len' field of its userdata header;
**   - table:    the border computed by luaH_getn;
**   - any other type: 0.
*/
LUA_API size_t lua_rawlen (lua_State *L, int idx) {
  StkId slot = index2addr(L, idx);
  size_t len = 0;  /* result for every type that has no raw length */
  switch (ttypenv(slot)) {
    case LUA_TSTRING:
      len = tsvalue(slot)->len;
      break;
    case LUA_TUSERDATA:
      len = uvalue(slot)->len;
      break;
    case LUA_TTABLE:
      len = luaH_getn(hvalue(slot));
      break;
    default:
      break;
  }
  return len;
}

如果是table类型，调用luaH_getn函数，函数如下。

/*
** Return a boundary of table 't': an integer index i for which t[i] is
** non-nil and t[i+1] is nil (0 when t[1] is nil).
**
** When the last slot of the array part is nil, a boundary lies inside the
** array part and is located by binary search.  Otherwise the array part is
** either empty or ends in a non-nil value: if the hash part is the dummy
** node the array size itself is the answer, else the search continues in
** the hash part through unbound_search.
*/
int luaH_getn (Table *t) {
  unsigned int hi = t->sizearray;
  unsigned int lo = 0;
  if (hi == 0 || !ttisnil(&t->array[hi - 1])) {
    /* no boundary inside the array part */
    if (isdummy(t->node))  /* nothing stored in the hash part */
      return hi;
    return unbound_search(t, hi);
  }
  /* invariant: index 'lo' is 0 or non-nil, index 'hi' is nil */
  while (hi - lo > 1) {
    unsigned int mid = (lo + hi) / 2;
    if (ttisnil(&t->array[mid - 1]))
      hi = mid;
    else
      lo = mid;
  }
  return lo;
}
/*
** Look for a boundary starting from index 'j', which is either zero or an
** index known to be present.  The upper probe starts at j+1 and is doubled
** until an absent index is found; a binary search between the last present
** and the first absent index then yields the boundary.  If doubling pushes
** the probe beyond MAX_INT, fall back to a linear scan from index 1.
*/
static int unbound_search (Table *t, unsigned int j) {
  unsigned int lo = j;      /* zero or a present index */
  unsigned int hi = j + 1;  /* candidate for an absent index */
  for (;;) {
    if (ttisnil(luaH_getint(t, hi)))
      break;  /* 'hi' is absent: bracket found */
    lo = hi;
    hi *= 2;
    if (hi > cast(unsigned int, MAX_INT)) {  /* probe out of int range? */
      /* pathological table: walk the keys one by one from the start */
      unsigned int k = 1;
      while (!ttisnil(luaH_getint(t, k)))
        k++;
      return k - 1;
    }
  }
  /* narrow the bracket [lo, hi) down to a single boundary */
  while (hi - lo > 1) {
    unsigned int mid = (lo + hi) / 2;
    if (ttisnil(luaH_getint(t, mid)))
      hi = mid;
    else
      lo = mid;
  }
  return lo;
}

(1)如果数组部分长度为0，且hash部分为空，那么返回值为0；若hash部分不为空，则从0开始调用unbound_search在hash部分查找边界。
(2)如果数组满（array[sizearray - 1]不为nil），且hash部分为空，返回sizearray；若hash部分不为空，则从sizearray开始调用unbound_search继续查找边界。
(3)如果数组有值，且array[sizearray - 1]为nil，那么按照二分查找，在数组部分找到一个由非nil到nil的边界位置，并返回该位置。如果数组部分从1到n不存在nil，那么这个查找的结果是稳定的，大小就是table的大小。如果其中存在nil值，那这个二分查找的结果不稳定，得到的值随nil分布的不同而不同。说明在脚本中要慎用“#”取table的长度，除非确定是连续序列。这里很好地解释了刚才取长度异常的问题。

前面已经分析了table的查找，赋值，下面再来看看table的遍历，遍历的函数是luaH_next。

/*
** Advance a table traversal.  'key' holds the previous key; findindex maps
** it to a position, and the scan resumes at the following position.
** The array part is scanned first, then the node vector of the hash part
** (slot by slot, in storage order).  On success the next key is stored at
** 'key', its value at 'key+1', and 1 is returned; 0 means the traversal
** is finished.
*/
int luaH_next (lua_State *L, Table *t, StkId key) {
  int pos = findindex(L, t, key) + 1;  /* first position still to visit */
  while (pos < t->sizearray) {  /* array part */
    if (!ttisnil(&t->array[pos])) {
      setnvalue(key, cast_num(pos+1));  /* array slot 'pos' holds key pos+1 */
      setobj2s(L, key+1, &t->array[pos]);
      return 1;
    }
    pos++;
  }
  pos -= t->sizearray;  /* convert to an index into the node vector */
  while (pos < sizenode(t)) {  /* hash part */
    if (!ttisnil(gval(gnode(t, pos)))) {
      setobj2s(L, key, gkey(gnode(t, pos)));
      setobj2s(L, key+1, gval(gnode(t, pos)));
      return 1;
    }
    pos++;
  }
  return 0;  /* nothing left to visit */
}

通过findindex获得一个位置值，如果是在数组部分，就查找数组部分的下一个不为nil的元素。如果找到，就直接返回下一个值，如果未找到，就到hash部分查找。Hash部分从头部开始查找，不会用里面的next指针，直接采用hash数组，从头到尾找。

Table rehash函数
通过代码查找可以看出，rehash只出现在luaH_newkey，也就是在分配新元素时，空间不够，就会触发rehash操作。Rehash源码如下

/*
** Recompute the sizes of both parts of table 't', taking into account the
** extra key 'ek' that is about to be inserted, and resize the table.
** 'counts[i]' accumulates the number of integer keys k with
** 2^(i-1) < k <= 2^i; computesizes uses it to choose the new array size.
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  int counts[MAXBITS+1] = {0};  /* per-slice integer-key counters */
  int intkeys;  /* integer keys that are candidates for the array part */
  int allkeys;  /* every key in the table, plus the new one */
  int inarray;  /* keys that will actually live in the array part */
  intkeys = numusearray(t, counts);  /* keys currently in the array part */
  allkeys = intkeys;  /* all of those are integer keys */
  allkeys += numusehash(t, counts, &intkeys);  /* keys in the hash part */
  /* account for the key being inserted */
  intkeys += countint(ek, counts);  /* may the new key use an array slot? */
  allkeys++;
  /* choose the new array size; 'intkeys' receives that size */
  inarray = computesizes(counts, &intkeys);
  /* whatever does not go to the array part goes to the hash part */
  luaH_resize(L, t, intkeys, allkeys - inarray);
}

nums数组用于记录每个段的有效值的个数，nums[i]对应的段是(2^(i-1), 2^i]，即满足2^(i-1) < k <= 2^i的整数key的个数。numusearray返回数组部分的有效值个数，并填充nums数组。numusehash函数用于计算hash部分有效值的个数，如果其中有key为整数，那么也在nums中记录，第三个参数用于记录总的key为整数的元素个数。countint函数判断新的key是否需要记录在nums中。computesizes是个关键函数，返回一个int值na，表示总的nasize中，有na个将会放在rehash后的数组中。computesizes代码如下。

/*
** Choose the size of the array part.  'nums[i]' is the number of integer
** keys in slice i and '*narray' the total number of integer keys.
** The chosen size is the largest power of two 2^i examined such that more
** than 2^i/2 of the keys counted up to slice i are present.
** On return '*narray' holds that size (0 if none qualifies) and the result
** is the number of keys that will be stored in the array part.
*/
static int computesizes (int nums[], int *narray) {
  const int total = *narray;  /* integer keys to be accounted for */
  int slice = 0;
  int pow2 = 1;   /* 2^slice */
  int seen = 0;   /* keys counted in slices 0..slice */
  int kept = 0;   /* keys that fit in the best size found so far */
  int best = 0;   /* best array size found so far */
  while (pow2/2 < total) {
    if (nums[slice] > 0) {
      seen += nums[slice];
      if (seen > pow2/2) {  /* more than half of the slots would be used */
        best = pow2;
        kept = seen;
      }
    }
    if (seen == total)  /* every integer key has been counted */
      break;
    slice++;
    pow2 *= 2;
  }
  *narray = best;
  lua_assert(*narray/2 <= kept && kept <= *narray);
  return kept;
}

判断的关键是，计算到某个段位i时，如果累计的有效值个数a大于该段上界2^i的一半（即a > 2^i/2），就把2^i赋给n，个数a赋给na。依次遍历nums数组，就会得到满足条件的最大的n。根据计算出来的nasize和nhsize，调用luaH_resize重新分配table空间。

最后回顾下ipairs和pairs，前者只从整数键1开始依次访问（通常即数组部分），在遇到nil的时候就停止遍历，后面即使有值也不会访问到。pairs是整体遍历，只要有值，就会访问到。

table总结：在遍历table的时候，不允许向table中插入新的元素，这可能会引起rehash，hash表的修改，导致行为未知。删除table中的元素，直接将该值置为nil。使用#计算数组部分的长度的时候，很容易出错，使用时要谨慎。

原贴： http://blog.csdn.net/murisly/article/details/46885327