基于lua源码版本5.3.3分析
先看这个函数的实现部分
LUA_API const char *lua_pushfstring (lua_State *L, const char *fmt, ...) {
const char *ret;
va_list argp;
lua_lock(L);
va_start(argp, fmt);
ret = luaO_pushvfstring(L, fmt, argp);
va_end(argp);
luaC_checkGC(L);
lua_unlock(L);
return ret;
}
1,宏处理
默认没有定义 宏lua_lock,所以lua_unlock(L);lua_lock(L);没有任何功能
#if !defined(lua_lock)
#define lua_lock(L) ((void) 0)
#define lua_unlock(L) ((void) 0)
#endif
void va_start( va_list arg_ptr, prev_param );
type va_arg( va_list arg_ptr, type );
void va_end( va_list arg_ptr );
2,可变参数处理
va_list,va_start,va_end 这些宏定义在stdarg.h中
使用可变参数应该有以下步骤:
首先在函数里定义一个\va_list型的变量,这里是argp,这个变
量是指向参数的指针.
2)然后用va_start宏初始化变量argp,这个宏的第二个参数是第
一个可变参数的前一个参数,是一个固定的参数. 这里是fmt
3)然后用vagp返回可变的参数, type是返回的类型
4)最后用va_end宏结束可变参数的获取
3,函数实际实现部分
lobject.c文件
const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
int n = 0;
for (;;) {
const char *e = strchr(fmt, '%');
if (e == NULL) break; //没有%直接跳出
pushstr(L, fmt, e - fmt); //放入%前面的字符
//通过switch %分号后面一个字符,返回指定格式的数据
switch (*(e+1)) {
case 's': { /* zero-terminated string */
const char *s = va_arg(argp, char *);
if (s == NULL) s = "(null)";
pushstr(L, s, strlen(s));
break;
}
case 'c': { /* an 'int' as a character */
char buff = cast(char, va_arg(argp, int));
if (lisprint(cast_uchar(buff)))
pushstr(L, &buff, 1);
else /* non-printable character; print its code */
luaO_pushfstring(L, "<\\%d>", cast_uchar(buff));
break;
}
case 'd': { /* an 'int' */
setivalue(L->top, va_arg(argp, int));
goto top2str;
}
case 'I': { /* a 'lua_Integer' */
setivalue(L->top, cast(lua_Integer, va_arg(argp, l_uacInt)));
goto top2str;
}
case 'f': { /* a 'lua_Number' */
setfltvalue(L->top, cast_num(va_arg(argp, l_uacNumber)));
top2str: /* convert the top element to a string */
luaD_inctop(L);
luaO_tostring(L, L->top - 1);
break;
}
case 'p': { /* a pointer */
char buff[4*sizeof(void *) + 8]; /* should be enough space for a '%p' */
int l = l_sprintf(buff, sizeof(buff), "%p", va_arg(argp, void *));
pushstr(L, buff, l);
break;
}
case 'U': { /* an 'int' as a UTF-8 sequence */
char buff[UTF8BUFFSZ];
int l = luaO_utf8esc(buff, cast(long, va_arg(argp, long)));
pushstr(L, buff + UTF8BUFFSZ - l, l);
break;
}
case '%': {
pushstr(L, "%", 1);
break;
}
default: {
luaG_runerror(L, "invalid option '%%%c' to 'lua_pushfstring'",
*(e + 1));
}
}
n += 2; //连接字符计数
fmt = e+2; //跳过前面已经处理过的字符
}
luaD_checkstack(L, 1);
//放置占位符后面的字符串
pushstr(L, fmt, strlen(fmt));
//连接字符,最后只保留一个
if (n > 0) luaV_concat(L, n + 1);
return svalue(L->top - 1);
}
4,pushstr向L栈顶中放入TString 格式字符的地址
//
//lua中字符串用一个TString的结构体表示
typedef struct TString {
CommonHeader;
lu_byte extra; /* reserved words for short strings; "has hash" for longs */
lu_byte shrlen; /* length for short strings */
unsigned int hash;
union {
size_t lnglen; /* length for long strings */
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
//相关宏,lua宏真多啊
#define setsvalue2s setsvalue //设置(set)字符串值(svalue)到(2)栈(s)
#define setsvalue(L,obj,x) \
{ TValue *io = (obj); TString *x_ = (x); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(x_->tt)); \
checkliveness(L,io); }
//宏获取lua_TValue的value_部分
#define val_(o) ((o)->value_)
/宏设置lua_TValue的tt_部分 这个tt_有额外标志
#define settt_(o,t) ((o)->tt_=(t))
#define LUA_TNIL 0
#define LUA_TBOOLEAN 1
#define LUA_TLIGHTUSERDATA 2
#define LUA_TNUMBER 3
#define LUA_TSTRING 4
#define LUA_TTABLE 5
#define LUA_TFUNCTION 6
//把lua对象转为GCObject *
//这里TString*通过GCUnion间接转为GCObject *??源码是这个意思,但是为啥不直接强转呢
//能直接强转的前提因为他们的首部都是CommonHeader,且其中包含数据类型
#define obj2gco(v) \
check_exp(novariant((v)->tt) < LUA_TDEADKEY, (&(cast_u(v)->gc)))
//以下强转也通过暂未发现什么问题
#define obj2gco(v) ((GCObject *)v)
//一些测试宏,默认皆为else后面
/* internal assertions for in-house debugging */
#if defined(lua_assert)
#define check_exp(c,e) (lua_assert(c), (e))
/* to avoid problems with conditions too long */
#define lua_longassert(c) ((c) ? (void)0 : lua_assert(0))
#else
#define lua_assert(c) ((void)0)
#define check_exp(c,e) (e)
#define lua_longassert(c) ((void)0)
#endif
//这里是把TString*转为 GCUnion *
#define cast_u(o) cast(union GCUnion *, (o))
union GCUnion {
GCObject gc; /* common header */
struct TString ts;
struct Udata u;
union Closure cl;
struct Table h;
struct Proto p;
struct lua_State th; /* thread */
};
/* Bit mark for collectable types */
#define BIT_ISCOLLECTABLE (1 << 6)
/* mark a tag as collectable */
#define ctb(t) ((t) | BIT_ISCOLLECTABLE)
设置tt_第七位为1,tt为整形4相或
tt=0X44 tt_为4,所以gc 中的类型和lua_TValue的类型表示还是有点区别
总结:其实就是设置lua_TValue的value_部分和tt_部分
而字符串是可回收对象,所以放入联合体Value中gc 部分
//把字符串的地址放入L->top指向的地址中,top指针自增1
static void pushstr (lua_State *L, const char *str, size_t l) {
setsvalue2s(L, L->top, luaS_newlstr(L, str, l));
luaD_inctop(L);
}
void luaD_inctop (lua_State *L) {
luaD_checkstack(L, 1);
L->top++;
}
/* In general, 'pre'/'pos' are empty (nothing to save) */
#define luaD_checkstack(L,n) luaD_checkstackaux(L,n,(void)0,(void)0)
/*
** Macro to check stack size and grow stack if needed. Parameters
** 'pre'/'pos' allow the macro to preserve a pointer into the
** stack across reallocations, doing the work only when needed.
** 'condmovestack' is used in heavy tests to force a stack reallocation
** at every check.
*/
#define luaD_checkstackaux(L,n,pre,pos) \
if (L->stack_last - L->top <= (n)) \
{ pre; luaD_growstack(L, n); pos; } else { condmovestack(L,pre,pos); }
5,luaS_newlstr创建lua字符串TString
/*
** Maximum length for short strings, that is, strings that are
** internalized. (Cannot be smaller than reserved words or tags for
** metamethods, as these strings must be internalized;
** #("function") = 8, #("__newindex") = 10.)
*/
#if !defined(LUAI_MAXSHORTLEN)
#define LUAI_MAXSHORTLEN 40
#endif
//在lua中分short string 和long string
//short string默认最大长度为40
//short string相对简单分析
/*
** new string (with explicit length)
*/
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
if (l <= LUAI_MAXSHORTLEN) /* short string? */
return internshrstr(L, str, l);
else {
TString *ts;
if (l >= (MAX_SIZE - sizeof(TString))/sizeof(char))
luaM_toobig(L);
ts = luaS_createlngstrobj(L, l);
memcpy(getstr(ts), str, l * sizeof(char));
return ts;
}
}
6,内部短字符实现 internshrstr
/*
** checks whether short string exists and reuses it or creates a new one
*/
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString *ts;
global_State *g = G(L);
unsigned int h = luaS_hash(str, l, g->seed);
TString **list = &g->strt.hash[lmod(h, g->strt.size)];
lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
for (ts = *list; ts != NULL; ts = ts->u.hnext) {
if (l == ts->shrlen &&
(memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
/* found! */
if (isdead(g, ts)) /* dead (but not collected yet)? */
changewhite(ts); /* resurrect it */
return ts;
}
}
if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
luaS_resize(L, g->strt.size * 2);
list = &g->strt.hash[lmod(h, g->strt.size)]; /* recompute with new size */
}
ts = createstrobj(L, l, LUA_TSHRSTR, h);
memcpy(getstr(ts), str, l * sizeof(char));
ts->shrlen = cast_byte(l);
ts->u.hnext = *list;
*list = ts;
g->strt.nuse++;
return ts;
}
7,可回收对象用一个链表链接起来且新的对象放在表头
/*
** create a new collectable object (with given type and size) and link
** it to 'allgc' list.
*/
GCObject *luaC_newobj (lua_State *L, int tt, size_t sz) {
global_State *g = G(L);
GCObject *o = cast(GCObject *, luaM_newobject(L, novariant(tt), sz));
o->marked = luaC_white(g);
o->tt = tt;
o->next = g->allgc;
g->allgc = o;
return o;
}
luaC_checkGC暂时先不管,和GC有关
总结:
1把string包装成TString
2再把TString的地址放入L->top栈顶,这里的栈不是L而是其中的stack
3TString在内部还分short和long,并且有shotr string 有hash链表
marked标志位
可回收对象有三种数据类型
white 表示没有标记 1
gray 表示标记,但是它的引用还未标记 2
black 表示对象和它的引用都已标记 4
其中短字符串内部hash链表未看实现,长字符串也未跟踪,已经明白大概逻辑,目的达到,
以后若继续深入再看具体实现