QuickJS 字节码打印
QuickJS执行JavaScript代码时,先将源码解析成字节码,然后再执行字节码。字节码由操作码和操作数组成。QuickJS支持字节码的打印。我们只需要开启一个宏定义即可将字节码打印出来。
/* dump the bytecode of the compiled functions: combination of bits
1: dump pass 3 final byte code
2: dump pass 2 code
4: dump pass 1 code
8: dump stdlib functions
16: dump bytecode in hex
32: dump line number table

The value used below, 7 = 1 | 2 | 4, requests the output of all three
passes, without the hex dump (16) or the line number table (32).
*/
#define DUMP_BYTECODE (7)
QuickJS将JavaScript代码转化成字节码时,有三个阶段。第一个阶段,解析JavaScript源码,生成最原始的字节码;第二个阶段,处理变量相关的功能:确定变量的来源和作用域,然后将对应的字节码换成更确切的字节码;第三个阶段,确定label的跳转地址,合并指令,并计算栈的长度。在上面的宏注释中,4、2、1三个位分别对应第一、第二、第三阶段的字节码打印。此外,我们还可以打印stdlib函数、十六进制的字节码和行号表。可以根据需要打印的内容,把对应的位组合起来作为DUMP_BYTECODE的值。
/* One of the bytecode dump call sites: prints the bytecode produced by
   pass 1. It is compiled in only when bit 4 of DUMP_BYTECODE is set, and
   is skipped at run time when JS_MODE_STRIP is set in fd->js_mode. */
#if defined(DUMP_BYTECODE) && (DUMP_BYTECODE & 4)
if (!(fd->js_mode & JS_MODE_STRIP)) {
printf("pass 1\n");
/* pass == 1; the final argument (the JSFunctionBytecode pointer) is NULL */
dump_byte_code(ctx, 1, fd->byte_code.buf, fd->byte_code.size,
fd->args, fd->arg_count, fd->vars, fd->var_count,
fd->closure_var, fd->closure_var_count,
fd->cpool, fd->cpool_count, fd->source, fd->line_num,
fd->label_slots, NULL);
printf("\n");
}
#endif
/* Static description of one opcode. */
typedef struct JSOpCode {
#ifdef DUMP_BYTECODE
/* mnemonic printed by the bytecode dumper; only present in dump builds */
const char *name;
#endif
/* length of the whole instruction (opcode byte plus operands) */
uint8_t size; /* in bytes */
/* the opcodes remove n_pop items from the top of the stack, then
pushes n_push items */
uint8_t n_pop;
uint8_t n_push;
/* operand layout, one of the OP_FMT_* values */
uint8_t fmt;
} JSOpCode;
/*
 * Print a disassembly of a bytecode buffer to stdout.
 *
 * The buffer is walked twice. The first walk decodes every label-carrying
 * operand and records the destination offsets in a scratch array. The
 * second walk prints one line per instruction (optionally preceded by its
 * hex bytes), prefixes jump destinations with their offset, and, when
 * `source` is given, interleaves the corresponding source lines.
 *
 * Parameters:
 *   ctx                 context used for the scratch allocation and passed
 *                       on to the printing helpers.
 *   pass                1, 2 or 3. Selects how a label operand is resolved:
 *                       pass 1 uses label_slots[addr].pos, pass 2 uses
 *                       label_slots[addr].pos2, pass 3 treats the operand
 *                       as an offset relative to the operand's position.
 *   tab, len            the bytecode bytes and their count.
 *   args, arg_count     function argument definitions and their count.
 *   vars, var_count     function local variable definitions and their count.
 *   closure_var,
 *   closure_var_count   closure variable definitions and their count.
 *   cpool, cpool_count  constant pool: an array of JSValue. Per the original
 *                       author's note, a JSValue carries a tag identifying
 *                       its type and a JSValueUnion payload that is an
 *                       integer, a float, or a pointer to a reference
 *                       counted object (header: JSRefCountHeader).
 *   source, line_num    source text (may be NULL, in which case no source
 *                       lines are printed) and the line number that is
 *                       subtracted to obtain source-relative line numbers.
 *   label_slots         label table indexed by label operands in passes 1
 *                       and 2.
 *   b                   when non-NULL, opcodes are decoded through
 *                       short_opcode_info() and line numbers are obtained
 *                       from find_line_num(); otherwise opcode_info[] and
 *                       OP_line_num instructions are used.
 */
static void dump_byte_code(JSContext *ctx, int pass,
                           const uint8_t *tab, int len,
                           const JSVarDef *args, int arg_count,
                           const JSVarDef *vars, int var_count,
                           const JSClosureVar *closure_var, int closure_var_count,
                           const JSValue *cpool, uint32_t cpool_count,
                           const char *source, int line_num,
                           const LabelSlot *label_slots, JSFunctionBytecode *b)
{
    const JSOpCode *oi;
    int pos, pos_next, op, size, idx, addr, line, line1, in_source;
    /* one flag byte per bytecode offset; bit 0 marks a jump destination */
    uint8_t *bits = js_mallocz(ctx, len * sizeof(*bits));
    BOOL use_short_opcodes = (b != NULL);

    /* scan for jump targets; skipped entirely if the scratch array could
       not be allocated, in which case the listing has no label prefixes */
    for (pos = 0; bits != NULL && pos < len; pos = pos_next) {
        op = tab[pos];
        /* Validate the opcode byte before using it as a table index, and
           step over an invalid byte by 1 exactly as the print loop does. */
        if (op >= OP_COUNT) {
            pos_next = pos + 1;
            continue;
        }
        if (use_short_opcodes)
            oi = &short_opcode_info(op);
        else
            oi = &opcode_info[op];
        /* the opcode size gives the position of the next instruction */
        pos_next = pos + oi->size;
        /* a truncated last instruction has no complete operand to read */
        if (pos_next > len)
            break;
        switch (oi->fmt) {
#if SHORT_OPCODES
        case OP_FMT_label8:
            pos++;
            addr = (int8_t)tab[pos];
            goto has_addr;
        case OP_FMT_label16:
            pos++;
            addr = (int16_t)get_u16(tab + pos);
            goto has_addr;
#endif
        case OP_FMT_atom_label_u8:
        case OP_FMT_atom_label_u16:
            /* skip the 4-byte atom that precedes the label */
            pos += 4;
            /* fall thru */
        case OP_FMT_label:
        case OP_FMT_label_u16:
            pos++;
            addr = get_u32(tab + pos);
            goto has_addr;
        has_addr:
            if (pass == 1)
                addr = label_slots[addr].pos;
            if (pass == 2)
                addr = label_slots[addr].pos2;
            if (pass == 3)
                addr += pos;
            if (addr >= 0 && addr < len)
                bits[addr] |= 1;
            break;
        default:
            break;
        }
    }

    in_source = 0;
    if (source) {
        /* Always print first line: needed if single line */
        print_lines(source, 0, 1);
        in_source = 1;
    }
    line1 = line = 1;
    pos = 0;
    /* second walk: print one instruction per iteration */
    while (pos < len) {
        op = tab[pos];
        if (source) {
            if (b) {
                line1 = find_line_num(ctx, b, pos) - line_num + 1;
            } else if (op == OP_line_num) {
                line1 = get_u32(tab + pos + 1) - line_num + 1;
            }
            if (line1 > line) {
                if (!in_source)
                    printf("\n");
                in_source = 1;
                print_lines(source, line, line1);
                line = line1;
            }
        }
        if (in_source)
            printf("\n");
        in_source = 0;
        if (op >= OP_COUNT) {
            printf("invalid opcode (0x%02x)\n", op);
            pos++;
            continue;
        }
        if (use_short_opcodes)
            oi = &short_opcode_info(op);
        else
            oi = &opcode_info[op];
        size = oi->size;
        if (pos + size > len) {
            printf("truncated opcode (0x%02x)\n", op);
            break;
        }
#if defined(DUMP_BYTECODE) && (DUMP_BYTECODE & 16)
        {
            /* hex dump of the instruction bytes, wrapped after 6 bytes and
               padded so that the mnemonic column stays aligned */
            int i, x, x0;
            x = x0 = printf("%5d ", pos);
            for (i = 0; i < size; i++) {
                if (i == 6) {
                    printf("\n%*s", x = x0, "");
                }
                x += printf(" %02X", tab[pos + i]);
            }
            printf("%*s", x0 + 20 - x, "");
        }
#endif
        /* label prefix for offsets recorded as jump destinations */
        if (bits != NULL && bits[pos]) {
            printf("%5d: ", pos);
        } else {
            printf(" ");
        }
        /* opcode mnemonic */
        printf("%s", oi->name);
        /* from here on pos addresses the first operand byte */
        pos++;
        switch(oi->fmt) {
        case OP_FMT_none_int:
            printf(" %d", op - OP_push_0);
            break;
        case OP_FMT_npopx:
            printf(" %d", op - OP_call0);
            break;
        case OP_FMT_u8:
            printf(" %u", get_u8(tab + pos));
            break;
        case OP_FMT_i8:
            printf(" %d", get_i8(tab + pos));
            break;
        case OP_FMT_u16:
        case OP_FMT_npop:
            printf(" %u", get_u16(tab + pos));
            break;
        case OP_FMT_npop_u16:
            printf(" %u,%u", get_u16(tab + pos), get_u16(tab + pos + 2));
            break;
        case OP_FMT_i16:
            printf(" %d", get_i16(tab + pos));
            break;
        case OP_FMT_i32:
            printf(" %d", get_i32(tab + pos));
            break;
        case OP_FMT_u32:
            printf(" %u", get_u32(tab + pos));
            break;
#if SHORT_OPCODES
        case OP_FMT_label8:
            addr = get_i8(tab + pos);
            goto has_addr1;
        case OP_FMT_label16:
            addr = get_i16(tab + pos);
            goto has_addr1;
#endif
        case OP_FMT_label:
            addr = get_u32(tab + pos);
            goto has_addr1;
        has_addr1:
            if (pass == 1)
                printf(" %u:%u", addr, label_slots[addr].pos);
            if (pass == 2)
                printf(" %u:%u", addr, label_slots[addr].pos2);
            if (pass == 3)
                printf(" %u", addr + pos);
            break;
        case OP_FMT_label_u16:
            addr = get_u32(tab + pos);
            if (pass == 1)
                printf(" %u:%u", addr, label_slots[addr].pos);
            if (pass == 2)
                printf(" %u:%u", addr, label_slots[addr].pos2);
            if (pass == 3)
                printf(" %u", addr + pos);
            printf(",%u", get_u16(tab + pos + 4));
            break;
#if SHORT_OPCODES
        case OP_FMT_const8:
            idx = get_u8(tab + pos);
            goto has_pool_idx;
#endif
        case OP_FMT_const:
            idx = get_u32(tab + pos);
            goto has_pool_idx;
        has_pool_idx:
            printf(" %u: ", idx);
            if (idx < cpool_count) {
                JS_DumpValue(ctx, cpool[idx]);
            }
            break;
        case OP_FMT_atom:
            printf(" ");
            print_atom(ctx, get_u32(tab + pos));
            break;
        case OP_FMT_atom_u8:
            /* operands are an atom followed by a u8: print the atom,
               then the number */
            printf(" ");
            print_atom(ctx, get_u32(tab + pos));
            printf(",%d", get_u8(tab + pos + 4));
            break;
        case OP_FMT_atom_u16:
            printf(" ");
            print_atom(ctx, get_u32(tab + pos));
            printf(",%d", get_u16(tab + pos + 4));
            break;
        case OP_FMT_atom_label_u8:
        case OP_FMT_atom_label_u16:
            printf(" ");
            print_atom(ctx, get_u32(tab + pos));
            addr = get_u32(tab + pos + 4);
            if (pass == 1)
                printf(",%u:%u", addr, label_slots[addr].pos);
            if (pass == 2)
                printf(",%u:%u", addr, label_slots[addr].pos2);
            if (pass == 3)
                printf(",%u", addr + pos + 4);
            if (oi->fmt == OP_FMT_atom_label_u8)
                printf(",%u", get_u8(tab + pos + 8));
            else
                printf(",%u", get_u16(tab + pos + 8));
            break;
        case OP_FMT_none_loc:
            idx = (op - OP_get_loc0) % 4;
            goto has_loc;
        case OP_FMT_loc8:
            idx = get_u8(tab + pos);
            goto has_loc;
        case OP_FMT_loc:
            idx = get_u16(tab + pos);
        has_loc:
            printf(" %d: ", idx);
            if (idx < var_count) {
                print_atom(ctx, vars[idx].var_name);
            }
            break;
        case OP_FMT_none_arg:
            idx = (op - OP_get_arg0) % 4;
            goto has_arg;
        case OP_FMT_arg:
            idx = get_u16(tab + pos);
        has_arg:
            printf(" %d: ", idx);
            if (idx < arg_count) {
                print_atom(ctx, args[idx].var_name);
            }
            break;
        case OP_FMT_none_var_ref:
            idx = (op - OP_get_var_ref0) % 4;
            goto has_var_ref;
        case OP_FMT_var_ref:
            idx = get_u16(tab + pos);
        has_var_ref:
            printf(" %d: ", idx);
            if (idx < closure_var_count) {
                print_atom(ctx, closure_var[idx].var_name);
            }
            break;
        default:
            break;
        }
        printf("\n");
        /* the opcode byte was already consumed by the pos++ above */
        pos += oi->size - 1;
    }
    if (source) {
        if (!in_source)
            printf("\n");
        print_lines(source, line, INT32_MAX);
    }
    /* js_free's handling of NULL is not visible here, so guard the call */
    if (bits != NULL)
        js_free(ctx, bits);
}
字节码的打印由dump_byte_code来实现。它分两步来完成功能。第一步遍历字节码,找出label的位置,标记在bits数组中。第二步,重新遍历字节码,逐条将操作码+操作数作为单个整体解析出来并打印。如果开启了DUMP_BYTECODE的第16位,会先打印该指令对应的十六进制字节码。接着根据第一步设置的bits判断该位置是否是label:如果是就打印标签,否则就打印空白字符。紧接着打印操作码的名称,然后根据操作码对应的操作数类型,将操作数打印出来。
操作码的定义
/* Static description of one opcode. */
typedef struct JSOpCode {
#ifdef DUMP_BYTECODE
/* mnemonic printed by the bytecode dumper; only present in dump builds */
const char *name;
#endif
/* length of the whole instruction (opcode byte plus operands) */
uint8_t size; /* in bytes */
/* the opcodes remove n_pop items from the top of the stack, then
pushes n_push items */
uint8_t n_pop;
uint8_t n_push;
/* operand layout, one of the OP_FMT_* values */
uint8_t fmt;
} JSOpCode;
举个例子,比如我们解析到了一个操作码是:scope_put_var_init。它对应的操作码结构体是
{
"scope_put_var_init", //name: mnemonic printed by the dumper
7,//size in bytes: 1 opcode byte + 4-byte atom + 2-byte u16
0, //n_pop: items removed from the stack
2, //n_push: items pushed onto the stack
OP_FMT_atom_u16 //fmt: operands are an atom followed by a u16
}
假设该位置没有标签,我们首先会打印操作码的名称,也就是scope_put_var_init;接着根据它的操作数类型OP_FMT_atom_u16,跳转到对应的case打印操作数。
/* operands: a 4-byte atom at pos, then a 16-bit value at pos + 4 */
case OP_FMT_atom_u16:
printf(" ");
print_atom(ctx, get_u32(tab + pos));
printf(",%d", get_u16(tab + pos + 4));
break;
可以看到,先打印了紧随其后的一个字符串。然后再打印了一个数字。
什么含义呢?这个需要回到生成字节码的源码中去看了。
// Emit the opcode (the _init variant for const/let declarations), then its
// two operands: the variable name atom and the 16-bit scope level.
emit_op(s, (tok == TOK_CONST || tok == TOK_LET) ?
OP_scope_put_var_init : OP_scope_put_var);
emit_atom(s, name);
emit_u16(s, fd->scope_level);
从QuickJS代码中,可以看到,先是在字节码中添加OP_scope_put_var_init操作码,然后再添加操作数1–name,然后再添加操作数2–scope_level。