1. 引言
Polygon zkEVM采用zkASM(zero-knowledge Assembly language)语言来解析EVM bytecode。
zkASM编译器代码见:
- https://github.com/0xPolygonHermez/zkasmcom:负责将.zkasm编译为json文件供zkExecutor使用。
- https://github.com/0xPolygonHermez/zkevm-storage-rom:为storage二级状态机的zkASM编译器。负责将storage zkasm文件编译为json文件。
- https://github.com/0xPolygonHermez/zkevm-rom:包含了zkEVM中的zkasm源代码。
本文重点关注zkasmcom(zkASM编译器)的代码,其主要依赖3个库:
- yargs:交互式命令行工具,负责参数解析。
- ffjavascript:Finite Field Library in Javascript。
- jison:一个用JavaScript语言实现的语法分析器生成器。
"build_parser_zkasm": "mkdir -p build; ./node_modules/.bin/jison src/zkasm_parser.jison -o build/zkasm_parser.js",
"build_parser_command": "mkdir -p build; ./node_modules/.bin/jison src/command_parser.jison -o build/command_parser.js",
"build": "npm run build_parser_zkasm && npm run build_parser_command"
npm run build
会生成2个解析器文件:
- 1)zkasm_parser.js:compile.js中调用
const lines = zkasm_parser.parse(src);
对 *.zkasm文件进行编译。
- 2)command_parser.js:当为main入口时,调用
cmdList[i] = command_parser.parse(cmdList[i]);
对command进行解析。
zkasm中的常量参数有:【STEP和ROTL_C为只读寄存器。】
// Numeric bounds and register list used by the zkASM compiler.
// 2^32 - 1 and -2^31: presumably the allowed range for CONST values
// (their use is not visible in this excerpt — confirm against compile.js).
const maxConst = (1n << 32n) - 1n;
const minConst = -(1n << 31n);
// 2^256 - 1 and -2^255: presumably the allowed range for CONSTL values (confirm).
const maxConstl = (1n << 256n) - 1n;
const minConstl = -(1n << 255n);
// Registers that processAssignmentOut refuses to accept as assignment outputs.
const readOnlyRegisters = ['STEP', 'ROTL_C'];
以arrays.zkasm为例:【冒号右侧为OPCODE,对应的常量多项式设置见zkasm_parser.jison中op内相应操作码的设置,如ARITH操作码对应`$$ = { arith: 1, arithEq0: 1}`,表示会将arith和arithEq0常量多项式在该行的值设置为1。】
VAR GLOBAL a[100] # 以type、scope、name、count来描述。
VAR GLOBAL b
VAR GLOBAL c[300]
VAR GLOBAL d
start: # 对应type为“label”,identifier为“start”,line为所在代码行,此处为6。里面的每行type为“step”。
STEP => A
0 :ASSERT
1 :MSTORE(a)
2 :MSTORE(b)
3 :MSTORE(c)
4 :MSTORE(d)
@a => A
@b => A
@c => A
@d => A
end:
0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR
finalWait:
${beforeLast()} : JMPN(finalWait)
: JMP(start)
opINVALID:
执行`node src/zkasm.js test/arrays.zkasm -o arrays.json`进行编译。
其中,`const lines = zkasm_parser.parse(src);`解析后的结果为:
[
{ # VAR GLOBAL a[100]
"type": "var",
"scope": "GLOBAL",
"name": "a",
"count": 100
},
{ # VAR GLOBAL b
"type": "var",
"scope": "GLOBAL",
"name": "b",
"count": 1
},
{ # VAR GLOBAL c[300]
"type": "var",
"scope": "GLOBAL",
"name": "c",
"count": 300
},
{ # VAR GLOBAL d
"type": "var",
"scope": "GLOBAL",
"name": "d",
"count": 1
},
{ # start:
"type": "label",
"identifier": "start",
"line": 6
},
{ # STEP => A,将STEP寄存器的值直接赋值给A寄存器。
"type": "step",
"assignment": {
"in": {
"type": "REG",
"reg": "STEP"
},
"out": [
"A"
]
},
"ops": [],
"line": 7
},
{ # 0 :ASSERT,assert常量0。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 0
},
"out": []
},
"ops": [
{
"assert": 1
}
],
"line": 8
},
{ # 1 :MSTORE(a),将常量1存入a数组中的第一位置。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 1
},
"out": []
},
"ops": [
{
"offset": "a",
"mOp": 1,
"mWR": 1
}
],
"line": 10
},
{ # 2 :MSTORE(b),将常量2存入b中。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 2
},
"out": []
},
"ops": [
{
"offset": "b",
"mOp": 1,
"mWR": 1
}
],
"line": 11
},
{ # 3 :MSTORE(c),将常量3存入数组c中。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 3
},
"out": []
},
"ops": [
{
"offset": "c",
"mOp": 1,
"mWR": 1
}
],
"line": 12
},
{ # 4 :MSTORE(d),将常量4存入d中。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 4
},
"out": []
},
"ops": [
{
"offset": "d",
"mOp": 1,
"mWR": 1
}
],
"line": 13
},
{ # @a => A,将a的索引赋值给A。
"type": "step",
"assignment": {
"in": {
"type": "reference",
"identifier": "a"
},
"out": [
"A"
]
},
"ops": [],
"line": 14
},
{ # @b => A,将b的索引赋值给A。
"type": "step",
"assignment": {
"in": {
"type": "reference",
"identifier": "b"
},
"out": [
"A"
]
},
"ops": [],
"line": 15
},
{ # @c => A,将c的索引赋值给A。
"type": "step",
"assignment": {
"in": {
"type": "reference",
"identifier": "c"
},
"out": [
"A"
]
},
"ops": [],
"line": 16
},
{ # @d => A,将d的索引赋值给A。
"type": "step",
"assignment": {
"in": {
"type": "reference",
"identifier": "d"
},
"out": [
"A"
]
},
"ops": [],
"line": 17
},
{ # end:
"type": "label",
"identifier": "end",
"line": 19
},
{ # 0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR,将这些寄存器清零。
"type": "step",
"assignment": {
"in": {
"type": "CONST",
"const": 0
},
"out": [
"A",
"B",
"C",
"D",
"E",
"CTX",
"SP",
"PC",
"GAS",
"MAXMEM",
"SR"
]
},
"ops": [],
"line": 20
},
{ # finalWait:
"type": "label",
"identifier": "finalWait",
"line": 22
},
{ # ${beforeLast()} : JMPN(finalWait)
"type": "step",
"assignment": {
"in": {
"type": "TAG",
"tag": "beforeLast()" # 为标签。
},
"out": []
},
"ops": [
{
"JMPC": 0,
"JMPN": 1,
"offset": "finalWait"
}
],
"line": 23
},
{ # : JMP(start)
"type": "step",
"assignment": null,
"ops": [
{
"JMP": 1,
"JMPC": 0,
"JMPN": 0,
"offset": "start"
}
],
"line": 25
},
{ # opINVALID:
"type": "label",
"identifier": "opINVALID",
"line": 26
}
]
然后对以上内容逐行处理:
// Walk every parsed line and dispatch on its type: include, var, constdef/constldef,
// step, label or command. Steps are appended to ctx.out; labels and variables are
// recorded in ctx.definedLabels / ctx.vars.
for (let i=0; i<lines.length; i++) {
const l = lines[i];
ctx.currentLine = l;
l.fileName = relativeFileName;
if (l.type == "include") {
// Compile the included file into the same ctx, resolved relative to fileDir.
const fullFileNameI = path.resolve(fileDir, l.file);
await compile(fullFileNameI, ctx);
if (pendingCommands.length>0) error(l, "command not allowed before include");
lastLineAllowsCommand = false;
} else if (l.type == "var") {
if (typeof ctx.vars[l.name] !== "undefined") error(l, `Variable ${l.name} already defined`);
if (l.scope == "GLOBAL") { // Allocate a slot for the global variable by name; duplicate names were rejected above.
ctx.vars[l.name] = {
scope: "GLOBAL",
offset: ctx.lastGlobalVarAssigned + 1
}
ctx.lastGlobalVarAssigned += l.count; // Advance by count so an array reserves count consecutive slots.
} else if (l.scope == "CTX") {
// Same allocation scheme, tracked by a separate counter for CTX-scoped variables.
ctx.vars[l.name] = {
scope: "CTX",
offset: ctx.lastLocalVarCtxAssigned + 1
}
ctx.lastLocalVarCtxAssigned += l.count;
} else {
throw error(l, `Invalid scope ${l.scope}`);
}
if (pendingCommands.length>0) error(l, "command not allowed before var");
lastLineAllowsCommand = false;
} else if (l.type == 'constdef' || l.type == 'constldef' ) {
// Evaluate the constant expression and register it as CONST or CONSTL.
const value = evaluateExpression(ctx, l.value);
let ctype = l.type == 'constldef' ? 'CONSTL':'CONST';
defineConstant(ctx, l.name, ctype, value);
} else if (l.type == "step") { // An executable statement under a label such as start/end.
const traceStep = { // Flat map filled through appendOp: step[key] = op[key].
// type: "step"
};
try {
for (let j=0; j< l.ops.length; j++) { // Rule check: a step may not carry two assignments.
if (!l.ops[j].assignment) continue;
if (l.assignment) {
error(l, "not allowed assignments with this operation");
}
// Hoist the assignment carried by the op up to the line itself.
l.assignment = l.ops[j].assignment;
delete l.ops[j].assignment;
}
/*function appendOp(step, op) {
Object.keys(op).forEach(function(key) {
if (typeof step[key] !== "undefined") throw new Error(`Var ${key} already defined`);
step[key] = op[key];
});
}*/
if (l.assignment) { // Process the in and out parts of the assignment.
appendOp(traceStep, processAssignmentIn(ctx, l.assignment.in, ctx.out.length));
appendOp(traceStep, processAssignmentOut(ctx, l.assignment.out));
}
for (let j=0; j< l.ops.length; j++) { // Merge every op's fields into the step map.
appendOp(traceStep, l.ops[j])
}
if (traceStep.JMPC && !traceStep.bin) {
error(l, "JMPC must go together with a binary op");
}
} catch (err) {
error(l, err);
}
// traceStep.lineNum = ctx.out.length;
traceStep.line = l;
ctx.out.push(traceStep); // Append the finished traceStep to ctx.out.
if (pendingCommands.length>0) {
// Commands queued before this step become its cmdBefore list.
traceStep.cmdBefore = pendingCommands;
pendingCommands = [];
}
// A command following a jump step is queued for the next step instead of being attached to this one.
lastLineAllowsCommand = !(traceStep.JMP || traceStep.JMPC || traceStep.JMPN);
} else if (l.type == "label") { // A label identifier such as start/end; duplicate names are rejected.
const id = l.identifier
if (ctx.definedLabels[id]) error(l, `RedefinedLabel: ${id}` );
// The label points at the index of the next step that will be pushed to ctx.out.
ctx.definedLabels[id] = ctx.out.length;
if (pendingCommands.length>0) error(l, "command not allowed before label")
lastLineAllowsCommand = false;
} else if (l.type == "command") {
if (lastLineAllowsCommand) {
// Attach the command to the most recently emitted step as cmdAfter.
if (typeof ctx.out[ctx.out.length-1].cmdAfter === "undefined")
ctx.out[ctx.out.length-1].cmdAfter = [];
ctx.out[ctx.out.length-1].cmdAfter.push(l.cmd);
} else {
// Otherwise hold it until the next step, where it becomes cmdBefore.
pendingCommands.push(l.cmd);
}
} else {
error(l, `Invalid line type: ${l.type}`);
}
}
assignment中的in内容的处理规则为:
/**
 * Translates the input ("in") side of a zkASM assignment into the fields of a
 * trace step: register/counter selectors ("in<NAME>"), CONST / CONSTL values,
 * a free-input tag, or a label/variable reference.
 *
 * Compound nodes (add, sub, neg, mul) are resolved recursively and folded
 * into a single object of coefficients. The parsed `input` node is never
 * modified.
 *
 * @param {object} ctx - Compiler context; `ctx.definedLabels` and `ctx.vars`
 *   are read for `reference` nodes, and ctx is passed to getConstant for
 *   `CONSTID` nodes.
 * @param {object} input - Parsed expression node with a `type` field and
 *   type-specific fields (`reg`, `counter`, `const`, `identifier`, `tag`,
 *   `values`).
 * @param {number} currentLine - Value emitted as CONST when the register is
 *   `zkPC`.
 * @returns {object} Map of field name to value (BigInt coefficients, plus
 *   `freeInTag` / `labelCONST` where applicable).
 * @throws {Error} On an unknown node type, an unresolved reference, CONST
 *   mixed with CONSTL, more than one tag, or a multiplication where neither
 *   side is a plain constant.
 */
function processAssignmentIn(ctx, input, currentLine) {
    const res = {};
    let E1, E2;

    // True when the object carries no key other than CONST.
    function isConstant(o) {
        return Object.keys(o).every(function(key) {
            return key === "CONST";
        });
    }

    if (input.type === "TAG") { // e.g. ${beforeLast()} : JMPN(finalWait); the tag text is parsed by command_parser.
        res.freeInTag = input.tag ? command_parser.parse(input.tag) : { op: ""};
        res.inFREE = 1n;
        return res;
    }
    if (input.type === "REG") {
        if (input.reg === "zkPC") {
            // zkPC is emitted as a constant holding the supplied line index.
            res.CONST = BigInt(currentLine);
        } else {
            res["in" + input.reg] = 1n;
        }
        return res;
    }
    if (input.type === "COUNTER") {
        // Selector name is "in" + counter name with its first letter upper-cased.
        res["in" + input.counter.charAt(0).toUpperCase() + input.counter.slice(1)] = 1n;
        return res;
    }
    if (input.type === "CONST") {
        res.CONST = BigInt(input.const);
        return res;
    }
    if (input.type === "CONSTL") {
        res.CONSTL = BigInt(input.const);
        return res;
    }
    if (input.type === "CONSTID") {
        const [value, ctype] = getConstant(ctx, input.identifier);
        res[ctype] = value;
        return res;
    }
    if (input.type === "exp") {
        res.CONST = BigInt(input.values[0]) ** BigInt(input.values[1]);
        return res;
    }

    // Resolve operands: unary neg has one, add/sub/mul have two.
    if ((input.type === "add") || (input.type === "sub") || (input.type === "neg") || (input.type === "mul")) {
        E1 = processAssignmentIn(ctx, input.values[0], currentLine);
    }
    if ((input.type === "add") || (input.type === "sub") || (input.type === "mul")) {
        E2 = processAssignmentIn(ctx, input.values[1], currentLine);
    }

    if (input.type === "mul") {
        // Only scaling by a plain constant is supported.
        if (isConstant(E1)) {
            if (typeof E2.CONSTL !== "undefined") {
                throw new Error("Not allowed CONST and CONSTL in same operation");
            }
            Object.keys(E2).forEach(function(key) {
                E2[key] *= E1.CONST;
            });
            return E2;
        } else if (isConstant(E2)) {
            if (typeof E1.CONSTL !== "undefined") {
                throw new Error("Not allowed CONST and CONSTL in same operation");
            }
            Object.keys(E1).forEach(function(key) {
                E1[key] *= E2.CONST;
            });
            return E1;
        } else {
            throw new Error("Multiplication not allowed in input");
        }
    }
    if (input.type === "neg") {
        Object.keys(E1).forEach(function(key) {
            E1[key] = -E1[key];
        });
        return E1;
    }
    if (input.type === "sub") {
        // a - b is handled as a + (-b); the tag object itself is left untouched.
        Object.keys(E2).forEach(function(key) {
            if (key !== "freeInTag") {
                E2[key] = -E2[key];
            }
        });
    }
    // "sub" falls through to the addition below without rewriting input.type,
    // so the caller's parse tree keeps its original shape.
    if ((input.type === "add") || (input.type === "sub")) {
        if (E1.freeInTag && E2.freeInTag) throw new Error("Only one tag allowed");
        Object.keys(E2).forEach(function(key) {
            if (E1[key]) {
                E1[key] += E2[key];
            } else {
                E1[key] = E2[key];
            }
        });
        if (typeof E1.CONST !== "undefined" && typeof E1.CONSTL !== "undefined") {
            throw new Error("Not allowed CONST and CONSTL in same operation");
        }
        return E1;
    }
    if (input.type === "reference") {
        // @name resolves to a label's step index first, then to a variable's offset.
        res.labelCONST = input.identifier;
        if (typeof ctx.definedLabels[input.identifier] !== "undefined") {
            res.CONST = BigInt(ctx.definedLabels[input.identifier]);
        } else if (typeof ctx.vars[input.identifier] !== "undefined") {
            res.CONST = BigInt(ctx.vars[input.identifier].offset);
        } else {
            throw new Error(`Not found label/variable ${input.identifier}`)
        }
        return res;
    }
    throw new Error( `Invalid type: ${input.type}`);
}
assignment中out内容的处理规则为:
/**
 * Builds the "set<REG>" selector flags for the output side of an assignment.
 *
 * @param {object} ctx - Compiler context; `ctx.currentLine` supplies the file
 *   name and line number used in the read-only register error message.
 * @param {string[]} outputs - Destination register names, in source order.
 * @returns {object} Map with one `set<REG>: 1` entry per output register.
 * @throws {Error} If a register is listed twice or is one of the
 *   readOnlyRegisters.
 */
function processAssignmentOut(ctx, outputs) {
    const flags = {};
    for (const register of outputs) {
        const flagName = "set" + register;
        if (flags[flagName] !== undefined) {
            throw new Error(`Register ${register} added twice in asssignment output`);
        }
        // Read-only registers can never be an assignment destination.
        if (readOnlyRegisters.includes(register)) {
            const { fileName, line } = ctx.currentLine;
            throw new Error(`Register ${register} is readonly register, could not be used as output destination. ${fileName}:${line}`);
        }
        flags[flagName] = 1;
    }
    return flags;
}
最后再进一步将ctx.out中的内容铺平展开:
// Final pass, run only for the main entry file: resolve symbolic offsets to numbers,
// parse attached commands, and flatten each step's line information before returning.
if (isMain) {
for (let i=0; i<ctx.out.length; i++) {
// A non-numeric offset is still a symbolic name (label or variable) to resolve.
if (
(typeof ctx.out[i].offset !== "undefined") &&
(isNaN(ctx.out[i].offset))
) {
if (ctx.out[i].JMP || ctx.out[i].JMPC || ctx.out[i].JMPN) {
// Jump steps: the name is a label; replace it with the label's step index.
if (typeof ctx.definedLabels[ctx.out[i].offset] === "undefined") {
error(ctx.out[i].line, `Label: ${ctx.out[i].offset} not defined.`);
}
ctx.out[i].offsetLabel = ctx.out[i].offset;
ctx.out[i].offset = ctx.definedLabels[ctx.out[i].offset];
} else {
// Other steps: the name is a variable; record its scope in useCTX and
// replace the name with the variable's allocated offset.
ctx.out[i].offsetLabel = ctx.out[i].offset;
if (typeof ctx.vars[ctx.out[i].offset] === "undefined") {
error(ctx.out[i].line, `Variable: ${ctx.out[i].offset} not defined.`);
}
if (ctx.vars[ctx.out[i].offset].scope === 'CTX') {
ctx.out[i].useCTX = 1;
} else if (ctx.vars[ctx.out[i].offset].scope === 'GLOBAL') {
ctx.out[i].useCTX = 0;
} else {
error(ctx.out[i].line, `Invalid variable scpoe: ${ctx.out[i].offset} not defined.`);
}
ctx.out[i].offset = ctx.vars[ctx.out[i].offset].offset;
}
}
try {
// Parse the command lists attached before/after this step.
parseCommands(ctx.out[i].cmdBefore);
parseCommands(ctx.out[i].cmdAfter);
} catch (err) {
err.message = "Error parsing tag: " + err.message;
error(ctx.out[i].line, err);
}
resolveDataOffset(i, ctx.out[i]);
// Replace the parsed line object with plain fileName / line number / source text.
ctx.out[i].fileName = ctx.out[i].line.fileName;
ctx.out[i].line = ctx.out[i].line.line;
ctx.out[i].lineStr = ctx.srcLines[ctx.out[i].fileName][ctx.out[i].line - 1] ?? '';
}
// Result: the step list passed through stringifyBigInts, plus the label table.
const res = {
program: stringifyBigInts(ctx.out),
labels: ctx.definedLabels
}
return res;
}
最终arrays.zkasm的编译结果为:
{
"program": [
{
"inSTEP": "1",
"setA": 1,
"line": 7,
"fileName": "arrays.zkasm",
"lineStr": " STEP => A"
},
{
"CONST": "0",
"assert": 1,
"line": 8,
"fileName": "arrays.zkasm",
"lineStr": " 0 :ASSERT"
},
{
"CONST": "1",
"offset": 0,
"mOp": 1,
"mWR": 1,
"line": 10,
"offsetLabel": "a",
"useCTX": 0,
"fileName": "arrays.zkasm",
"lineStr": " 1 :MSTORE(a)"
},
{
"CONST": "2",
"offset": 100,
"mOp": 1,
"mWR": 1,
"line": 11,
"offsetLabel": "b",
"useCTX": 0,
"fileName": "arrays.zkasm",
"lineStr": " 2 :MSTORE(b)"
},
{
"CONST": "3",
"offset": 101,
"mOp": 1,
"mWR": 1,
"line": 12,
"offsetLabel": "c",
"useCTX": 0,
"fileName": "arrays.zkasm",
"lineStr": " 3 :MSTORE(c)"
},
{
"CONST": "4",
"offset": 401,
"mOp": 1,
"mWR": 1,
"line": 13,
"offsetLabel": "d",
"useCTX": 0,
"fileName": "arrays.zkasm",
"lineStr": " 4 :MSTORE(d)"
},
{
"labelCONST": "a",
"CONST": "0",
"setA": 1,
"line": 14,
"fileName": "arrays.zkasm",
"lineStr": " @a => A"
},
{
"labelCONST": "b",
"CONST": "100",
"setA": 1,
"line": 15,
"fileName": "arrays.zkasm",
"lineStr": " @b => A"
},
{
"labelCONST": "c",
"CONST": "101",
"setA": 1,
"line": 16,
"fileName": "arrays.zkasm",
"lineStr": " @c => A"
},
{
"labelCONST": "d",
"CONST": "401",
"setA": 1,
"line": 17,
"fileName": "arrays.zkasm",
"lineStr": " @d => A"
},
{
"CONST": "0",
"setA": 1,
"setB": 1,
"setC": 1,
"setD": 1,
"setE": 1,
"setCTX": 1,
"setSP": 1,
"setPC": 1,
"setGAS": 1,
"setMAXMEM": 1,
"setSR": 1,
"line": 20,
"fileName": "arrays.zkasm",
"lineStr": " 0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR"
},
{
"freeInTag": {
"op": "functionCall",
"funcName": "beforeLast",
"params": []
},
"inFREE": "1",
"JMPC": 0,
"JMPN": 1,
"offset": 11,
"line": 23,
"offsetLabel": "finalWait",
"fileName": "arrays.zkasm",
"lineStr": " ${beforeLast()} : JMPN(finalWait)"
},
{
"JMP": 1,
"JMPC": 0,
"JMPN": 0,
"offset": 0,
"line": 25,
"offsetLabel": "start",
"fileName": "arrays.zkasm",
"lineStr": " : JMP(start)"
}
],
"labels": {
"start": 0,
"end": 10,
"finalWait": 11,
"opINVALID": 13
}
}
参考资料
[1] zkASM基础语法