最近在学习《计算机系统要素——从零构建现代计算机》这本书,花了两天时间用antlr4实现了书中第6章的Hack汇编器(assembler)。
输出的机器指令和参考答案完全一致。
下面把antlr4的语法文件(.g4)代码贴出来,供后来者参考。
/**
* Grammar HackAsm: an assembler for the Hack assembly language (chapter 6 of the book).
*/
grammar HackAsm;
@parser::header{
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.LinkedList;
}
@parser::members{
/**
 * One assembled instruction. C-instructions use this class directly;
 * A-instructions use the ACommand subclass.
 */
class Command{
    String code; // binary encoding of the instruction, as a string of 0/1 characters
    final static boolean DEBUG=false; // set to true to trace the assembly on stdout
}

/**
 * An A-instruction. Either addr holds a literal address, or ID holds a symbol
 * name whose address is filled in later, before update() is called.
 */
class ACommand extends Command
{
    Integer addr; // numeric address; stays null until a symbolic ID has been resolved
    String ID;    // symbol name, or null when the instruction carried a literal number

    /**
     * Encodes addr into code as a 16-character binary string, left-padded with zeros.
     *
     * An A-instruction only has 15 value bits, so anything outside 0..32767 is
     * rejected here instead of being cut down to 16 characters, which would yield
     * a word whose leading bit marks it as a C-instruction.
     *
     * Throws RuntimeException when addr is null or out of range.
     */
    void update()
    {
        if (addr == null || addr < 0 || addr > 32767)
            throw new RuntimeException("A-instruction address out of range (0..32767): "+addr);
        String str = Integer.toBinaryString(addr);
        if (Command.DEBUG) {
            System.out.println("addr="+addr);
            System.out.println("str="+str);
        }
        StringBuilder bits = new StringBuilder(16);
        for (int i = str.length(); i < 16; ++i)
            bits.append('0'); // left-pad with zeros up to 16 characters
        bits.append(str);
        if (Command.DEBUG)
            System.out.println("str="+bits);
        code = bits.toString();
    }
}
}
// Entry rule: a whole Hack assembly file, one optional command per line.
//
// While the lines are parsed, the command rule appends instructions to cmds and
// records label addresses in sym_table. Once the whole file has been read, the
// after-action resolves every symbolic A-instruction and prints one binary word
// per instruction to stdout.
//
// The rule is anchored with EOF so that unparsable trailing input is reported
// instead of being silently ignored, and the final line may omit its newline.
prog
locals[
    List<Command> cmds,
    Map<String,Integer> sym_table
]
@init{
    $ctx.cmds = new LinkedList<Command>();
    $ctx.sym_table = new HashMap<String,Integer>();
    Map<String,Integer> sym_table = $ctx.sym_table;
    // Predefined symbols of the Hack platform.
    sym_table.put("SP",0);
    sym_table.put("LCL",1);
    sym_table.put("ARG",2);
    sym_table.put("THIS",3);
    sym_table.put("THAT",4);
    sym_table.put("SCREEN",16384);
    sym_table.put("KBD",24576);
    for (int i = 0; i < 16; ++i)
        sym_table.put("R"+i,i);
}
@after{
    if (Command.DEBUG) {
        System.out.println("after prog");
        System.out.println("processed "+$ctx.cmds.size()+" commands");
        System.out.println($ctx.sym_table.size()+" syms");
    }
    // Second pass: replace the symbol of every A-instruction by a number.
    // Lookup order: predefined symbols and labels first, then variables that
    // were already allocated; an unknown symbol becomes a new variable, and
    // variables are numbered consecutively starting at address 16.
    Map<String,Integer> syms = $ctx.sym_table;
    Map<String,Integer> vars = new HashMap<String,Integer>();
    List<Command> cmds = $ctx.cmds;
    int nextVarAddr = 16;
    for (Command c : cmds)
    {
        if (!(c instanceof ACommand))
            continue;
        ACommand cmd = (ACommand) c;
        if (cmd.ID != null)
        {
            Integer resolved = syms.get(cmd.ID);
            if (resolved == null)
                resolved = vars.get(cmd.ID);
            if (resolved == null)
            {
                resolved = nextVarAddr++;
                vars.put(cmd.ID, resolved);
            }
            cmd.addr = resolved;
        }
        cmd.update();
    }
    // Emit the machine code, one instruction per line.
    for (Command c : cmds)
    {
        System.out.println(c.code);
    }
}
    : (command? EL)* command? EOF
    ;
// One source line: an A-instruction, a C-instruction or a label declaration.
//
// Instructions are appended to the instruction list kept by the enclosing prog
// rule. A label emits no instruction; it is bound to the number of instructions
// seen so far, which is the address of the instruction that follows it.
// Declaring a name that is already in the symbol table (a predefined symbol or
// an earlier label) throws a RuntimeException.
command
    : a_command
      {
        ProgContext prog = (ProgContext)($ctx.getParent());
        prog.cmds.add($a_command.cmd);
      }
    | c_command
      {
        ProgContext prog = (ProgContext)($ctx.getParent());
        prog.cmds.add($c_command.cmd);
      }
    | label
      {
        ProgContext prog = (ProgContext)($ctx.getParent());
        Map<String,Integer> sym_table = prog.sym_table;
        String name = $label.label_name;
        if (sym_table.containsKey(name))
            throw new RuntimeException("label "+ name +" has already been used!");
        sym_table.put(name, prog.cmds.size());
      }
    ;
// Label declaration of the form (NAME); returns the name between the parentheses.
label returns[String label_name]
    : LP ID RP
      {
        $label_name = $ID.text;
        if (Command.DEBUG) {
            System.out.println("label="+$label_name);
        }
      }
    ;
// A-instruction: an at-sign followed by a decimal number or by a symbol.
//
// For a number, addr is set and ID stays null. For a symbol, only ID is set and
// the address is left for the prog rule to resolve after the whole file is read.
//
// The lexer emits the single digits 0 and 1 as ZERO and ONE rather than NUM, so
// all three token types are accepted for the numeric form. Likewise a symbol
// spelled exactly A, M or D is lexed as DEST rather than ID, so DEST is accepted
// for the symbolic form as well.
a_command returns[ACommand cmd]
@init{
    $cmd = new ACommand();
}
@after{
    if (Command.DEBUG)
        System.out.println("ACommand: addr= "
            +$cmd.addr+", ID="+$cmd.ID
        );
}
    : AT num=(NUM|ZERO|ONE)
      {
        $cmd.addr = $num.int;
        $cmd.ID = null;
      }
    | AT sym=(ID|DEST)
      {
        $cmd.ID = $sym.text;
      }
    ;
// C-instruction: optional destination, mandatory computation, optional jump.
//
// The result is the string 111, then the 7 computation characters (the a flag
// followed by the 6 code characters returned by comp), then 3 destination
// characters and 3 jump characters. Destination and jump default to 000.
//
// A multi-letter destination such as MD is lexed as ID and a single letter as
// DEST, which is why both token types are accepted before the equals sign. The
// destination text must consist only of the letters A, D and M; anything else
// throws a RuntimeException instead of being assembled with whatever of those
// letters it happens to contain.
c_command returns[Command cmd]
locals[
    StringBuilder destcode,
    String compcode,
    String jmpcode
]
@init{
    $cmd = new Command();
    $ctx.destcode = new StringBuilder("000");
    $ctx.jmpcode = "000";
    $ctx.compcode = "0000000";
}
@after{
    $cmd.code = "111"+$ctx.compcode+$ctx.destcode+$ctx.jmpcode;
    if (Command.DEBUG){
        System.out.println($ctx.getText()+" is a c_command.");
        System.out.println("dest:"+$ctx.destcode);
        System.out.println("comp:"+$ctx.compcode);
        System.out.println("jmp:"+$ctx.jmpcode);
        System.out.println("code:"+$cmd.code);
    }
}
    : (dests=(ID|DEST) ASSIGN)? comp (SEMICOLON jmp)?
      {
        String dest = $dests.text; // null when the instruction has no destination part
        if (Command.DEBUG)
            System.out.println("comp.dests.text"+dest);
        if (dest != null)
        {
            if (!dest.matches("[ADM]+"))
                throw new RuntimeException("invalid destination: "+dest);
            // The three destination characters are, in order, A, D and M.
            if (dest.indexOf("A") != -1)
                $ctx.destcode.setCharAt(0,'1');
            if (dest.indexOf("D") != -1)
                $ctx.destcode.setCharAt(1,'1');
            if (dest.indexOf("M") != -1)
                $ctx.destcode.setCharAt(2,'1');
        }
        if (Command.DEBUG)
            System.out.println("comp_part:"+$comp.text);
        $ctx.compcode = $comp.afield + $comp.code;
        String JMPstr = $jmp.text; // null when the instruction has no jump part
        if (JMPstr != null)
            $ctx.jmpcode = $jmp.code;
      }
    ;
// Computation part of a C-instruction.
//
// Returns the 6-character code and the 1-character a flag; the flag is 1 when
// the computation reads M and 0 otherwise.
//
// The grammar accepts more operand combinations than the instruction set can
// encode (for example A+D or D&1). Such a combination throws a RuntimeException
// instead of being left at the initial code 000000, which is also the encoding
// of D&A and would therefore assemble to a different, valid instruction.
//
// The expression text is taken from the parse tree so that it consists of the
// matched tokens only, without anything from the hidden comment channel.
comp returns[String code,String afield]
@init{
    $code = "000000";
    $afield = "0";
}
@after{
    if (Command.DEBUG)
        System.out.println("comp_code:"+$code);
}
    : bin_expr
      {
        String expr = $bin_expr.ctx.getText();
        if (expr.contains("M"))
            $afield = "1";
        switch (expr) {
            case "D+1":             $code = "011111"; break;
            case "D-1":             $code = "001110"; break;
            case "A+1": case "M+1": $code = "110111"; break;
            case "A-1": case "M-1": $code = "110010"; break;
            case "D+A": case "D+M": $code = "000010"; break;
            case "D-A": case "D-M": $code = "010011"; break;
            case "A-D": case "M-D": $code = "000111"; break;
            case "D&A": case "D&M": $code = "000000"; break;
            case "D|A": case "D|M": $code = "010101"; break;
            default:
                throw new RuntimeException("unsupported computation: "+expr);
        }
      }
    | un_expr
      {
        String expr = $un_expr.ctx.getText();
        if (expr.contains("M"))
            $afield = "1";
        switch (expr) {
            case "-1":              $code = "111010"; break;
            case "-D":              $code = "001111"; break;
            case "-A": case "-M":   $code = "110011"; break;
            case "!D":              $code = "001101"; break;
            case "!A": case "!M":   $code = "110001"; break;
            default:
                throw new RuntimeException("unsupported computation: "+expr);
        }
      }
    | ZERO
      { $code = "101010"; }
    | ONE
      { $code = "111111"; }
    | DEST
      {
        String reg = $DEST.text;
        if (Command.DEBUG)
            System.out.println("comp.dest.text:"+reg);
        if (reg.equals("D"))
            $code = "001100";
        else if (reg.equals("A") || reg.equals("M"))
            $code = "110000";
        if (reg.equals("M"))
            $afield = "1";
      }
    ;
// Binary computation: a register, an operator, then a register or the constant 1.
bin_expr
    : DEST bin_op (DEST | ONE)
    ;

// Operators allowed in a binary computation.
bin_op
    : PLUS
    | MINUS
    | BIT_AND
    | BIT_OR
    ;

// Unary computation: arithmetic negation or bitwise not.
un_expr
    : neg_expr
    | not_expr
    ;

// Negation of a register or of the constant 1.
neg_expr
    : MINUS (DEST | ONE)
    ;

// Bitwise not of a register.
not_expr
    : BIT_NOT DEST
    ;
// Jump part of a C-instruction: maps each jump mnemonic to its 3-character code.
jmp returns[String code]
    : 'JGT' { $code = "001"; }
    | 'JEQ' { $code = "010"; }
    | 'JGE' { $code = "011"; }
    | 'JLT' { $code = "100"; }
    | 'JNE' { $code = "101"; }
    | 'JLE' { $code = "110"; }
    | 'JMP' { $code = "111"; }
    ;
// Block comments are sent to the hidden channel, so the parser never sees them.
BLOCK_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;
// Line comments are sent to the hidden channel as well. The pattern stops in
// front of the line break, so the newline that ends the line is still emitted
// as an EL token, which the prog rule needs as the line terminator.
SL_COMMENT
: '//' ~[\r\n]* -> channel(HIDDEN) // Filters out line comments; the technique is from section 12.1 (page 181 of the Chinese edition) of the textbook, but the trailing newline must be kept!
;
// Single register letters. Listed before ID so that a lone A, M or D is lexed
// as DEST; longer names still become ID because the longest match wins.
DEST:'A'|'M'|'D';
// The single digits 0 and 1 get their own token types (they are also valid
// computations); listed before NUM so that they win over it for these inputs.
ZERO:'0';
ONE:'1';
// Symbol: letters, digits, underscore, period, dollar sign and colon, not
// starting with a digit. A leading underscore is allowed.
ID : [A-Za-z_$:.][A-Za-z0-9_$:.]* ;
// Unsigned decimal number.
NUM:[0-9]+
;
// Operators used in computations.
PLUS: '+';
MINUS: '-';
BIT_AND:'&';
BIT_OR:'|';
BIT_NOT:'!';
// Separators of the C-instruction parts: dest=comp;jump.
ASSIGN: '=';
SEMICOLON : ';' ;
// Parentheses that enclose a label declaration.
LP: '('
;
RP:')'
;
// Prefix of an A-instruction.
AT: '@';
// End of line. Kept as a real token because the prog rule uses it to terminate each line.
EL : '\n';
WS : [ \t\r]+ -> skip ; // skip spaces, tabs and carriage returns; '\n' is NOT skipped, it is the EL token
运行办法:把上面的代码保存到HackAsm.g4文件中,然后和
antlr-4.7.1-complete.jar放到同一个目录下,设置好java的环境变量之后,
在命令行(Windows)中进入该目录,执行下面的命令
java -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% org.antlr.v4.Tool HackAsm.g4
javac -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% *.java
如果没有错误的话,把书中第6章的所有测试代码文件也复制到本目录下,
然后执行
@java -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% org.antlr.v4.gui.TestRig HackAsm prog Add.asm
输出结果为
0000000000000010
1110110000010000
0000000000000011
1110000010010000
0000000000000000
1110001100001000
经过比对,上面的输出和书中自带的汇编器产生的结果完全一致,说明测试通过。