The assembler program is difficult to write.
My C skills are weak, and I had no good idea of how to write it.
So I downloaded code posted by others and wrote my own code following that example.
Here is some analysis of the code.
The main file is hack.c, and it contains just a main function.
/*
 * Entry point: assembles the source file named by argv[1].
 *
 * The output file name is derived from the input name: a trailing ".asm"
 * is replaced by ".hack"; any other name gets ".hack" appended, so the
 * output can never be the same path as the source.  stdout is redirected
 * to the output file, which is why diagnostics after that point must go
 * to stderr.
 *
 * Returns 0 on success; exits with status 1 on a usage error, on
 * allocation failure, or when either file cannot be opened.
 */
int main(int argc,char* argv[])
{
    char *outname;
    size_t len;

    if (argc != 2) {
        printf("Usage: ASSEMBLE source [listing]\n");
        exit(1);
    }

    /* Room for the whole input name plus ".hack" and the terminator. */
    len = strlen(argv[1]);
    outname = malloc(len + sizeof ".hack");
    if (outname == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }

    /* Only a real ".asm" suffix is stripped, not an occurrence mid-name. */
    if (len >= 4 && strcmp(argv[1] + len - 4, ".asm") == 0)
        len -= 4;
    memcpy(outname, argv[1], len);
    strcpy(outname + len, ".hack");

    /* Open the source first so a bad path does not leave an empty output. */
    sourcefd = fopen(argv[1], "r");
    if (sourcefd == NULL) {
        fprintf(stderr, "cannot open %s\n", argv[1]);
        free(outname);
        exit(1);
    }
    if (freopen(outname, "w", stdout) == NULL) {
        fprintf(stderr, "cannot create %s\n", outname);
        fclose(sourcefd);
        free(outname);
        exit(1);
    }

    parser(argv[1], outname);

    fclose(sourcefd);
    free(outname);
    return 0;
}
First, if the number of input arguments does not match, it just prints an error. Otherwise, it opens the xxx.asm file and creates an xxx.hack file.
Then it calls the function parser().
Here is the code of the function parser():
/*
 * Drives the assembly: loads the predefined symbols, then makes the two
 * passes over the source in order.  Both file names are simply handed on
 * to the passes.
 */
void parser(char* infile,char* outfile)
{
    symbol_init();          /* predefined symbols go in first          */
    first_pass(infile);     /* pass 1: record the (LABEL) definitions  */
    two_pass(infile, outfile);  /* pass 2: emit the instructions       */
}
At the beginning, it calls the function symbol_init():
/*
 * Resets every bucket of the hash table sytab to an empty chain and then
 * inserts the first 23 entries of prevar into it.
 */
void symbol_init()
{
    int bucket = 0;
    int k = 0;

    /* Empty all chains. */
    while (bucket < maxsym) {
        sytab[bucket].next = NULL;
        bucket++;
    }

    /* Load the predefined symbols. */
    while (k < 23) {
        insert(prevar[k]);
        k++;
    }
}
This function initializes the symbol table with the following entries, using the function insert().
/* Predefined symbols and their addresses, loaded by symbol_init(). */
struct var_addr prevar[] = {
    {"SP",     0},
    {"LCL",    1},
    {"ARG",    2},
    {"THIS",   3},
    {"THAT",   4},
    {"R0",     0},
    {"R1",     1},
    {"R2",     2},
    {"R3",     3},
    {"R4",     4},
    {"R5",     5},
    {"R6",     6},
    {"R7",     7},
    {"R8",     8},
    {"R9",     9},
    {"R10",    10},
    {"R11",    11},
    {"R12",    12},
    {"R13",    13},
    {"R14",    14},
    {"R15",    15},
    {"SCREEN", 16384},
    {"KBD",    24576}
};
The struct that stores a symbol and its address is called sybtable; it is a linked-list node.
The array sytab is a hash table. (PS: before I studied this code, I really didn't know what a hash table was. Afterwards I understood what it is: just an array of lists.)
/* One symbol-table entry: a symbol name paired with its address. */
struct var_addr
{
char str[50]; /* symbol name, NUL-terminated; at most 49 characters fit */
int addr; /* address associated with the symbol */
};
/* Node of a singly linked chain of symbol entries. */
struct sybtable
{
struct var_addr node; /* the entry stored in this node */
struct sybtable *next; /* next node in the same chain, or NULL at the end */
};
/* The hash table: maxsym chains. Each array element acts only as the chain
   head; search() starts at its next pointer and never compares its node. */
struct sybtable sytab[maxsym];
The code of the functions insert(), search() and hash():
/*
 * Adds var to the symbol table unless a symbol with the same name is
 * already present, in which case the table is left untouched.
 *
 * search() returns NULL when the name exists; otherwise it returns the
 * last node of the bucket's chain, and the new node is appended there.
 * The process exits with status 1 if memory for the node cannot be
 * allocated.
 */
void insert(struct var_addr var)
{
    struct sybtable *tail;
    struct sybtable *fresh;

    if (search(var.str, &tail) == NULL)
        return;                 /* name already in the table */

    fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        /* stdout may be redirected to the output file, so use stderr. */
        fprintf(stderr, "insert: out of memory\n");
        exit(1);
    }
    fresh->node = var;
    fresh->next = NULL;
    tail->next = fresh;
}
/*
 * Looks str up in the bucket chosen by hash(str).
 *
 * Found:     *temp is set to the matching node and NULL is returned.
 * Not found: *temp is set to the last node of the chain (the bucket head
 *            itself when the chain is empty) and that node is returned,
 *            so the caller can append after it.
 *
 * Note the inverted convention: a NULL result means "present".
 */
struct sybtable * search(char *str,struct sybtable **temp)
{
    struct sybtable *cur = &sytab[hash(str)];

    for (; cur->next != NULL; ) {
        cur = cur->next;
        if (strcmp(cur->node.str, str) == 0) {
            *temp = cur;
            return NULL;
        }
    }

    *temp = cur;
    return cur;
}
/*
 * Maps a symbol name to a bucket index in the range [0, maxsym).
 *
 * The hash is the sum of the bytes of str modulo maxsym.  Bytes are read
 * as unsigned char and summed in an unsigned accumulator so that the
 * result can never be negative, even where plain char is signed and the
 * name contains bytes above 0x7F.  For plain ASCII names the result is
 * the same as a signed sum.
 */
int hash(char *str)
{
    const unsigned char *p;
    unsigned int sum = 0;

    for (p = (const unsigned char *)str; *p != '\0'; p++)
        sum += *p;

    return (int)(sum % (unsigned int)maxsym);
}
First, hash() determines which bucket of the array the symbol may be in.
Then search() checks whether the symbol already exists.
Finally, insert() inserts the symbol if search() returns a node (which means the symbol was not found).
Let us go back to parser() and carry on analysing it.
After initializing the symbol table, it scans the code two times.
The first time, it removes the comments and records the addresses of the label symbols we define, such as (LOOP) or (END).
The code of first_pass():
void first_pass(char* infile)
{
char ch,str[50];
struct var_addr tem;
int sw;
code_addr = 0;
lex_init();
/*tem = malloc(sizeof(struct var_addr));*/
while((ch = nextch()) != EOF)
{
/*printf("%c\n",ch);*/
if(ch == '(')
{
code_addr --;
ch = nextch();
getvar(ch,str);
/*printf("%s\n",str);*/
strcpy(tem.str,str);
tem.addr = code_addr;
insert(tem);
}
}
/*printf("%d\n",sw);*/
/*free(tem);*/
}
And its sub-functions:
</pre><pre name="code" class="csharp">char nextch()
{
char ch;
if(buffer[charpos] == '\n')
{
code_addr ++;
do
{
charpos = 0;
buflen = 0;
/*printf("%c\n",ch);*/
ch = fgetc(sourcefd);
/*printf("%c\n",ch);*/
while((buflen < (int)maxlexem) && (ch != EOF))
{
/*去掉注释*/
if(ch == '/')
{
ch = fgetc(sourcefd);
if(ch == '/')
{
while(ch != '\n')
ch = fgetc(sourcefd);
}
buffer[buflen ++] = '\n';
/*printf("%c\n",ch);*/
break;
}
if(ch == ' ')
{
ch = fgetc(sourcefd);
continue;
}
buffer[buflen ++] = ch;
if(ch == '\n')
break;
ch = fgetc(sourcefd);
}
}while((buflen == 1) && (ch == '\n') || (buflen == 0 && ch != EOF)); /*删除空行 */
}
if(ch == EOF)
buffer[buflen ++] = EOF;
return buffer[charpos ++];
}
/* Returns the character at the current buffer position without consuming it. */
int curchar()
{
    int current = buffer[charpos];

    return current;
}
/* Steps the buffer position back by one, so the last character read by
   nextch() is returned again by the next call. */
void upchar()
{
    charpos -= 1;
}
/*
 * Resets the line buffer to its start state.  The buffer is primed with a
 * single '\n' at position 0, which makes the first nextch() call load the
 * first line of the source.
 */
void lex_init()
{
    buffer[0] = '\n';
    buflen = 0;
    charpos = 0;
}
lex_init() just resets the buffer that stores the current command.
nextch() removes comments, puts the command into the buffer, and returns the next character from the buffer.
curchar() returns the current character.
upchar() decrements the index into the buffer array, so it steps back one character.
/*
 * Reads a symbol from the line buffer into str.
 *
 * The caller has already consumed the first character with nextch(); this
 * function steps back onto it and then collects every following character
 * that is a letter, a digit, '_', '.', ':' or '$'.  The ch parameter is
 * not used.
 *
 * str must have room for at least 50 bytes (every caller in this program
 * passes a 50-byte array).  A longer symbol is still consumed completely
 * but only its first 49 characters are stored, so the buffer cannot
 * overflow.  str is always NUL-terminated and is empty when the first
 * character is not a symbol character.
 *
 * On return the terminating character is pushed back for the caller,
 * except when it is the last character before the end of the line, in
 * which case it stays consumed.
 */
void getvar(char ch,char *str)
{
    enum { SYMBOL_MAX = 49 };   /* callers' buffers hold 50 bytes */
    int len = 0;
    char vch;

    (void)ch;
    upchar();
    vch = nextch();
    /* The cast keeps isalnum() defined where plain char is signed. */
    while (vch != (char)EOF &&
           (isalnum((unsigned char)vch) || vch == '_' || vch == '.' ||
            vch == ':' || vch == '$')) {
        if (len < SYMBOL_MAX)
            str[len++] = vch;
        /* Stop before the newline: reading it would load the next line. */
        if (curchar() == '\n')
            break;
        vch = nextch();
    }
    if (curchar() != '\n')
        upchar();
    str[len] = '\0';
}
getvar() reads a string until it meets a character that is not allowed in a symbol, such as '=', '\n' or ';'.
So first_pass() initializes the buffer, and if the first character is '(' it inserts the label into the symbol table if it can.
After that, the code is simply scanned again.
two_pass():
void two_pass(char* infile,char* outfile)
{
char ch;
int value,des;
struct var_addr tem;
date_addr = 16;
lex_init();
rewind(sourcefd);
/*freopen("add.hack","w",stdout);*/
while((ch = nextch()) != EOF)
{
/*printf("%d\n",sw);
printf("%d\n",Acmd);*/
/*printf("%c\n",value);*/
switch(ch)
{
case '(':
Lcommad();
break;
case '@':
Acommad(ch);
break;
default:
/*printf("%c\n",value);*/
Ccommad(ch);
break;
}
}
}
If the first character is '(' it calls Lcommad(); if it is '@' it calls Acommad(); otherwise it calls Ccommad().
Lcommad():
/*
 * Skips a label line during pass 2 by consuming characters up to and
 * including the closing ')'.
 *
 * If the input ends before a ')' is found, the loop stops at the EOF
 * marker instead of spinning forever, and the marker is pushed back so
 * that the caller's next nextch() sees EOF as well.
 */
void Lcommad()
{
    char ch;

    do {
        ch = nextch();
    } while (ch != ')' && ch != (char)EOF);

    if (ch == (char)EOF)
        upchar();
}
It just skips this line.
void Acommad(int v)
{
int i,mod,value;
char ch,str[50];
struct var_addr tem;
struct sybtable *p,*newnode;
int sw;
for(i = 0;i < 17;i ++)
instr[i] = '0';
instr[0] = '0';
instr[16] = '\0';
ch = nextch();
if(isalpha(ch))
{
getvar(ch,tem.str);
if(!search(tem.str,&p))
value = p->node.addr;
else
{
tem.addr = value = date_addr;
date_addr ++;
p -> next = malloc(sizeof(struct sybtable));
p = p -> next;
p -> next = NULL;
p -> node = tem;
}
}
else
value = getnum(ch);
for(i = 15;i > 0;i --)
{
mod = value % 2;
value = value / 2;
instr[i] = mod + '0';
}
printf("%s\n",instr);
}
(PS: if it can't find the symbol, it creates a new symbol and allocates memory for it.)
If the operand is just a number, as in @123, it simply converts it to binary.
Ccommad():
/*
 * Translates one C-command ("dest=comp;jump", with dest and jump
 * optional) and prints it as a 16-character binary line.  value is the
 * first character of the command, already consumed by the caller.
 *
 * instr[0..2] are set to '1'; the remaining bits start as '0' and are
 * filled in by destyield(), compyield() and jumpyield().
 *
 * The leading symbol is read with getvar().  If it is followed by '=', it
 * is the dest field and comp starts at the next character.  Otherwise the
 * symbol's first character is the start of comp.  When the symbol is
 * empty (the command starts with a character such as '-' or '!'), the
 * character passed in by the caller is itself the start of comp and has
 * already been consumed, so it must not be pushed back: doing so made the
 * caller read the same character again without end.
 */
void Ccommad(int value)
{
    int i;
    char ch, str[50];

    for (i = 0; i < 17; i++)
        instr[i] = '0';
    instr[0] = '1';
    instr[1] = '1';
    instr[2] = '1';
    instr[16] = '\0';

    getvar((char)value, str);
    ch = nextch();
    if (ch == '=') {
        destyield(str);
        value = nextch();
    } else if (str[0] != '\0') {
        value = str[0];
        upchar();           /* give back the character after the symbol */
    }
    /* else: value already holds the consumed first character of comp */
    compyield(value);

    ch = nextch();
    if (ch == ';') {
        ch = nextch();
        getvar(ch, str);
        jumpyield(str);
    } else {
        upchar();           /* it belongs to the next command */
    }
    printf("%s\n", instr);
}
Sub-functions:
/* Sets instr[4..9] from a six-character pattern; only '1's are written,
   so bits that are already set are never cleared. */
static void comp_set(const char *bits)
{
    int k;

    for (k = 0; k < 6; k++) {
        if (bits[k] == '1')
            instr[4 + k] = '1';
    }
}

/* Like comp_set(), and additionally sets instr[3] when reg is 'M'. */
static void comp_set_am(const char *bits, int reg)
{
    comp_set(bits);
    if (reg == 'M')
        instr[3] = '1';
}

/*
 * Encodes the comp field of a C-command into instr[3..9].
 *
 * ch is the first character of the comp field; further characters are
 * read with nextch().  When a one-character comp ("D", "A", "M") turns
 * out not to be followed by an operator, the character read ahead is
 * pushed back with upchar().  Unrecognised input leaves the bits
 * unchanged.
 *
 * Fix: "-A" / "-M" now set instr[5] as well (pattern 110011); previously
 * instr[8] was set twice and instr[5] was never set.
 */
void compyield(int ch)
{
    int first;

    switch (ch) {
    case '0':
        comp_set("101010");
        break;
    case '1':
        comp_set("111111");
        break;

    case '-':                           /* -1, -D, -A, -M */
        ch = nextch();
        switch (ch) {
        case '1':
            comp_set("111010");
            break;
        case 'D':
            comp_set("001111");
            break;
        case 'A':
        case 'M':
            comp_set_am("110011", ch);
            break;
        default:
            break;
        }
        break;

    case '!':                           /* !D, !A, !M */
        ch = nextch();
        switch (ch) {
        case 'D':
            comp_set("001101");
            break;
        case 'A':
        case 'M':
            comp_set_am("110001", ch);
            break;
        default:
            break;
        }
        break;

    case 'D':                           /* D, D+1, D-1, D op A, D op M */
        ch = nextch();
        switch (ch) {
        case '+':
            ch = nextch();
            switch (ch) {
            case '1':
                comp_set("011111");
                break;
            case 'A':
            case 'M':
                comp_set_am("000010", ch);
                break;
            default:
                break;
            }
            break;
        case '-':
            ch = nextch();
            switch (ch) {
            case '1':
                comp_set("001110");
                break;
            case 'A':
            case 'M':
                comp_set_am("010011", ch);
                break;
            default:
                break;
            }
            break;
        case '&':
            ch = nextch();
            switch (ch) {
            case 'A':
            case 'M':
                comp_set_am("000000", ch);
                break;
            default:
                break;
            }
            break;
        case '|':
            ch = nextch();
            switch (ch) {
            case 'A':
            case 'M':
                comp_set_am("010101", ch);
                break;
            default:
                break;
            }
            break;
        default:                        /* plain D: undo the look-ahead */
            comp_set("001100");
            upchar();
            break;
        }
        break;

    case 'A':                           /* A, A+1, A-1, A-D and the M forms */
    case 'M':
        first = ch;
        ch = nextch();
        switch (ch) {
        case '+':
            ch = nextch();              /* operand is consumed, not examined */
            comp_set("110111");
            break;
        case '-':
            ch = nextch();
            switch (ch) {
            case '1':
                comp_set("110010");
                break;
            case 'D':
                comp_set("000111");
                break;
            default:
                break;
            }
            break;
        default:                        /* plain A / M: undo the look-ahead */
            comp_set("110000");
            upchar();
            break;
        }
        if (first == 'M')
            instr[3] = '1';
        break;

    default:
        break;
    }
}
/*
 * Encodes the dest field of a C-command: every 'A', 'D' or 'M' found in
 * str sets instr[10], instr[11] or instr[12] respectively.  Other
 * characters are ignored.
 */
void destyield(char *str)
{
    char *p;

    for (p = str; *p != '\0'; p++) {
        if (*p == 'M')
            instr[12] = '1';
        else if (*p == 'A')
            instr[10] = '1';
        else if (*p == 'D')
            instr[11] = '1';
    }
}
/*
 * Encodes the jump field of a C-command into instr[13..15].  str is
 * compared against the seven jump mnemonics; a string that matches none
 * of them leaves the bits unchanged.
 */
void jumpyield(char *str)
{
    /* mnemonic -> pattern for instr[13], instr[14], instr[15] */
    static const struct {
        const char *name;
        const char *bits;
    } jumps[] = {
        {"JGT", "001"},
        {"JEQ", "010"},
        {"JGE", "011"},
        {"JLT", "100"},
        {"JNE", "101"},
        {"JLE", "110"},
        {"JMP", "111"}
    };
    int j, k;

    for (j = 0; j < (int)(sizeof jumps / sizeof jumps[0]); j++) {
        if (strcmp(str, jumps[j].name) != 0)
            continue;
        for (k = 0; k < 3; k++) {
            if (jumps[j].bits[k] == '1')
                instr[13 + k] = '1';
        }
        return;
    }
}
The sub-functions are easy to understand: they just set the corresponding bits according to the command.
Ccommad() calls the corresponding sub-function when it meets '=' or ';'.
Actually, getvar() cuts the C-command where it meets '=' or ';', so the command is divided into three parts and the checks are easy to do.
Here is the complete code. (PS: I am not the author; I just downloaded it and studied it.)
http://pan.baidu.com/s/1mglzIL6