CSAPP lab. assumulator and assembler

两个关键的c文件实现了一个Y86模拟器和一个Y86汇编器

Y86模拟器源代码:

/* Instruction set simulator for Y86 Architecture */

#include <stdio.h>
#include <stdlib.h>

#include "y86sim.h"

#define err_print(_s, _a ...) \
    fprintf(stdout, _s"\n", _a);

long_t get_reg_val(mem_t*, regid_t);
void set_reg_val(mem_t*, regid_t,long_t);

typedef enum {STAT_AOK, STAT_HLT, STAT_ADR, STAT_INS} stat_t;

char *stat_names[] = { "AOK", "HLT", "ADR", "INS" };

char *stat_name(stat_t e)
{
    if (e < STAT_AOK || e > STAT_INS)
        return "Invalid Status";
    return stat_names[e];
}

char *cc_names[8] = {
    "Z=0 S=0 O=0",
    "Z=0 S=0 O=1",
    "Z=0 S=1 O=0",
    "Z=0 S=1 O=1",
    "Z=1 S=0 O=0",
    "Z=1 S=0 O=1",
    "Z=1 S=1 O=0",
    "Z=1 S=1 O=1" };

char *cc_name(cc_t c)
{
    int ci = c;
    if (ci < 0 || ci > 7)
        return "???????????";
    else
        return cc_names[c];
}

bool_t get_byte_val(mem_t *m, long_t addr, byte_t *dest)
{
    if (addr < 0 || addr >= m->len)
        return FALSE;
    *dest = m->data[addr];
    return TRUE;
}

bool_t get_long_val(mem_t *m, long_t addr, long_t *dest)
{
    int i;
    long_t val;
    if (addr < 0 || addr + 4 > m->len)
	    return FALSE;
    val = 0;
    for (i = 0; i < 4; i++)
	    val = val | m->data[addr+i]<<(8*i);
    *dest = val;
    return TRUE;
}

bool_t set_byte_val(mem_t *m, long_t addr, byte_t val)
{
    if (addr < 0 || addr >= m->len)
	    return FALSE;
    m->data[addr] = val;
    return TRUE;
}

bool_t set_long_val(mem_t *m, long_t addr, long_t val)
{
    int i;
    if (addr < 0 || addr + 4 > m->len)
	    return FALSE;
    for (i = 0; i < 4; i++) {
    	m->data[addr+i] = val & 0xFF;
    	val >>= 8;
    }
    return TRUE;
}

bool_t push_long_val(y86sim_t *sim, long_t val)
{
	long_t regESP_cont = get_reg_val(sim->r,REG_ESP) - 0x4;
	set_reg_val(sim->r,REG_ESP ,regESP_cont);
	if(!set_long_val(sim->m,regESP_cont,val))
		return FALSE;
	return TRUE;
}

bool_t pop_long_val(y86sim_t *sim,long_t *dest)
{
	long_t regESP_cont = get_reg_val(sim->r,REG_ESP);
	if(!get_long_val(sim->m,regESP_cont,dest))
		return FALSE;
	set_reg_val(sim->r,REG_ESP,regESP_cont+0x4);
	return TRUE;
}

mem_t *init_mem(int len)
{
    mem_t *m = (mem_t *)malloc(sizeof(mem_t));
    len = ((len+BLK_SIZE-1)/BLK_SIZE)*BLK_SIZE;
    m->len = len;
    m->data = (byte_t *)calloc(len, 1);

    return m;
}

void free_mem(mem_t *m)
{
    free((void *) m->data);
    free((void *) m);
}

mem_t *dup_mem(mem_t *oldm)
{
    mem_t *newm = init_mem(oldm->len);
    memcpy(newm->data, oldm->data, oldm->len);
    return newm;
}

bool_t diff_mem(mem_t *oldm, mem_t *newm, FILE *outfile)
{
    long_t pos;
    int len = oldm->len;
    bool_t diff = FALSE;
    
    if (newm->len < len)
	    len = newm->len;
    
    for (pos = 0; (!diff || outfile) && pos < len; pos += 4) {
        long_t ov = 0;  long_t nv = 0;
        get_long_val(oldm, pos, &ov);
        get_long_val(newm, pos, &nv);
        if (nv != ov) {
            diff = TRUE;
            if (outfile)
                fprintf(outfile, "0x%.4x:\t0x%.8x\t0x%.8x\n", pos, ov, nv);
        }
    }
    return diff;
}


reg_t reg_table[REG_CNT] = {
    {"%eax", REG_EAX},
    {"%ecx", REG_ECX},
    {"%edx", REG_EDX},
    {"%ebx", REG_EBX},
    {"%esp", REG_ESP},
    {"%ebp", REG_EBP},
    {"%esi", REG_ESI},
    {"%edi", REG_EDI},
};

long_t get_reg_val(mem_t *r, regid_t id)
{
    long_t val = 0;
    if (id >= REG_NONE)
        return 0;
    get_long_val(r, id*4, &val);
    return val;
}

void set_reg_val(mem_t *r, regid_t id, long_t val)
{
    if (id < REG_NONE)
        set_long_val(r, id*4, val);
}

mem_t *init_reg()
{
    return init_mem(REG_SIZE);
}

void free_reg(mem_t *r)
{
    free_mem(r);
}

mem_t *dup_reg(mem_t *oldr)
{
    return dup_mem(oldr);
}

bool_t diff_reg(mem_t *oldr, mem_t *newr, FILE *outfile)
{
    long_t pos;
    int len = oldr->len;
    bool_t diff = FALSE;
    
    if (newr->len < len)
	    len = newr->len;
    
    for (pos = 0; (!diff || outfile) && pos < len; pos += 4) {
        long_t ov = 0;
        long_t nv = 0;
        get_long_val(oldr, pos, &ov);
        get_long_val(newr, pos, &nv);
        if (nv != ov) {
            diff = TRUE;
            if (outfile)
                fprintf(outfile, "%s:\t0x%.8x\t0x%.8x\n",
                        reg_table[pos/4].name, ov, nv);
        }
    }
    return diff;
}

/* create an y86 image with registers and memory */
y86sim_t *new_y86sim(int slen)
{
    y86sim_t *sim = (y86sim_t*)malloc(sizeof(y86sim_t));
    sim->pc = 0;
    sim->r = init_reg();
    sim->m = init_mem(slen);
    sim->cc = DEFAULT_CC;
    return sim;
}

void free_y86sim(y86sim_t *sim)
{
    free_reg(sim->r);
    free_mem(sim->m);
    free((void *) sim);
}

/* load binary code and data from file to memory image */
int load_binfile(mem_t *m, FILE *f)
{
    int flen;

    clearerr(f);
    flen = fread(m->data, sizeof(byte_t), m->len, f);
    if (ferror(f)) {
        err_print("fread() failed (0x%x)", flen);
        return -1;
    }
    if (!feof(f)) {
        err_print("too large memory footprint (0x%x)", flen);
        return -1;
    }
    return 0;
}

/*
 * compute_alu: do ALU operations 
 * args
 *     op: operations (A_ADD, A_SUB, A_AND, A_XOR)
 *     argA: the first argument 
 *     argB: the second argument
 *
 * return
 *     val: the result of operation on argA and argB
 */
long_t compute_alu(alu_t op, long_t argA, long_t argB)
{
	switch (op) {
		case A_ADD:
		  return argB + argA;
		case A_SUB:
		  return argB - argA;
		case A_AND:
		  return argB & argA;
		case A_XOR:
		  return argB ^ argA;
		case A_NONE: /* act as default */
		default:
		  return 0;
	}
}

/*
 * compute_cc: modify condition codes according to operations 
 * args
 *     op: operations (A_ADD, A_SUB, A_AND, A_XOR)
 *     argA: the first argument 
 *     argB: the second argument
 *     val: the result of operation on argA and argB
 *
 * return
 *     PACK_CC: the final condition codes
 */
cc_t compute_cc(alu_t op, long_t argA, long_t argB, long_t val)
{
    bool_t zero = FALSE;
    bool_t sign = FALSE;
    bool_t ovf = FALSE;

	zero = val == 0;
	sign = val < 0;
	switch (op) {
		case A_ADD:
			if (argA > 0 && argB > 0)
				ovf = val < 0;
			else if (argA < 0 && argB < 0)
				ovf = val >= 0;
			else
				ovf = 0;
			break;
		case A_SUB:
			if (-argA > 0 && argB > 0)
				ovf = val < 0;
			else if (-argA < 0 && argB < 0)
				ovf = val >= 0;
			else
				ovf = 0;
			break;
		case A_AND: case A_XOR:
			break;
		case A_NONE:
			zero = FALSE;
			sign = FALSE;
			break;
	}
    return PACK_CC(zero,sign,ovf);
}

/*
 * cond_doit: whether do (mov or jmp) it?  
 * args
 *     PACK_CC: the current condition codes
 *     cond: conditions (C_YES, C_LE, C_L, C_E, C_NE, C_GE, C_G)
 *
 * return
 *     TRUE: do it
 *     FALSE: not do it
 */
bool_t cond_doit(cc_t cc, cond_t cond) 
{
	switch (cond) {
		case C_YES:
		  return TRUE;
		case C_LE:
		  return (GET_SF(cc) ^ GET_OF(cc))| GET_ZF(cc);
		case C_L:
		  return GET_SF(cc) ^ GET_OF(cc);
		  break;
		case C_E:
		  return GET_ZF(cc);
		case C_NE:
		  return !GET_ZF(cc);
		case C_GE:
		  return !(GET_SF(cc) ^ GET_OF(cc));
		case C_G:
		  return !(GET_SF(cc) ^ GET_OF(cc)) & !GET_ZF(cc);
		default:
		  return FALSE;
	}
}

/* 
 * nexti: execute single instruction and return status.
 * args
 *     sim: the y86 image with PC, register and memory
 *
 * return
 *     STAT_AOK: continue
 *     STAT_HLT: halt
 *     STAT_ADR: invalid instruction address
 *     STAT_INS: invalid instruction, register id, data address, stack address, ...
 */
stat_t nexti(y86sim_t *sim)
{
    byte_t codefun = 0;
    itype_t icode;
    alu_t ifun;
    long_t next_pc = sim->pc;

	regid_t regA = REG_NONE, regB = REG_NONE;
	long_t imm;

    /* get code and function (1 byte) */
    if (!get_byte_val(sim->m, next_pc, &codefun)) {
        err_print("PC = 0x%x, Invalid instruction address", sim->pc);
        return STAT_ADR;
    }
    icode = GET_ICODE(codefun);
    ifun = GET_FUN(codefun);
    next_pc++;

    /* get registers if needed (1 byte) */
	switch (icode) {
		case I_RRMOVL: case I_IRMOVL: case I_RMMOVL: case I_MRMOVL: case I_ALU: case I_POPL: case I_PUSHL:
			if (!get_byte_val(sim->m, next_pc, &codefun)) {
				err_print("PC = 0x%x, Invalid instruction address", sim->pc);
				return STAT_ADR;
			}
			regA = GET_REGA(codefun);
			regB = GET_REGB(codefun);
			next_pc++;
			break;
		default:
			break;
    }

    /* get immediate if needed (4 bytes) */
	switch (icode) {
		case I_IRMOVL: case I_RMMOVL: case I_MRMOVL: case I_JMP: case I_CALL:
			if (!get_long_val(sim->m, next_pc, &imm)) {
				err_print("PC = 0x%x, Invalid instruction address", sim->pc);
				return STAT_ADR;
			}
			next_pc += 0x4;
			break;
		default:
			break;
	}

    /* execute the instruction */
    switch (icode) {
      case I_HALT: /* 0:0 */
        return STAT_HLT;
        break;

      case I_NOP: /* 1:0 */
        sim->pc = next_pc;
        break;

      case I_RRMOVL:  /* 2:x regA:regB */
		sim->pc = next_pc;
	  	if(!cond_doit(sim->cc,ifun))
			break;
		set_reg_val(sim->r,regB,get_reg_val(sim->r,regA));
		break;

      case I_IRMOVL: /* 3:0 F:regB imm */
	    if (regA != 0xF) {
			err_print("PC = 0x%x, Invalid instruction address", sim->pc);
			return STAT_ADR;
		}
		set_reg_val(sim->r,regB,imm);
        sim->pc = next_pc;
		break;

      case I_RMMOVL: /* 4:0 regA:regB imm */
	  {
		  long_t regB_cont = get_reg_val(sim->r,regB);
		  if(!set_long_val(sim->m, imm+regB_cont, get_reg_val(sim->r,regA))) {
			  err_print("PC = 0x%x, Invalid data address 0x%x",
					  sim->pc,regB_cont+imm);
		  return STAT_ADR;
		  }
		  sim->pc = next_pc;
		  break;
	  }

      case I_MRMOVL: /* 5:0 regB:regA imm */
	  {
		  long_t cont;
		  if(!get_long_val(sim->m,get_reg_val(sim->r,regB)+imm,&cont)) {
			  err_print("PC = 0x%x, Invalid data address 0x%x",
					  sim->pc,get_reg_val(sim->r,regB)+imm);
			  return STAT_ADR;
		  }
		  set_reg_val(sim->r,regA,cont);
		  sim->pc = next_pc;
		  break;
	  }

      case I_ALU: /* 6:x regA:regB */
	  {
		  long_t regA_cont = get_reg_val(sim->r,regA),
		  		 regB_cont = get_reg_val(sim->r,regB);
		  long_t res = compute_alu(ifun,regA_cont,regB_cont);
		  sim->cc = compute_cc(ifun,regA_cont,regB_cont,res);
		  set_reg_val(sim->r,regB,res);
          sim->pc = next_pc;
		  break;
	  }

      case I_JMP: /* 7:x imm */
	  	if(cond_doit(sim->cc,ifun))
			sim->pc = imm;
		else
			sim->pc = next_pc;
		break;

      case I_CALL: /* 8:x imm */
		if(imm < 0 || imm > sim->m->len) {
			err_print("WTFatCALL %x",sim->pc);
			return STAT_ADR;
		}
		else {
			sim->pc = imm;
			if(!push_long_val(sim,next_pc)) {
				err_print("PC = 0x%x, Invalid stack address 0x%x",
						sim->pc,get_reg_val(sim->r,REG_ESP));
				return STAT_ADR;
			}
		}
		break;

      case I_RET: /* 9:0 */
	  	if(!pop_long_val(sim,&(sim->pc))) {
			err_print("PC = 0x%x, Invalid stack address 0x%x",
					sim->pc,get_reg_val(sim->r,REG_ESP));
			return STAT_ADR;
		}
		break;

      case I_PUSHL: /* A:0 regA:F */
	  	if(!push_long_val(sim,get_reg_val(sim->r,regA))) {
			err_print("PC = 0x%x, Invalid stack address 0x%x",
				sim->pc,get_reg_val(sim->r,REG_ESP));
			return STAT_ADR;
		}
		sim->pc = next_pc;
		break;

      case I_POPL: /* B:0 regA:F */
	  {
		  sim->pc = next_pc;
		  if(regB != 0xF) {
			  err_print("PC = 0x%x, Invalid instruction %.2x", sim->pc, codefun);
			  return STAT_INS;
		  }
		  long_t val;
	      if(!pop_long_val(sim,&val)) {
			  err_print("PC = 0x%x, Invalid stack address 0x%x",
				  sim->pc,get_reg_val(sim->r,REG_ESP));
			  return STAT_ADR;
		  }
		  set_reg_val(sim->r,regA,val);
          break;
	  }

      default:
        err_print("PC = 0x%x, Invalid instruction %.2x", sim->pc, codefun);
        return STAT_INS;
    }
    
    return STAT_AOK;
}

void usage(char *pname)
{
    printf("Usage: %s file.bin [max_steps]\n", pname);
    exit(0);
}

int main(int argc, char *argv[])
{
    FILE *binfile;
    int max_steps = MAX_STEP;
    y86sim_t *sim;
    mem_t *saver, *savem;
    int step = 0;
    stat_t e = STAT_AOK;

    if (argc < 2 || argc > 3)
        usage(argv[0]);

    /* set max steps */
    if (argc > 2)
        max_steps = atoi(argv[2]);

    /* load binary file to memory */
    if (strcmp(argv[1]+(strlen(argv[1])-4), ".bin"))
        usage(argv[0]); /* only support *.bin file */
    
    binfile = fopen(argv[1], "rb");
    if (!binfile) {
        err_print("Can't open binary file '%s'", argv[1]);
        exit(1);
    }

    sim = new_y86sim(MEM_SIZE);
    if (load_binfile(sim->m, binfile) < 0) {
        err_print("Failed to load binary file '%s'", argv[1]);
        free_y86sim(sim);
        exit(1);
    }
    fclose(binfile);

    /* save initial register and memory stat */
    saver = dup_reg(sim->r);
    savem = dup_mem(sim->m);

    /* execute binary code step-by-step */
    for (step = 0; step < max_steps && e == STAT_AOK; step++)
        e = nexti(sim);

    /* print final stat of y86sim */
    printf("Stopped in %d steps at PC = 0x%x.  Status '%s', CC %s\n",
            step, sim->pc, stat_name(e), cc_name(sim->cc));

    printf("Changes to registers:\n");
    diff_reg(saver, sim->r, stdout);

    printf("\nChanges to memory:\n");
    diff_mem(savem, sim->m, stdout);

    free_y86sim(sim);
    free_reg(saver);
    free_mem(savem);

    return 0;
}

总的来说就是根据给定的机器码来改变reg和mem中的状态。


Y86汇编器:

#ifndef _Y86_ASM_
#define _Y86_ASM_

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define MAX_INSLEN  512

typedef unsigned char byte_t;
typedef int word_t;
typedef enum { FALSE, TRUE } bool_t;


/* Y86 Register (REG_NONE is a special one to indicate no register) */
typedef enum { REG_ERR=-1, REG_EAX, REG_ECX, REG_EDX, REG_EBX,
    REG_ESP, REG_EBP, REG_ESI, REG_EDI, REG_CNT, REG_NONE=0xF } regid_t;

typedef struct reg {
    char *name;
    regid_t id;
} reg_t;

#define SIZEOF_REG  4


/* Y86 Instruction */
typedef enum { I_HALT, I_NOP, I_RRMOVL, I_IRMOVL, I_RMMOVL, I_MRMOVL,
    I_ALU, I_JMP, I_CALL, I_RET, I_PUSHL, I_POPL, I_DIRECTIVE } itype_t;

/* Function code (default) */
typedef enum { F_NONE } func_t;

/* ALU code */
typedef enum { A_ADD, A_SUB, A_AND, A_XOR, A_NONE } alu_t;

/* Condition code */
typedef enum { C_YES, C_LE, C_L, C_E, C_NE, C_GE, C_G } cond_t;

/* Directive code */
typedef enum { D_DATA, D_POS, D_ALIGN } dtv_t;

/* Pack itype and func/alu/cond/dtv into single byte */
#define HPACK(hi,lo) ((((hi)&0xF)<<4)|((lo)&0xF))
#define HIGH(pack) ((pack)>>4&0xF)
#define LOW(pack) ((pack)&0xF)

/* Table used to encode information about instructions */
typedef struct instr {
    char *name;
    int len;
    byte_t code; /* code for instruction+op */
    int bytes; /* the size of instr */
} instr_t;


/* Token types: comment, instruction, error */
typedef enum{ TYPE_COMM, TYPE_INS, TYPE_ERR } type_t;

typedef struct bin {
    int addr;
    byte_t codes[6];
    int bytes;
} bin_t;

typedef struct line {
    type_t type; /* TYPE_COMM: no y86bin, TYPE_INS: both y86bin and y86asm */
    bin_t y86bin;
    char *y86asm;
    
    struct line *next;
} line_t;

/* label defined in y86 assembly code, e.g. Loop */
typedef struct symbol {
    char *name;
    int addr;
    struct symbol *next;
} symbol_t;

/* binary code need to be relocated */
typedef struct reloc {
    bin_t *y86bin;
    char *name;
    struct reloc *next;
	int entry;
} reloc_t;

#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "y86asm.h"

//#define DEBUG_CODE

line_t *y86bin_listhead = NULL;   /* the head of y86 binary code line list*/
line_t *y86bin_listtail = NULL;   /* the tail of y86 binary code line list*/
int y86asm_lineno = 0; /* the current line number of y86 assemble code */

#define err_print(_s, _a ...) do { \
  if (y86asm_lineno < 0) \
    fprintf(stderr, "[--]: "_s"\n", ## _a); \
  else \
    fprintf(stderr, "[L%d]: "_s"\n", y86asm_lineno, ## _a); \
} while (0);


int vmaddr = 0;    /* vm addr */

/* register table */
reg_t reg_table[REG_CNT] = {
    {"%eax", REG_EAX},
    {"%ecx", REG_ECX},
    {"%edx", REG_EDX},
    {"%ebx", REG_EBX},
    {"%esp", REG_ESP},
    {"%ebp", REG_EBP},
    {"%esi", REG_ESI},
    {"%edi", REG_EDI},
};

regid_t find_register(char *name)
{
    int i;
    for (i = 0; i < REG_CNT; i++)
        if (!strncmp(name, reg_table[i].name, 4))
            return reg_table[i].id;
    return REG_ERR;
}

/* instruction set */
instr_t instr_set[] = {
    {"nop", 3,   HPACK(I_NOP, F_NONE), 1 },
    {"halt", 4,  HPACK(I_HALT, F_NONE), 1 },
    {"rrmovl", 6,HPACK(I_RRMOVL, F_NONE), 2 },
    {"cmovle", 6,HPACK(I_RRMOVL, C_LE), 2 },
    {"cmovl", 5, HPACK(I_RRMOVL, C_L), 2 },
    {"cmove", 5, HPACK(I_RRMOVL, C_E), 2 },
    {"cmovne", 6,HPACK(I_RRMOVL, C_NE), 2 },
    {"cmovge", 6,HPACK(I_RRMOVL, C_GE), 2 },
    {"cmovg", 5, HPACK(I_RRMOVL, C_G), 2 },
    {"irmovl", 6,HPACK(I_IRMOVL, F_NONE), 6 },
    {"rmmovl", 6,HPACK(I_RMMOVL, F_NONE), 6 },
    {"mrmovl", 6,HPACK(I_MRMOVL, F_NONE), 6 },
    {"addl", 4,  HPACK(I_ALU, A_ADD), 2 },
    {"subl", 4,  HPACK(I_ALU, A_SUB), 2 },
    {"andl", 4,  HPACK(I_ALU, A_AND), 2 },
    {"xorl", 4,  HPACK(I_ALU, A_XOR), 2 },
    {"jmp", 3,   HPACK(I_JMP, C_YES), 5 },
    {"jle", 3,   HPACK(I_JMP, C_LE), 5 },
    {"jl", 2,    HPACK(I_JMP, C_L), 5 },
    {"je", 2,    HPACK(I_JMP, C_E), 5 },
    {"jne", 3,   HPACK(I_JMP, C_NE), 5 },
    {"jge", 3,   HPACK(I_JMP, C_GE), 5 },
    {"jg", 2,    HPACK(I_JMP, C_G), 5 },
    {"call", 4,  HPACK(I_CALL, F_NONE), 5 },
    {"ret", 3,   HPACK(I_RET, F_NONE), 1 },
    {"pushl", 5, HPACK(I_PUSHL, F_NONE), 2 },
    {"popl", 4,  HPACK(I_POPL, F_NONE),  2 },
    {".byte", 5, HPACK(I_DIRECTIVE, D_DATA), 1 },
    {".word", 5, HPACK(I_DIRECTIVE, D_DATA), 2 },
    {".long", 5, HPACK(I_DIRECTIVE, D_DATA), 4 },
    {".pos", 4,  HPACK(I_DIRECTIVE, D_POS), 0 },
    {".align", 6,HPACK(I_DIRECTIVE, D_ALIGN), 0 },
    {NULL, 1,    0   , 0 } //end
};

instr_t *find_instr(char *name)
{
    int i;
    for (i = 0; instr_set[i].name; i++)
	if (strncmp(instr_set[i].name, name, instr_set[i].len) == 0)
	    return &instr_set[i];
    return NULL;
}

/* symbol table (don't forget to init and finit it) */
symbol_t *symtab = NULL;

/*
 * find_symbol: scan table to find the symbol
 * args
 *     name: the name of symbol
 *
 * return
 *     symbol_t: the 'name' symbol
 *     NULL: not exist
 */
symbol_t *find_symbol(char *name)
{
	symbol_t *p = symtab->next;
	while(p != NULL) {
		if(strcmp(name,p->name)==0)
			return p;
		p = p->next;
	}
    return NULL;
}

/*
 * add_symbol: add a new symbol to the symbol table
 * args
 *     name: the name of symbol
 *
 * return
 *     0: success
 *     -1: error, the symbol has exist
 */
int add_symbol(char *name)
{    
    /* check duplicate */
	if(find_symbol(name))
		return -1;
    /* create new symbol_t (don't forget to free it)*/
    /* add the new symbol_t to symbol table */
	symbol_t *p = (symbol_t*)malloc(sizeof(symbol_t));
	p->name = (char*)malloc((strlen(name)+1)*sizeof(char));
	strcpy(p->name,name);
	p->addr = vmaddr;
	p->next = symtab->next;
	symtab->next = p;
    return 0;
}

/* relocation table (don't forget to init and finit it) */
reloc_t *reltab = NULL;

/*
 * add_reloc: add a new relocation to the relocation table
 * args
 *     name: the name of symbol
 *
 * return
 *     0: success
 *     -1: error, the symbol has exist
 */
void add_reloc(char *name, bin_t *bin,int entry)
{
    /* create new reloc_t (don't forget to free it)*/
	reloc_t *p = (reloc_t*)malloc(sizeof(reloc_t));
	p->name = (char*)malloc(sizeof(char) * (strlen(name)+1));
	strcpy(p->name,name);
	p->y86bin = bin;
	p->entry = entry;
    
    /* add the new reloc_t to relocation table */
	p->next = reltab->next;
	reltab->next=p;
}


/* macro for parsing y86 assembly code */
#define CHECK_PARSE_ERR(_r,_s) do {if(_r == PARSE_ERR){line->type=TYPE_ERR;err_print(_s);goto out;}} while(0);
#define IS_DIGIT(s) ((*(s)>='0' && *(s)<='9') || *(s)=='-' || *(s)=='+')
#define IS_LETTER(s) ((*(s)>='a' && *(s)<='z') || (*(s)>='A' && *(s)<='Z'))
#define IS_COMMENT(s) (*(s)=='#')
#define IS_REG(s) (*(s)=='%')
#define IS_IMM(s) (*(s)=='$')

#define IS_BLANK(s) (*(s)==' ' || *(s)=='\t')
#define IS_END(s) (*(s)=='\0')

#define SKIP_BLANK(s) do {  \
  while(!IS_END(s) && IS_BLANK(s))  \
    (s)++;    \
} while(0);

/* return value from different parse_xxx function */
typedef enum { PARSE_ERR=-1, PARSE_REG, PARSE_DIGIT, PARSE_SYMBOL, 
    PARSE_MEM, PARSE_DELIM, PARSE_INSTR, PARSE_LABEL} parse_t;

/*
 * parse_instr: parse an expected data token (e.g., 'rrmovl')
 * args
 *     ptr: point to the start of string
 *     inst: point to the inst_t within instr_set
 *
 * return
 *     PARSE_INSTR: success, move 'ptr' to the first char after token,
 *                            and store the pointer of the instruction to 'inst'
 *     PARSE_ERR: error, the value of 'ptr' and 'inst' are undefined
 */
parse_t parse_instr(char **ptr, instr_t **inst)
{
    char *cur = *ptr;
    instr_t *tmp;

    /* skip the blank */
    SKIP_BLANK(cur);
    if (IS_END(cur))
        return PARSE_ERR;

    /* find_instr and check end */
    tmp = find_instr(cur);
    if (tmp == NULL)
        return PARSE_ERR;

    cur += tmp->len;
    if (!IS_END(cur) && !IS_BLANK(cur))
        return PARSE_ERR;

    /* set 'ptr' and 'inst' */
    *inst = tmp;
    *ptr = cur;
    return PARSE_INSTR;
}

/*
 * parse_delim: parse an expected delimiter token (e.g., ',')
 * args
 *     ptr: point to the start of string
 *
 * return
 *     PARSE_DELIM: success, move 'ptr' to the first char after token
 *     PARSE_ERR: error, the value of 'ptr' and 'delim' are undefined
 */
parse_t parse_delim(char **ptr, char delim)
{
    /* skip the blank and check */
	SKIP_BLANK(*ptr);
	if(**ptr==delim) {
		/* set 'ptr' */
		(*ptr)++;
		return PARSE_DELIM;
	}
	return PARSE_ERR;
}

/*
 * parse_reg: parse an expected register token (e.g., '%eax')
 * args
 *     ptr: point to the start of string
 *     regid: point to the regid of register
 *
 * return
 *     PARSE_REG: success, move 'ptr' to the first char after token, 
 *                         and store the regid to 'regid'
 *     PARSE_ERR: error, the value of 'ptr' and 'regid' are undefined
 */
parse_t parse_reg(char **ptr, regid_t *regid)
{
    /* skip the blank and check */
	SKIP_BLANK(*ptr);
	if(!IS_REG(*ptr))
		return PARSE_ERR;
    /* find register */
		/* set 'ptr' and 'regid' */
	if((*regid = find_register(*ptr))!=REG_ERR) {
		*ptr += 4;
		return PARSE_REG;
	}
    return PARSE_ERR;
}

/*
 * parse_symbol: parse an expected symbol token (e.g., 'Main')
 * args
 *     ptr: point to the start of string
 *     name: point to the name of symbol (should be allocated in this function)
 *
 * return
 *     PARSE_SYMBOL: success, move 'ptr' to the first char after token,
 *                               and allocate and store name to 'name'
 *     PARSE_ERR: error, the value of 'ptr' and 'name' are undefined
 */
parse_t parse_symbol(char **ptr, char **name)
{
    /* skip the blank and check */
	SKIP_BLANK(*ptr);
	char* beg = *ptr;
	while(IS_LETTER(*ptr) || (**ptr <= '9' && **ptr >= '0'))
		++(*ptr);
	if(*ptr == beg)
		return PARSE_ERR;
	size_t len = *ptr-beg;
    /* allocate name and copy to it */
	*name = (char*)malloc(sizeof(char)*(len+2));
	strncpy(*name,beg,len);
	(*name)[len] = '\0';
    /* set 'ptr' and 'name' */

    return PARSE_SYMBOL;
}

/*
 * parse_digit: parse an expected digit token (e.g., '0x100')
 * args
 *     ptr: point to the start of string
 *     value: point to the value of digit
 *
 * return
 *     PARSE_DIGIT: success, move 'ptr' to the first char after token
 *                            and store the value of digit to 'value'
 *     PARSE_ERR: error, the value of 'ptr' and 'value' are undefined
 */
parse_t parse_digit(char **ptr, long *value)
{
    /* skip the blank and check */
    /* calculate the digit, (NOTE: see strtoll()) */
	char* end_check;
	*value = strtoll(*ptr,&end_check,0);
	if(end_check == *ptr)
		return PARSE_ERR;
	*ptr = end_check;
    /* set 'ptr' and 'value' */
    return PARSE_DIGIT;
}

/*
 * parse_imm: parse an expected immediate token (e.g., '$0x100' or 'STACK')
 * args
 *     ptr: point to the start of string
 *     name: point to the name of symbol (should be allocated in this function)
 *     value: point to the value of digit
 *
 * return
 *     PARSE_DIGIT: success, the immediate token is a digit,
 *                            move 'ptr' to the first char after token,
 *                            and store the value of digit to 'value'
 *     PARSE_SYMBOL: success, the immediate token is a symbol,
 *                            move 'ptr' to the first char after token,
 *                            and allocate and store name to 'name' 
 *     PARSE_ERR: error, the value of 'ptr', 'name' and 'value' are undefined
 */
parse_t parse_imm(char **ptr, char **name, long *value)
{
    /* skip the blank and check */
	SKIP_BLANK(*ptr);
    /* if IS_IMM, then parse the digit */
	if(IS_IMM(*ptr)) {
		++(*ptr); /* jump over $ */
		if(parse_digit(ptr,value) == PARSE_ERR)
			return PARSE_ERR;
		return PARSE_DIGIT;
	}
    /* if IS_LETTER, then parse the symbol */
	if(IS_LETTER(*ptr)) {
		if(parse_symbol(ptr,name) == PARSE_ERR)
			return PARSE_ERR;
		return PARSE_SYMBOL;
	}
    
    /* set 'ptr' and 'name' or 'value' */
    return PARSE_ERR;
}

/*
 * parse_mem: parse an expected memory token (e.g., '8(%ebp)')
 * args
 *     ptr: point to the start of string
 *     value: point to the value of digit
 *     regid: point to the regid of register
 *
 * return
 *     PARSE_MEM: success, move 'ptr' to the first char after token,
 *                          and store the value of digit to 'value',
 *                          and store the regid to 'regid'
 *     PARSE_ERR: error, the value of 'ptr', 'value' and 'regid' are undefined
 */
parse_t parse_mem(char **ptr, long *value, regid_t *regid)
{
    /* skip the blank and check */
	SKIP_BLANK(*ptr);
	parse_digit(ptr,value);
	if(parse_delim(ptr,'(') == PARSE_ERR)
		return PARSE_ERR;
	if(parse_reg(ptr,regid) == PARSE_ERR)
		return PARSE_ERR;
	if(parse_delim(ptr,')') == PARSE_ERR)
		return PARSE_ERR;

    /* calculate the digit and register, (ex: (%ebp) or 8(%ebp)) */

    /* set 'ptr', 'value' and 'regid' */

    return PARSE_MEM;
}

/*
 * parse_data: parse an expected data token (e.g., '0x100' or 'array')
 * args
 *     ptr: point to the start of string
 *     name: point to the name of symbol (should be allocated in this function)
 *     value: point to the value of digit
 *
 * return
 *     PARSE_DIGIT: success, data token is a digit,
 *                            and move 'ptr' to the first char after token,
 *                            and store the value of digit to 'value'
 *     PARSE_SYMBOL: success, data token is a symbol,
 *                            and move 'ptr' to the first char after token,
 *                            and allocate and store name to 'name' 
 *     PARSE_ERR: error, the value of 'ptr', 'name' and 'value' are undefined
 */
parse_t parse_data(char **ptr, char **name, long *value)
{
    /* skip the blank and check */

    /* if IS_DIGIT, then parse the digit */

    /* if IS_LETTER, then parse the symbol */

    /* set 'ptr', 'name' and 'value' */

    return PARSE_ERR;
}

/*
 * parse_label: parse an expected label token (e.g., 'Loop:')
 * args
 *     ptr: point to the start of string
 *     name: point to the name of symbol (should be allocated in this function)
 *
 * return
 *     PARSE_LABEL: success, move 'ptr' to the first char after token
 *                            and allocate and store name to 'name'
 *     PARSE_ERR: error, the value of 'ptr' is undefined
 */
parse_t parse_label(char **ptr, char **name)
{
    /* skip the blank and check */
	char *loc,*check;
	SKIP_BLANK(*ptr);
	if((loc = strchr(*ptr,':')) != NULL) {
		check = strchr(*ptr,'#');
		if(check < loc && check!=NULL)
			return PARSE_ERR;
		/* allocate name and copy to it */
		size_t len = loc-*ptr;
		*name = (char*)malloc(sizeof(char) * (len+1));
		/* set 'ptr' and 'name' */
		strncpy(*name,*ptr,len);
		*ptr = loc+1;
		(*name)[len] = '\0';
		return PARSE_LABEL;
	}
	return PARSE_ERR;
}

/*
 * parse_line: parse a line of y86 code (e.g., 'Loop: mrmovl (%ecx), %esi')
 * (you could combine above parse_xxx functions to do it)
 * args
 *     line: point to a line_t data with a line of y86 assembly code
 *
 * return
 *     PARSE_XXX: success, fill line_t with assembled y86 code
 *     PARSE_ERR: error, try to print err information (e.g., instr type and line number)
 */
type_t parse_line(line_t *line)
{
    bin_t *y86bin;
    char * y86asm;  /* a copy of line->y86asm */
    char *label = NULL;
    instr_t *inst = NULL;

    char *cur;
    int ret;

    y86bin = &line->y86bin;
    y86asm = (char *)
        malloc(sizeof(char) * (strlen(line->y86asm) + 1));
    strcpy(y86asm, line->y86asm);
    cur = y86asm;

/* when finish parse an instruction or label, we still need to continue check 
* e.g., 
*  Loop: mrmovl (%ebp), %ecx
*           call SUM  #invoke SUM function */
cont:

    /* skip blank and check IS_END */
    SKIP_BLANK(cur);
    if (IS_END(cur))
        goto out; /* done */
    
    /* is a comment ? */
    if (IS_COMMENT(cur)) {
        goto out; /* skip rest */
    }


    /* is a label ? */
    ret = parse_label(&cur, &label);
    if (ret == PARSE_LABEL) {
        /* add new symbol */
        if (add_symbol(label) < 0) {
            line->type = TYPE_ERR;
            err_print("Dup symbol:%s", label);
            goto out;
        }

        /* set type and y86bin */
        line->type = TYPE_INS;
        line->y86bin.addr = vmaddr;

        /* continue */
        goto cont;
    }

    /* is an instruction ? */
    ret = parse_instr(&cur, &inst);
	CHECK_PARSE_ERR(ret,"Invalid instr");

    /* set type and y86bin */
    line->type = TYPE_INS;
    y86bin->addr = vmaddr;
    y86bin->codes[0] = inst->code;
    y86bin->bytes = inst->bytes;

    /* update vmaddr */    
    vmaddr += inst->bytes;

    /* parse the rest of instruction according to the itype */
    switch (HIGH(inst->code)) {
      /* further partition the y86 instructions according to the format */
      case I_HALT:  /* 0:0 - e.g., halt */
      case I_NOP:   /* 1:0 - e.g., nop */
      case I_RET: { /* 9:0 - e.g., ret" */
        goto cont;
      }

      case I_PUSHL: /* A:0 regA:F - e.g., pushl %esp */
      case I_POPL: {/* B:0 regA:F - e.g., popl %ebp */
        /* parse register */
		regid_t regid;
		ret = parse_reg(&cur,®id);
		CHECK_PARSE_ERR(ret,"Invalid REG");
        /* set y86bin codes */
		y86bin->codes[1] = HPACK(regid,0xF);
        goto cont;
      }
   
      case I_RRMOVL:/* 2:x regA,regB - e.g., rrmovl %esp, %ebp */
      case I_ALU: { /* 6:x regA,regB - e.g., xorl %eax, %eax */
		regid_t regidA,regidB;
		ret = parse_reg(&cur,®idA);
		CHECK_PARSE_ERR(ret,"Invalid REG");

		ret = parse_delim(&cur,',');
		CHECK_PARSE_ERR(ret,"Invalid ','");

		ret = parse_reg(&cur,®idB);
		CHECK_PARSE_ERR(ret,"Invalid REG");

		y86bin->codes[1] = HPACK(regidA,regidB);
        goto cont;
      }
      
      case I_IRMOVL: {  /* 3:0 Imm, regB - e.g., irmovl $-1, %ebx */
		long lval;
		char* symbol;
		regid_t regid;
		ret = parse_imm(&cur,&symbol,&lval);
		CHECK_PARSE_ERR(ret,"Invalid Immediate");

		int ret1 = parse_delim(&cur,',');
		CHECK_PARSE_ERR(ret1,"Invalid ','");

		ret1 = parse_reg(&cur,®id);
		CHECK_PARSE_ERR(ret1,"Invalid REG");

		y86bin->codes[1] = HPACK(0xF,regid);
		if (ret == PARSE_DIGIT) {
			y86bin->codes[2] = lval & 0xFF;
			y86bin->codes[3] = (lval>>8) & 0xFF;
			y86bin->codes[4] = (lval>>16) & 0xFF;
			y86bin->codes[5] = (lval>>24) & 0xFF;
		}
		else if (ret == PARSE_SYMBOL)
			add_reloc(symbol,y86bin,2);
        goto cont;
      }
      
      case I_RMMOVL: {  /* 4:0 regA, D(regB) - e.g., rmmovl %eax, 8(%esp)  */
		regid_t regidA,regidB;
		long lval;
		ret = parse_reg(&cur,®idA);
		CHECK_PARSE_ERR(ret,"Invalid REG");

		ret = parse_delim(&cur,',');
		CHECK_PARSE_ERR(ret,"Invalid ','");
		
		ret = parse_mem(&cur,&lval,®idB);
		CHECK_PARSE_ERR(ret,"Invalid MEM");

		y86bin->codes[1] = HPACK(regidA,regidB);
		y86bin->codes[2] = lval & 0xFF;
		y86bin->codes[3] = (lval>>8) & 0xFF;
		y86bin->codes[4] = (lval>>16) & 0xFF;
		y86bin->codes[5] = (lval>>24) & 0xFF;
        goto cont;
      }
      
      case I_MRMOVL: {  /* 5:0 D(regB), regA - e.g., mrmovl 8(%ebp), %ecx */
		regid_t regidA,regidB;
		long lval;
		ret = parse_mem(&cur,&lval,®idB);
		CHECK_PARSE_ERR(ret,"Invalid MEM");

		ret = parse_delim(&cur,',');
		CHECK_PARSE_ERR(ret,"Invalid ','");
		
		ret = parse_reg(&cur,®idA);
		CHECK_PARSE_ERR(ret,"Invalid REG");

		y86bin->codes[1] = HPACK(regidA,regidB);
		y86bin->codes[2] = lval & 0xFF;
		y86bin->codes[3] = (lval>>8) & 0xFF;
		y86bin->codes[4] = (lval>>16) & 0xFF;
		y86bin->codes[5] = (lval>>24) & 0xFF;
        goto cont;
      }
      
      case I_JMP:   /* 7:x dest - e.g., je End */
      case I_CALL: {/* 8:x dest - e.g., call Main */
		long lval;
		char* symbol;
		ret = parse_imm(&cur,&symbol,&lval);
		CHECK_PARSE_ERR(ret,"Invalid DEST");
		
		if(ret == PARSE_SYMBOL)
			add_reloc(symbol,y86bin,1);
		if(ret == PARSE_DIGIT) {
			y86bin->codes[1] = lval & 0xFF;
			y86bin->codes[2] = (lval>>8) & 0xFF;
			y86bin->codes[3] = (lval>>16) & 0xFF;
			y86bin->codes[4] = (lval>>24) & 0xFF;
		}
        goto cont;
      }
      
      case I_DIRECTIVE: {
        /* further partition directive according to dtv_t */
        switch (LOW(inst->code)) {
          case D_DATA: {    /* .long data - e.g., .long 0xC0 */
			long lval;
			char* symbol;
			ret = parse_digit(&cur,&lval);
			/*CHECK_PARSE_ERR(ret,"Invalid digit");*/
			if(ret == PARSE_ERR)
			{
				ret = parse_symbol(&cur,&symbol);
				add_reloc(symbol,y86bin,0);
			}
			y86bin->codes[0] = lval & 0xFF;
			if(inst->bytes>=2)
				y86bin->codes[1] = (lval>>8) & 0xFF;
			if(inst->bytes==4) {
				y86bin->codes[2] = (lval>>16) & 0xFF;
				y86bin->codes[3] = (lval>>24) & 0xFF;
			}
            goto cont;
          }
          
          case D_POS: {   /* .pos D - e.g., .pos 0x100 */
			long pos;
			ret = parse_digit(&cur,&pos);
			CHECK_PARSE_ERR(ret,"Invalid digit");

			vmaddr = pos;
			y86bin->addr = pos;
            goto cont;
          }
          
          case D_ALIGN: {   /* .align D - e.g., .align 4 */
			long align;
			ret = parse_digit(&cur,&align);
			CHECK_PARSE_ERR(ret,"Invalid digit");
			
			if(vmaddr % align != 0) {
				vmaddr += align - vmaddr % align;
				y86bin->addr = vmaddr;
			}
            goto cont;
          }
          default:
            line->type = TYPE_ERR;
            err_print("Unknown directive");
            goto out;
        }
        break;
      }
      default:
        line->type = TYPE_ERR;
        err_print("Unknown instr");
        goto out;
    }

out:
    free(y86asm);
    return line->type;
}

/*
 * assemble: assemble an y86 file (e.g., 'asum.ys')
 * args
 *     in: point to input file (an y86 assembly file)
 *
 * return
 *     0: success, assmble the y86 file to a list of line_t
 *     -1: error, try to print err information (e.g., instr type and line number)
 */
int assemble(FILE *in)
{
    static char asm_buf[MAX_INSLEN]; /* the current line of asm code */
    line_t *line;
    int slen;
    char *y86asm;

    /* read y86 code line-by-line, and parse them to generate raw y86 binary code list */
    while (fgets(asm_buf, MAX_INSLEN, in) != NULL) {
        slen  = strlen(asm_buf);
        if ((asm_buf[slen-1] == '\n') || (asm_buf[slen-1] == '\r')) { 
            asm_buf[--slen] = '\0'; /* replace terminator */
        }

        /* store y86 assembly code */
        y86asm = (char *)malloc(sizeof(char) * (slen + 1)); // free in finit
        strcpy(y86asm, asm_buf);

        line = (line_t *)malloc(sizeof(line_t)); // free in finit
        memset(line, '\0', sizeof(line_t));

        /* set defualt */
        line->type = TYPE_COMM;
        line->y86asm = y86asm;
        line->next = NULL;

        /* add to y86 binary code list */
        y86bin_listtail->next = line;
        y86bin_listtail = line;
        y86asm_lineno ++;

        /* parse */
        if (parse_line(line) == TYPE_ERR)
            return -1;
    }

    /* skip line number information in err_print() */
    y86asm_lineno = -1;
    return 0;
}

/*
 * relocate: relocate the raw y86 binary code with symbol address
 *
 * return
 *     0: success
 *     -1: error, try to print err information (e.g., addr and symbol)
 */
int relocate(void)
{
    reloc_t *rtmp = NULL;
    
    rtmp = reltab->next;
    while (rtmp) {
        /* find symbol */
		symbol_t *symbol = find_symbol(rtmp->name);
		if(symbol == NULL) {
			err_print("Unknown symbol:'%s'",rtmp->name);
			return -1;
		}
        /* relocate y86bin according itype */
		int entry = rtmp->entry;
		rtmp->y86bin->codes[entry] = symbol->addr & 0xFF;
		rtmp->y86bin->codes[entry+1] = (symbol->addr >> 8) & 0xFF;
		rtmp->y86bin->codes[entry+2] = (symbol->addr >> 16) & 0xFF;
		rtmp->y86bin->codes[entry+3] = (symbol->addr >> 24) & 0xFF;
        /* next */
        rtmp = rtmp->next;
    }
    return 0;
}

/*
 * binfile: generate the y86 binary file
 * args
 *     out: point to output file (an y86 binary file)
 *
 * return
 *     0: success
 *     -1: error
 */
int binfile(FILE *out)
{
    /* prepare image with y86 binary code */
	line_t *tmp = y86bin_listhead->next;
	char *buf = (char*)calloc(1,MAX_INSLEN * 6);
	char *buf_beg = buf;
	long pos;
	while(tmp != NULL) {
		buf = buf_beg + tmp->y86bin.addr;
		memcpy(buf,tmp->y86bin.codes,tmp->y86bin.bytes);
		if(tmp->y86bin.bytes!=0)
			pos = tmp->y86bin.addr+tmp->y86bin.bytes;
		tmp = tmp->next;
	}
    /* binary write y86 code to output file (NOTE: see fwrite()) */
	fwrite(buf_beg,1,pos,out);
    return 0;
}


/* whether print the readable output to screen or not ? */
bool_t screen = FALSE; 

static void hexstuff(char *dest, int value, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        char c;
        int h = (value >> 4*i) & 0xF;
        c = h < 10 ? h + '0' : h - 10 + 'a';
        dest[len-i-1] = c;
    }
}

void print_line(line_t *line)
{
    char buf[26];

    /* line format: 0xHHH: cccccccccccc | <line> */
    if (line->type == TYPE_INS) {
        bin_t *y86bin = &line->y86bin;
        int i;
        
        strcpy(buf, "  0x000:              | ");
        
        hexstuff(buf+4, y86bin->addr, 3);
        if (y86bin->bytes > 0)
            for (i = 0; i < y86bin->bytes; i++)
                hexstuff(buf+9+2*i, y86bin->codes[i]&0xFF, 2);
    } else {
        strcpy(buf, "                      | ");
    }

    printf("%s%s\n", buf, line->y86asm);
}

/* 
 * print_screen: dump readable binary and assembly code to screen
 * (e.g., Figure 4.8 in ICS book)
 */
void print_screen(void)
{
    line_t *tmp = y86bin_listhead->next;
    
    /* line by line */
    while (tmp != NULL) {
        print_line(tmp);
        tmp = tmp->next;
    }
}

/* init and finit */
void init(void)
{
    reltab = (reloc_t *)malloc(sizeof(reloc_t)); // free in finit
    memset(reltab, 0, sizeof(reloc_t));

    symtab = (symbol_t *)malloc(sizeof(symbol_t)); // free in finit
    memset(symtab, 0, sizeof(symbol_t));

    y86bin_listhead = (line_t *)malloc(sizeof(line_t)); // free in finit
    memset(y86bin_listhead, 0, sizeof(line_t));
    y86bin_listtail = y86bin_listhead;
    y86asm_lineno = 0;
}

void finit(void)
{
    reloc_t *rtmp = NULL;
    do {
        rtmp = reltab->next;
        if (reltab->name) 
            free(reltab->name);
        free(reltab);
        reltab = rtmp;
    } while (reltab);
    
    symbol_t *stmp = NULL;
    do {
        stmp = symtab->next;
        if (symtab->name) 
            free(symtab->name);
        free(symtab);
        symtab = stmp;
    } while (symtab);

    line_t *ltmp = NULL;
    do {
        ltmp = y86bin_listhead->next;
        if (y86bin_listhead->y86asm) 
            free(y86bin_listhead->y86asm);
        free(y86bin_listhead);
        y86bin_listhead = ltmp;
    } while (y86bin_listhead);
}

static void usage(char *pname)
{
    printf("Usage: %s [-v] file.ys\n", pname);
    printf("   -v print the readable output to screen\n");
    exit(0);
}

void debug_func()
{
	/* test parse_label*/
	char *ptr = "  test:1234",*name = NULL;
	if(parse_label(&ptr,&name) == PARSE_ERR)
		printf("fail\n");
	else
		printf("success\nlabelname:%s\nptr points to %c\n",name,*ptr);
	/* test parse_instr */
	char *ptr2 = " ixmovl 1234";
	instr_t *ins;
	if(parse_instr(&ptr2,&ins) == PARSE_ERR)
		printf("fail\n");
	else
		printf("success\n");
	/* test parse_symbol */
	char *ptr3 = " symbol,f5";
	free(name);
	parse_symbol(&ptr3,&name);
	printf("OK symbol:%s\n",name);
}

int main(int argc, char *argv[])
{
#ifdef DEBUG_CODE
	debug_func();
#endif
    int rootlen;
    char infname[512];
    char outfname[512];
    int nextarg = 1;
    FILE *in = NULL, *out = NULL;
    
    if (argc < 2)
        usage(argv[0]);
    
    if (argv[nextarg][0] == '-') {
        char flag = argv[nextarg][1];
        switch (flag) {
          case 'v':
            screen = TRUE;
            nextarg++;
            break;
          default:
            usage(argv[0]);
        }
    }

    /* parse input file name */
    rootlen = strlen(argv[nextarg])-3;
    /* only support the .ys file */
    if (strcmp(argv[nextarg]+rootlen, ".ys"))
        usage(argv[0]);
    
    if (rootlen > 500) {
        err_print("File name too long");
        exit(1);
    }


    /* init */
    init();

    
    /* assemble .ys file */
    strncpy(infname, argv[nextarg], rootlen);
    strcpy(infname+rootlen, ".ys");
    in = fopen(infname, "r");
    if (!in) {
        err_print("Can't open input file '%s'", infname);
        exit(1);
    }
    
    if (assemble(in) < 0) {
        err_print("Assemble y86 code error");
        fclose(in);
        exit(1);
    }
    fclose(in);


    /* relocate binary code */
    if (relocate() < 0) {
        err_print("Relocate binary code error");
        exit(1);
    }


    /* generate .bin file */
    strncpy(outfname, argv[nextarg], rootlen);
    strcpy(outfname+rootlen, ".bin");
    out = fopen(outfname, "wb");
    if (!out) {
        err_print("Can't open output file '%s'", outfname);
        exit(1);
    }

    if (binfile(out) < 0) {
        err_print("Generate binary file error");
        fclose(out);
        exit(1);
    }
    fclose(out);
    
    /* print to screen (.yo file) */
    if (screen)
       print_screen(); 

    /* finit */
    finit();
    return 0;
}

比较让我没想到的地方是关于label和relocate的实现和操作,一直以为只有链接的时候需要重定位,没想到单独汇编单个文件的时候可能因为label出现的次序不同也需要重定位。自己实现那几个函数的话估计真的得花比malloc更加多的时间。

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
ava实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),可运行高分资源 Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现的毕业设计&&课程设计(包含运行文档+数据库+前后端代码),Java实现
C语言是一种广泛使用的编程语言,它具有高效、灵活、可移植性强等特点,被广泛应用于操作系统、嵌入式系统、数据库、编译器等领域的开发。C语言的基本语法包括变量、数据类型、运算符、控制结构(如if语句、循环语句等)、函数、指针等。下面详细介绍C语言的基本概念和语法。 1. 变量和数据类型 在C语言中,变量用于存储数据,数据类型用于定义变量的类型和范围。C语言支持多种数据类型,包括基本数据类型(如int、float、char等)和复合数据类型(如结构体、联合等)。 2. 运算符 C语言中常用的运算符包括算术运算符(如+、、、/等)、关系运算符(如==、!=、、=、<、<=等)、逻辑运算符(如&&、||、!等)。此外,还有位运算符(如&、|、^等)和指针运算符(如、等)。 3. 控制结构 C语言中常用的控制结构包括if语句、循环语句(如for、while等)和switch语句。通过这些控制结构,可以实现程序的分支、循环和多路选择等功能。 4. 函数 函数是C语言中用于封装代码的单元,可以实现代码的复用和模块化。C语言中定义函数使用关键字“void”或返回值类型(如int、float等),并通过“{”和“}”括起来的代码块来实现函数的功能。 5. 指针 指针是C语言中用于存储变量地址的变量。通过指针,可以实现对内存的间接访问和修改。C语言中定义指针使用星号()符号,指向数组、字符串和结构体等数据结构时,还需要注意数组名和字符串常量的特殊性质。 6. 数组和字符串 数组是C语言中用于存储同类型数据的结构,可以通过索引访问和修改数组中的元素。字符串是C语言中用于存储文本数据的特殊类型,通常以字符串常量的形式出现,用双引号("...")括起来,末尾自动添加'\0'字符。 7. 结构体和联合 结构体和联合是C语言中用于存储不同类型数据的复合数据类型。结构体由多个成员组成,每个成员可以是不同的数据类型;联合由多个变量组成,它们共用同一块内存空间。通过结构体和联合,可以实现数据的封装和抽象。 8. 文件操作 C语言中通过文件操作函数(如fopen、fclose、fread、fwrite等)实现对文件的读写操作。文件操作函数通常返回文件指针,用于表示打开的文件。通过文件指针,可以进行文件的定位、读写等操作。 总之,C语言是一种功能强大、灵活高效的编程语言,广泛应用于各种领域。掌握C语言的基本语法和数据结构,可以为编程学习和实践打下坚实的基础。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值