USTC ICS(2023Fall) Lab7 LC-3 Assembler

C++代码

#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

vector<string> read_asm_file(const string &filename);
void write_output_file(const string &filename, const vector<string> &output);
vector<string> Assemble(const vector<string>& lines);

int npower_16(int n);//return 16^n
int Hex2Num(char ch);//convert a hex digit character to its numeric value
int atoi_hex(string strInValue);//convert a hex string to its value
int s_find(string str, char c, int time);//return the position of the time'th occurrence of character 'c' in the string str
string Pretreat(string line);//delete the label and construct the symbol_address table
string dec_to_2com(int n, int digit);//convert a decimal number n to its "digit"-digit 2's complement
string get_first_word(string str);//return the first word (delimited by spaces) in the string

//translate each instruction from an LC-3 code line string to its machine code string
string translate_instruction(const string &instruction);
string trans_ADD(string instruction);
string trans_AND(string instruction);
string trans_NOT(string instruction);
string trans_BR(string instruction);
string trans_JMP(string instruction);
string trans_JSR(string instruction);
string trans_JSRR(string instruction);
string trans_LD(string instruction);
string trans_LDI(string instruction);
string trans_LDR(string instruction);
string trans_LEA(string instruction);
string trans_RET(string instruction);
string trans_RTI(string instruction);
string trans_ST(string instruction);
string trans_STI(string instruction);
string trans_STR(string instruction);
string trans_TRAP(string instruction);
string trans_FILL(string instruction);
string trans_BLKW(string instruction);
string trans_STRINGZ(string instruction);

int start_address;//the address after .ORIG
int current_address;//record the address of the current instruction
map<string, int> SA;//symbol_address table

主函数

// Entry point: assemble <input_file.asm> into <output_file.txt>.
// The first input line is handed to Pretreat on its own so that a leading
// .ORIG sets start_address; the remaining lines are pre-treated (labels
// recorded and stripped) until .END, then translated by Assemble.
// Returns 1 on a usage error or when there is nothing to assemble, else 0.
int main(int argc, char *argv[]){
    if (argc != 3){
        cerr << "Usage: " << argv[0] << " <input_file.asm> <output_file.txt>" << endl;
        return 1;
    }
    const string input_filename = argv[1];
    const string output_filename = argv[2];
    const vector<string> input_lines = read_asm_file(input_filename);
    // read_asm_file returns an empty vector when the file is missing or empty;
    // indexing input_lines[0] in that case would be undefined behaviour.
    if (input_lines.empty()){
        cerr << "Nothing to assemble: " << input_filename << " is missing or empty" << endl;
        return 1;
    }
    Pretreat(input_lines[0]);//find the start address
    vector<string> output_lines;
    // The first output word is the load address itself.
    output_lines.push_back(dec_to_2com(start_address, 16));
    vector<string> pretreated_lines;
    // First pass over everything after the first line; stop at .END.
    for (auto iter = input_lines.begin() + 1; iter != input_lines.end(); ++iter){
        const string pretreated_line = Pretreat(*iter);
        if (pretreated_line == "SKIP")
            break;
        pretreated_lines.push_back(pretreated_line);
    }
    // Second pass: translate the label-free lines into machine code.
    const vector<string> assembled_lines = Assemble(pretreated_lines);
    output_lines.insert(output_lines.end(), assembled_lines.begin(), assembled_lines.end());
    write_output_file(output_filename, output_lines);
    return 0;
}

文件的读取与写入

// Read a text file line by line.
// Returns every line of `filename` in order (without line terminators as
// split by getline). If the file cannot be opened, a message is written to
// cerr and an empty vector is returned.
vector<string> read_asm_file(const string &filename){
    vector<string> lines;
    ifstream file(filename);
    if (!file.is_open()){
        cerr << "Unable to open file: " << filename << endl;
        return lines;
    }
    for (string current; getline(file, current); )
        lines.push_back(current);
    return lines;
}

// Write the assembled output to `filename`, one entry of `output` per line.
// Only the characters '0', '1' and '\n' of each entry are written; every
// other character (including NUL bytes) is dropped.
void write_output_file(const string &filename, const vector<string> &output){
    ofstream file(filename);
    for (const string &entry : output){
        string kept;
        for (const char ch : entry){
            const bool is_output_char = (ch == '\n' || ch == '0' || ch == '1');
            if (is_output_char)
                kept.push_back(ch);
        }
        file << kept << endl;
    }
}

汇编

// Second pass: translate pre-treated (label-free) lines into machine code.
// Returns one string per input line; a line that expands to several words
// (.BLKW, .STRINGZ) yields a single string with the words separated by '\n'.
// current_address is advanced BEFORE each translation so that, for ordinary
// one-word instructions, it holds the address of the instruction being
// translated when the trans_* helpers compute PC-relative offsets.
vector<string> Assemble(const vector<string>& lines) {
    vector<string> mach_codes;
    mach_codes.reserve(lines.size());
    current_address = start_address - 1;
    for (const string& line : lines) {
        const string fw = get_first_word(line);
        if (fw == ".BLKW"){
            // .BLKW #n occupies n words.
            const int d = s_find(line, '#', 1);
            current_address += atoi(line.substr(d + 1).c_str());
        }
        else if (fw == ".STRINGZ"){
            // Characters between the quotes plus the terminating zero word.
            const int d1 = s_find(line, '"', 1) + 1;
            const int d2 = s_find(line, '"', 2) - 1;
            current_address += d2 - d1 + 2;
        }
        else
            current_address++;
        // Store the translation as-is. The previous version resize()d a
        // buffer to 10000 and then appended, which prefixed every entry with
        // 10000 NUL bytes.
        mach_codes.push_back(translate_instruction(line));
    }
    return mach_codes;
}

预处理

// First pass over one source line: label deletion and symbol-address table
// construction.
//  - ".ORIG xNNNN" sets start_address and resets current_address.
//  - ".END" returns the sentinel "SKIP".
//  - Any other line advances current_address by the number of words the line
//    occupies (.BLKW and .STRINGZ may occupy more than one).
//  - If the first word is not a known mnemonic/directive it is taken to be a
//    label: it is stored in SA with the address of the line and removed from
//    the returned text. Mnemonics that are not in the list below are
//    therefore treated as labels.
// A blank line still advances current_address by one, as before.
// Returns the line with any leading label removed, or "SKIP" for .END.
string Pretreat(string line){
    static const char *const kKnownWords[] = {
        ".ORIG", "ADD", "AND", "NOT", "LD", "LDR", "LDI", "ST", "STR", "STI",
        "TRAP", "LEA", "RTI", "JMP", "JSR", "RET", "JSRR", ".FILL", ".STRINGZ",
        ".BLKW", "BR", "BRN", "BRZ", "BRP", "BRNZ", "BRNP", "BRZP", "BRNZP",
        ".END"
    };

    int temp_address = -1;
    const string fw = get_first_word(line);

    // Word that follows the first one; needed to recognise
    // "LABEL .BLKW ..." and "LABEL .STRINGZ ...". It is located from the real
    // position of the first word, so leading spaces are handled and a line
    // that ends right after its first word no longer makes substr() throw.
    const size_t fw_start = line.find_first_not_of(' ');
    string second_word;
    if (!fw.empty() && fw_start != string::npos)
        second_word = get_first_word(line.substr(fw_start + fw.length()));

    if (fw == ".ORIG"){
        start_address = atoi_hex(line.substr(s_find(line, 'x', 1) + 1));
        current_address = start_address - 1;
    }
    else if (fw == ".END")
        return "SKIP";
    else if (fw == ".BLKW" || second_word == ".BLKW"){
        const int d = s_find(line, '#', 1);
        const int imm16 = atoi(line.substr(d + 1).c_str());
        temp_address = current_address + 1;   // address of the first reserved word
        current_address += imm16;
    }
    else if (fw == ".STRINGZ" || second_word == ".STRINGZ"){
        const int d1 = s_find(line, '"', 1) + 1;
        const int d2 = s_find(line, '"', 2) - 1;
        temp_address = current_address + 1;   // address of the first character
        current_address += d2 - d1 + 2;       // characters plus terminating zero
    }
    else{
        current_address++;
        temp_address = current_address;
    }

    // A non-empty first word that is not a known mnemonic is a label.
    bool is_label = !fw.empty();
    for (const char *known : kKnownWords){
        if (fw == known){
            is_label = false;
            break;
        }
    }
    if (is_label){
        SA.insert(make_pair(fw, temp_address));   // an existing entry is kept
        line.erase(fw_start, fw.length());
        // Drop one separating space if there is one (a label alone on a line
        // has none).
        const size_t space_pos = line.find(' ');
        if (space_pos != string::npos)
            line.erase(space_pos, 1);
    }
    return line;
}

汇编指令转为机器码

// Dispatch one pre-treated line to the translator for its mnemonic.
// The mnemonic is the first space-delimited word of `instruction`. Lines
// whose first word is not in the table (including ".END" and blank lines)
// translate to the empty string.
string translate_instruction(const string &instruction){
    struct Route {
        const char *mnemonic;
        string (*translate)(string);
    };
    static const Route kRoutes[] = {
        {"ADD", trans_ADD},   {"AND", trans_AND},   {"NOT", trans_NOT},
        {"LD", trans_LD},     {"LDR", trans_LDR},   {"LDI", trans_LDI},
        {"LEA", trans_LEA},   {"ST", trans_ST},     {"STR", trans_STR},
        {"STI", trans_STI},   {"JMP", trans_JMP},   {"JSRR", trans_JSRR},
        {"JSR", trans_JSR},   {"RET", trans_RET},   {"RTI", trans_RTI},
        {"BR", trans_BR},     {"BRN", trans_BR},    {"BRZ", trans_BR},
        {"BRP", trans_BR},    {"BRNZ", trans_BR},   {"BRNP", trans_BR},
        {"BRZP", trans_BR},   {"BRNZP", trans_BR},  {"TRAP", trans_TRAP},
        {".FILL", trans_FILL}, {".BLKW", trans_BLKW}, {".STRINGZ", trans_STRINGZ}
    };

    const string mnemonic = get_first_word(instruction);
    for (const Route &route : kRoutes){
        if (mnemonic == route.mnemonic)
            return route.translate(instruction);
    }
    return "";
}

//translate each instruction (from an LC-3 code line string to its machine code string),
//mainly by handling operations on strings
// Encode "ADD DR, SR1, SR2" or "ADD DR, SR1, imm5" as a 16-character bit string.
// Registers are located as the digit following the 1st/2nd/3rd 'R' in the
// line. The immediate form is chosen when the line contains '#' (decimal) or,
// failing that, 'x' (hex); otherwise the register form is used.
// The three operand forms are mutually exclusive: previously a line that
// contained both '#' and 'x' had two immediate fields appended.
// NOTE(review): any 'x' anywhere in the line (e.g. in a trailing comment)
// still selects the hex-immediate form when no '#' is present.
string trans_ADD(string instruction){
    string code = "0001";
    const int dr_pos = s_find(instruction, 'R', 1) + 1;
    code.append(dec_to_2com(instruction[dr_pos] - '0', 3));
    const int sr1_pos = s_find(instruction, 'R', 2) + 1;
    code.append(dec_to_2com(instruction[sr1_pos] - '0', 3));

    const int dec_pos = s_find(instruction, '#', 1);
    const int hex_pos = s_find(instruction, 'x', 1);
    if (dec_pos != -1){
        code.append("1");   // immediate-mode flag
        const int imm5 = atoi(instruction.substr(dec_pos + 1).c_str());
        code.append(dec_to_2com(imm5, 5));
    }
    else if (hex_pos != -1){
        code.append("1");   // immediate-mode flag
        const int imm5 = atoi_hex(instruction.substr(hex_pos + 1));
        code.append(dec_to_2com(imm5, 5));
    }
    else{
        code.append("000"); // register-mode flag plus two unused bits
        const int sr2_pos = s_find(instruction, 'R', 3) + 1;
        code.append(dec_to_2com(instruction[sr2_pos] - '0', 3));
    }
    return code;
}

// Encode "AND DR, SR1, SR2" or "AND DR, SR1, imm5" as a 16-character bit string.
// Registers are located as the digit following the 1st/2nd/3rd 'R' in the
// line. The immediate form is chosen when the line contains '#' (decimal) or,
// failing that, 'x' (hex); otherwise the register form is used.
// The three operand forms are mutually exclusive: previously a line that
// contained both '#' and 'x' had two immediate fields appended.
// NOTE(review): any 'x' anywhere in the line (e.g. in a trailing comment)
// still selects the hex-immediate form when no '#' is present.
string trans_AND(string instruction){
    string code = "0101";
    const int dr_pos = s_find(instruction, 'R', 1) + 1;
    code.append(dec_to_2com(instruction[dr_pos] - '0', 3));
    const int sr1_pos = s_find(instruction, 'R', 2) + 1;
    code.append(dec_to_2com(instruction[sr1_pos] - '0', 3));

    const int dec_pos = s_find(instruction, '#', 1);
    const int hex_pos = s_find(instruction, 'x', 1);
    if (dec_pos != -1){
        code.append("1");   // immediate-mode flag
        const int imm5 = atoi(instruction.substr(dec_pos + 1).c_str());
        code.append(dec_to_2com(imm5, 5));
    }
    else if (hex_pos != -1){
        code.append("1");   // immediate-mode flag
        const int imm5 = atoi_hex(instruction.substr(hex_pos + 1));
        code.append(dec_to_2com(imm5, 5));
    }
    else{
        code.append("000"); // register-mode flag plus two unused bits
        const int sr2_pos = s_find(instruction, 'R', 3) + 1;
        code.append(dec_to_2com(instruction[sr2_pos] - '0', 3));
    }
    return code;
}

// Encode "NOT DR, SR": opcode 1001, DR, SR, then six 1 bits.
// Each register number is the digit following the 1st/2nd 'R' in the line.
string trans_NOT(string instruction){
    const int dr_pos = s_find(instruction, 'R', 1) + 1;
    const string dr_bits = dec_to_2com(instruction[dr_pos] - '0', 3);
    const int sr_pos = s_find(instruction, 'R', 2) + 1;
    const string sr_bits = dec_to_2com(instruction[sr_pos] - '0', 3);
    return "1001" + dr_bits + sr_bits + "111111";
}

// Encode "LD DR, LABEL" or "LD DR, #offset": opcode 0010, DR, 9-bit offset.
// With '#', the decimal number after it is used directly as the offset.
// Otherwise the word after the first ',' is looked up in SA and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_LD(string instruction){
    const int reg_pos = s_find(instruction, 'R', 1) + 1;
    const string reg_bits = dec_to_2com(instruction[reg_pos] - '0', 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos == -1){
        const int operand_pos = s_find(instruction, ',', 1) + 1;
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    else{
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    return "0010" + reg_bits + dec_to_2com(pc_offset, 9);
}

// Encode "LDR DR, BaseR, offset6": opcode 0110, DR, BaseR, 6-bit offset.
// The mnemonic itself contains an 'R', so DR and BaseR are the digits after
// the 2nd and 3rd 'R' in the line. The offset is decimal after '#', or hex
// after 'x' when there is no '#'. The two forms are mutually exclusive:
// previously a line containing both had two offset fields appended.
// NOTE(review): when neither '#' nor 'x' is present no offset bits are
// emitted, exactly as before -- the result is then only 10 characters long.
string trans_LDR(string instruction){
    string code = "0110";
    const int dr_pos = s_find(instruction, 'R', 2) + 1;
    code.append(dec_to_2com(instruction[dr_pos] - '0', 3));
    const int base_pos = s_find(instruction, 'R', 3) + 1;
    code.append(dec_to_2com(instruction[base_pos] - '0', 3));

    const int dec_pos = s_find(instruction, '#', 1);
    const int hex_pos = s_find(instruction, 'x', 1);
    if (dec_pos != -1){
        const int imm6 = atoi(instruction.substr(dec_pos + 1).c_str());
        code.append(dec_to_2com(imm6, 6));
    }
    else if (hex_pos != -1){
        const int imm6 = atoi_hex(instruction.substr(hex_pos + 1));
        code.append(dec_to_2com(imm6, 6));
    }
    return code;
}

// Encode "LDI DR, LABEL" or "LDI DR, #offset": opcode 1010, DR, 9-bit offset.
// With '#', the decimal number after it is used directly as the offset.
// Otherwise the word after the first ',' is looked up in SA and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_LDI(string instruction){
    const int reg_pos = s_find(instruction, 'R', 1) + 1;
    const string reg_bits = dec_to_2com(instruction[reg_pos] - '0', 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos == -1){
        const int operand_pos = s_find(instruction, ',', 1) + 1;
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    else{
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    return "1010" + reg_bits + dec_to_2com(pc_offset, 9);
}

// Encode "LEA DR, LABEL" or "LEA DR, #offset": opcode 1110, DR, 9-bit offset.
// With '#', the decimal number after it is used directly as the offset.
// Otherwise the word after the first ',' is looked up in SA and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_LEA(string instruction){
    const int reg_pos = s_find(instruction, 'R', 1) + 1;
    const string reg_bits = dec_to_2com(instruction[reg_pos] - '0', 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos == -1){
        const int operand_pos = s_find(instruction, ',', 1) + 1;
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    else{
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    return "1110" + reg_bits + dec_to_2com(pc_offset, 9);
}

// Encode "ST SR, LABEL" or "ST SR, #offset": opcode 0011, SR, 9-bit offset.
// With '#', the decimal number after it is used directly as the offset.
// Otherwise the word after the first ',' is looked up in SA and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_ST(string instruction){
    const int reg_pos = s_find(instruction, 'R', 1) + 1;
    const string reg_bits = dec_to_2com(instruction[reg_pos] - '0', 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos == -1){
        const int operand_pos = s_find(instruction, ',', 1) + 1;
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    else{
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    return "0011" + reg_bits + dec_to_2com(pc_offset, 9);
}

// Encode "STR SR, BaseR, offset6": opcode 0111, SR, BaseR, 6-bit offset.
// The mnemonic itself contains an 'R', so SR and BaseR are the digits after
// the 2nd and 3rd 'R' in the line. The offset is decimal after '#', or hex
// after 'x' when there is no '#'. The two forms are mutually exclusive:
// previously a line containing both had two offset fields appended.
// NOTE(review): when neither '#' nor 'x' is present no offset bits are
// emitted, exactly as before -- the result is then only 10 characters long.
string trans_STR(string instruction){
    string code = "0111";
    const int sr_pos = s_find(instruction, 'R', 2) + 1;
    code.append(dec_to_2com(instruction[sr_pos] - '0', 3));
    const int base_pos = s_find(instruction, 'R', 3) + 1;
    code.append(dec_to_2com(instruction[base_pos] - '0', 3));

    const int dec_pos = s_find(instruction, '#', 1);
    const int hex_pos = s_find(instruction, 'x', 1);
    if (dec_pos != -1){
        const int imm6 = atoi(instruction.substr(dec_pos + 1).c_str());
        code.append(dec_to_2com(imm6, 6));
    }
    else if (hex_pos != -1){
        const int imm6 = atoi_hex(instruction.substr(hex_pos + 1));
        code.append(dec_to_2com(imm6, 6));
    }
    return code;
}

// Encode "STI SR, LABEL" or "STI SR, #offset": opcode 1011, SR, 9-bit offset.
// With '#', the decimal number after it is used directly as the offset.
// Otherwise the word after the first ',' is looked up in SA and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_STI(string instruction){
    const int reg_pos = s_find(instruction, 'R', 1) + 1;
    const string reg_bits = dec_to_2com(instruction[reg_pos] - '0', 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos == -1){
        const int operand_pos = s_find(instruction, ',', 1) + 1;
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    else{
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    return "1011" + reg_bits + dec_to_2com(pc_offset, 9);
}

// Encode "JMP BaseR": opcode 1100, three zero bits, BaseR, six zero bits.
// BaseR is the digit following the first 'R' in the line.
string trans_JMP(string instruction){
    const int base_pos = s_find(instruction, 'R', 1) + 1;
    const string base_bits = dec_to_2com(instruction[base_pos] - '0', 3);
    return string("1100") + "000" + base_bits + "000000";
}

// Encode "JSRR BaseR": opcode 0100, three zero bits, BaseR, six zero bits.
// The mnemonic itself contains two 'R's, so BaseR is the digit following the
// third 'R' in the line.
string trans_JSRR(string instruction){
    const int base_pos = s_find(instruction, 'R', 3) + 1;
    const string base_bits = dec_to_2com(instruction[base_pos] - '0', 3);
    return string("0100") + "000" + base_bits + "000000";
}

// Encode "JSR LABEL": bits 01001 followed by an 11-bit offset.
// The label is the first word after the first 'R' in the line (the 'R' of
// the mnemonic); the offset is label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_JSR(string instruction){
    const int operand_pos = s_find(instruction, 'R', 1) + 1;
    const string label = get_first_word(instruction.substr(operand_pos));
    const auto entry = SA.find(label);
    const int target = (entry == SA.end()) ? 3000 : entry->second;
    const int pc_offset = target - current_address - 1;
    return "01001" + dec_to_2com(pc_offset, 11);
}

// Encode RTI: opcode 1000 followed by twelve zero bits. The argument is not
// inspected.
string trans_RTI(string instruction){
    return "1000" + string(12, '0');
}

// Encode RET: the fixed word 1100 000 111 000000. The argument is not
// inspected.
string trans_RET(string instruction){
    return string("1100") + "000" + "111" + "000000";
}

// Encode a BR variant: opcode 0000, the n/z/p condition bits, 9-bit offset.
// The condition bits come from the letters N, Z, P in the mnemonic (first
// word); a mnemonic with none of them is treated as all three set.
// With '#', the decimal number after it is the offset. Otherwise the label
// is the first word after the mnemonic and the offset is
// label address - current_address - 1.
// NOTE(review): an unknown label falls back to address 3000 (decimal) --
// confirm this is intended.
string trans_BR(string instruction){
    const string mnemonic = get_first_word(instruction);
    const int n_pos = s_find(mnemonic, 'N', 1);
    const int z_pos = s_find(mnemonic, 'Z', 1);
    const int p_pos = s_find(mnemonic, 'P', 1);

    int cond = 0;
    if (n_pos != -1) cond |= 4;
    if (z_pos != -1) cond |= 2;
    if (p_pos != -1) cond |= 1;
    if (cond == 0)
        cond = 7;   // plain "BR" branches unconditionally
    const string cond_bits = dec_to_2com(cond, 3);

    int pc_offset;
    const int hash_pos = s_find(instruction, '#', 1);
    if (hash_pos != -1){
        pc_offset = atoi(instruction.substr(hash_pos + 1).c_str());
    }
    else{
        // Index of the last of R/N/Z/P inside the mnemonic...
        int last_letter = s_find(mnemonic, 'R', 1);
        if (n_pos > last_letter) last_letter = n_pos;
        if (z_pos > last_letter) last_letter = z_pos;
        if (p_pos > last_letter) last_letter = p_pos;
        // ...shifted by where the mnemonic starts in the line gives the
        // position just past the mnemonic.
        const int operand_pos = last_letter + 1 + s_find(instruction, 'B', 1);
        const string label = get_first_word(instruction.substr(operand_pos));
        const auto entry = SA.find(label);
        const int target = (entry == SA.end()) ? 3000 : entry->second;
        pc_offset = target - current_address - 1;
    }
    return "0000" + cond_bits + dec_to_2com(pc_offset, 9);
}

// Encode "TRAP xNN": bits 11110000 followed by the 8-bit trap vector.
// The vector is the hex number that follows the first 'x' in the line.
string trans_TRAP(string instruction){
    const int digits_pos = s_find(instruction, 'x', 1) + 1;
    const int trap_vector = atoi_hex(instruction.substr(digits_pos));
    return "11110000" + dec_to_2com(trap_vector, 8);
}

string trans_FILL(string instruction){
	string code = "";
	code.resize(18);
	int imm16 = 0;
	int d = s_find(instruction, '#', 1);
	if (d != -1)
		imm16 = atoi(instruction.substr(d + 1, instruction.length()-d).c_str());
	else if (s_find(instruction, 'x', 1) != -1){
		d = s_find(instruction, 'x', 1);
		imm16 = atoi_hex(instruction.substr(d + 1, instruction.length()-d).c_str());
	}
	else
		return NULL;
	code.append(dec_to_2com(imm16, 16));
	return code;
}

// Encode ".BLKW #n": n zero words separated by '\n' (no trailing newline).
// The count is the decimal number after '#'. As before, at least one word is
// always emitted, even when the count is zero or negative.
// The previous version resize()d the result before appending, which
// prefixed it with NUL bytes; the string is now built directly.
string trans_BLKW(string instruction){
    const string zero_word = "0000000000000000";
    const int d = s_find(instruction, '#', 1);
    const int imm16 = atoi(instruction.substr(d + 1).c_str());

    string codes = zero_word;
    if (imm16 > 1)
        codes.reserve(static_cast<size_t>(imm16) * (zero_word.size() + 1));
    for (int i = 1; i < imm16; i++){
        codes += '\n';
        codes += zero_word;
    }
    return codes;
}

// Encode .STRINGZ "text": one 16-bit word per character between the first
// two double quotes, followed by a terminating zero word. Words are
// separated by '\n' (no trailing newline). An empty or missing string yields
// just the zero word. Escape sequences are not interpreted.
// Changes from the previous version:
//  - no leading NUL bytes in the result (it used resize() before append());
//  - characters are converted through unsigned char, so a byte >= 0x80 is
//    stored zero-extended instead of sign-extended.
string trans_STRINGZ(string instruction){
    const int d1 = s_find(instruction, '"', 1) + 1;   // first character
    const int d2 = s_find(instruction, '"', 2) - 1;   // last character

    string codes;
    if (d1 <= d2){
        const string str = instruction.substr(d1, d2 - d1 + 1);
        codes.reserve((str.length() + 1) * 17);
        for (const char ch : str){
            codes += dec_to_2com(static_cast<unsigned char>(ch), 16);
            codes += '\n';
        }
    }
    codes += "0000000000000000";
    return codes;
}

其他一些函数

// Return the first word of `str`: leading spaces are skipped and the word
// runs up to (not including) the next space, NUL character, or the end of
// the string. Only ' ' counts as a separator. Returns "" if there is no word.
string get_first_word(string str){
    const size_t first = str.find_first_not_of(' ');
    if (first == string::npos)
        return "";
    static const char kStops[2] = {' ', '\0'};
    const size_t stop = str.find_first_of(kStops, first, 2);
    if (stop == string::npos)
        return str.substr(first);
    return str.substr(first, stop - first);
}

// Return 16 raised to the power n; any n < 1 yields 1.
int  npower_16(int n){
    long result = 1;
    for (int remaining = n; remaining > 0; --remaining)
        result *= 16;
    return result;
}

// Convert one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
// numeric value. Any other character yields 0.
int  Hex2Num(char ch){
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'A' && ch <= 'F')
        return ch - 'A' + 10;
    if (ch >= 'a' && ch <= 'f')
        return ch - 'a' + 10;
    return 0;
}

// Convert the hexadecimal number at the start of `strInValue` to its value.
// Leading spaces are skipped, an optional '-' negates the result, and
// parsing stops at the first character that is not a hex digit (so a
// following space, comma, comment or '\r' ends the number). Returns 0 when
// no digits are found.
// The previous version copied the token into fixed char[9] buffers, which
// overflowed for tokens longer than 8 characters, and counted every non-hex
// character as a zero digit.
int atoi_hex(string strInValue){
    size_t pos = strInValue.find_first_not_of(' ');
    if (pos == string::npos)
        return 0;

    int sign = 1;
    if (strInValue[pos] == '-'){
        sign = -1;
        ++pos;
    }

    // Accumulate in unsigned arithmetic so an over-long number wraps instead
    // of triggering signed-overflow undefined behaviour.
    unsigned int magnitude = 0;
    for (; pos < strInValue.size(); ++pos){
        const char ch = strInValue[pos];
        unsigned int digit;
        if (ch >= '0' && ch <= '9')
            digit = ch - '0';
        else if (ch >= 'A' && ch <= 'F')
            digit = ch - 'A' + 10;
        else if (ch >= 'a' && ch <= 'f')
            digit = ch - 'a' + 10;
        else
            break;
        magnitude = magnitude * 16u + digit;
    }
    return sign * static_cast<int>(magnitude);
}

// Return the index of the time'th occurrence of character `c` in `str`, or
// -1 if there are fewer occurrences. A `time` of 1 or less selects the first
// occurrence. Scanning stops at the first NUL character or the end of the
// string.
int s_find(string str, char c, int time){
    int remaining = time;
    for (size_t idx = 0; idx < str.size() && str[idx] != '\0'; ++idx){
        if (str[idx] != c)
            continue;
        if (--remaining <= 0)
            return static_cast<int>(idx);
    }
    return -1;
}

// Convert the number n to a string of exactly `digit` binary characters in
// two's complement, most significant bit first.
// Accepted range: -2^(digit-1) <= n <= 2^digit - 1. The upper bound is the
// unsigned maximum because callers also use this for unsigned fields
// (register numbers, condition codes, trap vectors, .FILL words).
// Throws std::out_of_range when n does not fit and std::invalid_argument
// when digit is negative.
// The previous version executed `return NULL` on overflow (constructing a
// std::string from a null pointer) and silently produced a wrong pattern for
// negatives below -2^(digit-1), e.g. -17 in a 5-bit field.
string dec_to_2com(int n, int digit){
    if (digit < 0)
        throw invalid_argument("dec_to_2com: digit must be non-negative");
    if (digit < 32){   // a field of 32 bits or more can hold any int
        const long long span = 1LL << digit;
        if (n >= span || n < -(span / 2))
            throw out_of_range("dec_to_2com: " + to_string(n) + " does not fit in " +
                               to_string(digit) + " bits");
    }

    // Two's complement bit pattern of n, sign-extended to 64 bits.
    const unsigned long long pattern =
        static_cast<unsigned long long>(static_cast<long long>(n));
    string imm(digit, '0');
    for (int bit = 0; bit < digit; ++bit){
        const bool is_set = (bit < 64) ? ((pattern >> bit) & 1ULL) != 0 : (n < 0);
        if (is_set)
            imm[digit - 1 - bit] = '1';
    }
    return imm;
}

  • 11
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

千里澄江

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值