题目描述
请使用Python/C++语言,实现一个MIPS的汇编器,完成将你常用到的汇编指令翻译为机器指令。
输入:汇编语言程序文件
输出:二进制或十六进制文件,采用Xilinx Vivado的COE文件格式
代码实现
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# created by 染汐
# URL:
# Time:
# __________ _____ _______ ____ ___.___
# \______ \ / _ \ \ \ \ \/ /| |
# | _/ / /_\ \ / | \ \ / | |
# | | \/ | \/ | \/ \ | |
# |____|_ /\____|__ /\____|__ /___/\ \|___|
# \/ \/ \/ \_/
import copy
# Path of the MIPS assembly source file that read() opens.
file_path = r"D:\下载\mips.asm"
# Path of the COE file that work() writes.
output_location = r"C:\Users\86130\Desktop\result.COE"
# R-type layout (op rs rt rd shamt funct): 6 5 5 5 5 6 bits
# Line indices of label definitions; filled in by the __main__ block from the
# label table and read by translate() when it adjusts branch offsets.
signal = []
# Register name -> 5-bit register number (binary string), used to fill the
# rs/rt/rd fields.  Covers all 32 general-purpose registers; "$zero" is an
# alias of "$0".
register = {
    "$0": "00000",
    "$zero": "00000",
    "$at": "00001",
    "$v0": "00010",
    "$v1": "00011",
    # Argument registers 4-7 were missing, so any instruction that used them
    # was rejected as an unknown register.
    "$a0": "00100",
    "$a1": "00101",
    "$a2": "00110",
    "$a3": "00111",
    "$t0": "01000",
    "$t1": "01001",
    "$t2": "01010",
    "$t3": "01011",
    "$t4": "01100",
    "$t5": "01101",
    "$t6": "01110",
    "$t7": "01111",
    "$s0": "10000",
    "$s1": "10001",
    "$s2": "10010",
    "$s3": "10011",
    "$s4": "10100",
    "$s5": "10101",
    "$s6": "10110",
    "$s7": "10111",
    "$t8": "11000",
    "$t9": "11001",
    "$k0": "11010",
    "$k1": "11011",
    "$gp": "11100",
    "$sp": "11101",
    "$fp": "11110",
    "$ra": "11111",
}
# nop(空操作,等效于addi $0, $0, 0)
# li $x, abcd(将一个32位立即数abcd加载到目标寄存器x,请使用lui等指令序列实现,请注意立即数是补码形式)
# mv $a, $b(将寄存器b内容复制到寄存器a中,等效于addi $a, $b, 0)
# not $a, $b(将寄存器b内容取反后复制到寄存器a中,等效于xori $a, $b, -1)
# neg $a, $b(将寄存器b内容取负后复制到寄存器a中,等效于sub $a, $0, $b)
# Mnemonic -> 6-bit primary opcode field, stored as a binary string.
# IType_transfer and JType_transfer look instructions up here by name.
# NOTE(review): "bclf"/"bclt"/"lwcl"/"swcl" look like misspellings of
# bc1f/bc1t/lwc1/swc1 (letter l vs. digit 1) - confirm before relying on them.
opcode = {
    "R_Type": "000000",
    "bltz": "000001",
    "j": "000010",
    "jal": "000011",
    "beq": "000100",
    "bne": "000101",
    "blez": "000110",
    "bgtz": "000111",
    "addi": "001000",
    "addiu": "001001",
    "slti": "001010",
    "sltiu": "001011",
    "andi": "001100",
    "ori": "001101",
    "xori": "001110",
    "lui": "001111",
    "mfc0": "010000",
    "mtc0": "010000",
    "F_Type": "010001",
    "bclf": "010001",
    "bclt": "010001",
    "mul": "011100",
    "lb": "100000",
    "lh": "100001",
    "lw": "100011",
    "lbu": "100100",
    "lhu": "100101",
    "sb": "101000",
    "sh": "101001",
    "sw": "101011",
    "lwcl": "110001",
    "swcl": "111001"
}
# R-type mnemonic -> 6-bit funct field (binary string).  RType_transfer looks
# every R-type instruction up here, so each R-type entry of command_dict needs
# a funct code in this table.
RTypeInstruct = {
    "sll": "000000",
    "srl": "000010",
    "sra": "000011",
    "sllv": "000100",
    "srlv": "000110",
    "srav": "000111",
    "jr": "001000",
    "jalr": "001001",
    "syscall": "001100",
    "break": "001101",
    "mfhi": "010000",
    "mthi": "010001",
    "mflo": "010010",
    "mtlo": "010011",
    "mult": "011000",
    "multu": "011001",
    "div": "011010",
    "divu": "011011",
    "add": "100000",
    "addu": "100001",
    "sub": "100010",
    "subu": "100011",
    "and": "100100",
    "or": "100101",
    "xor": "100110",
    "nor": "100111",
    "slt": "101010",
    "sltu": "101011",
}
# Mnemonic -> instruction description:
#   'type'   - encoding family ('R', 'I' or 'J'); selects the *_transfer encoder
#   'opcode' - primary opcode as an integer
#   'form'   - the field each assembly operand fills, in source order
command_dict = {
    'bltz': {'type': 'I', 'opcode': 1, 'form': ['rs', 'imm']},
    'bgez': {'type': 'I', 'opcode': 1, 'form': ['rs', 'imm']},
    'j': {'type': 'J', 'opcode': 2, 'form': ['imm']},
    'jal': {'type': 'J', 'opcode': 3, 'form': ['imm']},
    'beq': {'type': 'I', 'opcode': 4, 'form': ['rs', 'rt', 'imm']},
    'bne': {'type': 'I', 'opcode': 5, 'form': ['rs', 'rt', 'imm']},
    'blez': {'type': 'I', 'opcode': 6, 'form': ['rs', 'imm']},
    'bgtz': {'type': 'I', 'opcode': 7, 'form': ['rs', 'imm']},
    'addi': {'type': 'I', 'opcode': 8, 'form': ['rt', 'rs', 'imm']},
    'addiu': {'type': 'I', 'opcode': 9, 'form': ['rt', 'rs', 'imm']},
    'slti': {'type': 'I', 'opcode': 10, 'form': ['rt', 'rs', 'imm']},
    'sltiu': {'type': 'I', 'opcode': 11, 'form': ['rt', 'rs', 'imm']},
    'andi': {'type': 'I', 'opcode': 12, 'form': ['rt', 'rs', 'imm']},
    'ori': {'type': 'I', 'opcode': 13, 'form': ['rt', 'rs', 'imm']},
    'xori': {'type': 'I', 'opcode': 14, 'form': ['rt', 'rs', 'imm']},
    'lui': {'type': 'I', 'opcode': 15, 'form': ['rt', 'imm']},
    'lb': {'type': 'I', 'opcode': 32, 'form': ['rt', 'imm', 'rs']},
    'lh': {'type': 'I', 'opcode': 33, 'form': ['rt', 'imm', 'rs']},
    'lw': {'type': 'I', 'opcode': 35, 'form': ['rt', 'imm', 'rs']},
    # lbu/lhu already had opcodes in the opcode table but no entry here, so
    # the assembler reported them as unknown instructions.
    'lbu': {'type': 'I', 'opcode': 36, 'form': ['rt', 'imm', 'rs']},
    'lhu': {'type': 'I', 'opcode': 37, 'form': ['rt', 'imm', 'rs']},
    'sb': {'type': 'I', 'opcode': 40, 'form': ['rt', 'imm', 'rs']},
    'sh': {'type': 'I', 'opcode': 41, 'form': ['rt', 'imm', 'rs']},
    'sw': {'type': 'I', 'opcode': 43, 'form': ['rt', 'imm', 'rs']},
    'sll': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'srl': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'sra': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'sllv': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'srlv': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'srav': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'jr': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'jalr': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mfhi': {'type': 'R', 'opcode': 0, 'form': ['rd']},
    'mthi': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mflo': {'type': 'R', 'opcode': 0, 'form': ['rd']},
    'mtlo': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mult': {'type': 'R', 'opcode': 0, 'form': ['rs', 'rt']},
    'multu': {'type': 'R', 'opcode': 0, 'form': ['rs', 'rt']},
    'div': {'type': 'R', 'opcode': 0, 'form': ['rs', 'rt']},
    'divu': {'type': 'R', 'opcode': 0, 'form': ['rs', 'rt']},
    'add': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'addu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'sub': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'subu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'and': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'or': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'xor': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'nor': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'slt': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'sltu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
}
# Table of the supported pseudo-instruction mnemonics.  Only 'nop' carries an
# expansion template; the others are left empty.
# NOTE(review): nothing in the visible source reads this table - f() hard-codes
# the expansions itself - so it appears to be informational only.
pseudoIS = {
    'nop': {'type': 'P', 'instruction': 'addi $0, $0, 0'},
    'li': {'type': 'P', 'instruction': ''},
    'mv': {'type': 'P', 'instruction': ''},
    'not': {'type': 'P', 'instruction': ''},
    'neg': {'type': 'P', 'instruction': ''},
}
# Per-register counters, all initialised to 0.
# NOTE(review): not referenced anywhere in the visible source; confirm whether
# it is still needed.
register_count = {'$0': 0, '$at': 0, '$v0': 0, '$v1': 0, '$t0': 0, '$t1': 0, '$t2': 0, '$t3': 0, '$t4': 0, '$t5': 0,
'$t6': 0, '$t7': 0, '$s0': 0, '$s1': 0, '$s2': 0, '$s3': 0, '$s4': 0, '$s5': 0, '$s6': 0, '$s7': 0,
'$t8': 0, '$t9': 0, '$k0': 0, '$k1': 0, '$gp': 0, '$sp': 0, '$fp': 0, '$ra': 0}
def decimal_to_binary(decimal, width):
    """Return the low ``width`` bits of ``decimal`` as a zero-padded bit string.

    The value is masked with ``width`` one-bits before formatting, so negative
    numbers come out in two's-complement form.
    """
    all_ones = int("1" * width, 2)  # mask that is exactly ``width`` bits wide
    digits = format(decimal & all_ones, "b")
    return digits.zfill(width)
def mybin(n: str) -> str:
    """Convert an integer literal to its 16-bit two's-complement bit string.

    Args:
        n: Integer literal as text, in any notation ``int(text, 0)`` accepts
            (decimal, ``0x``, ``0o``, ``0b``, optional sign).  A plain ``int``
            is accepted as well.

    Returns:
        Exactly 16 characters of '0'/'1'; higher bits of the value are dropped.

    Raises:
        ValueError: if ``n`` is a string that is not an integer literal.
    """
    if isinstance(n, str):
        # Parse with int() rather than eval(): the text comes straight from the
        # assembly file, and eval() would execute arbitrary expressions.
        try:
            n = int(n, 0)
        except ValueError:
            raise ValueError(f"not an integer literal: {n!r}") from None
    return format(n & 0xFFFF, "016b")
def judge(a: str) -> bool:
    """Return True when ``a`` is a well-formed integer literal.

    A literal is anything ``int(a, 0)`` can parse: decimal, ``0x`` hexadecimal,
    ``0o`` octal or ``0b`` binary, with an optional sign.  The text is actually
    parsed instead of only looking at its prefix, so malformed tokens such as
    ``"0xZZ"`` or a bare ``"0b"`` are rejected here rather than failing later
    in the callers.
    """
    try:
        int(a, 0)
    except (TypeError, ValueError):
        return False
    return True
def RType_transfer(src: str) -> str:
    """Encode one R-type instruction as a 32-character bit string.

    Layout: op(6) rs(5) rt(5) rd(5) shamt(5) funct(6), with op always 000000.

    Args:
        src: One assembly line whose mnemonic is an R-type key of
            ``command_dict``, e.g. ``"add $t0, $t1, $t2"``.

    Returns:
        The 32-bit encoding, or a human-readable error message.  Error
        messages never start with '0' or '1', which is how the caller tells
        the two apart.
    """
    src_list = src.replace(',', ' ').replace(')', ' ').replace('(', ' ').split()
    mnemonic = src_list[0]
    form = command_dict[mnemonic]["form"]
    if len(form) + 1 != len(src_list):
        return "length is not matched"

    fields = {"rs": "00000", "rt": "00000", "rd": "00000"}
    shamt_bits = "00000"

    if "shamt" in form:
        token = src_list[form.index("shamt") + 1]
        # int(token, 0) understands decimal/0x/0o/0b without resorting to eval().
        try:
            shamt = int(token, 0)
        except ValueError:
            return "shamt need an immediate number"
        if not 0 <= shamt <= 31:
            return "shamt overflow"
        shamt_bits = format(shamt, "05b")

    if mnemonic == "jalr" and "rd" not in form:
        # The one-operand form "jalr rs" links through $ra, so rd is 31;
        # leaving rd at $0 would throw the return address away.
        fields["rd"] = register["$ra"]

    for kind in ("rs", "rt", "rd"):
        if kind in form:
            name = src_list[form.index(kind) + 1]
            if name not in register:
                return "No register named this"
            fields[kind] = register[name]

    funct = RTypeInstruct.get(mnemonic)
    if funct is None:
        # Report the gap in the funct table instead of crashing with KeyError.
        return "No funct code is defined for this instruction"

    return "000000" + fields["rs"] + fields["rt"] + fields["rd"] + shamt_bits + funct
def IType_transfer(src: str) -> str:
    """Encode one I-type instruction as a 32-character bit string.

    Layout: op(6) rs(5) rt(5) imm(16).

    Args:
        src: One assembly line whose mnemonic is an I-type key of
            ``command_dict``, e.g. ``"addi $t0, $t1, -4"`` or
            ``"lw $t0, 8($sp)"``.

    Returns:
        The 32-bit encoding, or a human-readable error message (which never
        starts with '0' or '1').
    """
    src_list = src.replace(',', ' ').replace(')', ' ').replace('(', ' ').split()
    mnemonic = src_list[0]
    entry = command_dict[mnemonic]
    form = entry["form"]
    if len(form) + 1 != len(src_list):
        return "length is not matched"

    # Take the opcode from command_dict: it covers every I-type mnemonic,
    # including bgez, which has no entry in the string opcode table.
    local_opcode = format(entry["opcode"], "06b")

    fields = {"rs": "00000", "rt": "00000"}
    if mnemonic == "bgez":
        # bltz and bgez share opcode 000001; the rt field tells them apart
        # (00000 = bltz, 00001 = bgez).
        fields["rt"] = "00001"

    for kind in ("rt", "rs"):
        if kind in form:
            name = src_list[form.index(kind) + 1]
            if name not in register:
                return "No register named this"
            fields[kind] = register[name]

    token = src_list[form.index('imm') + 1]
    try:
        imm = int(token, 0)  # decimal/0x/0o/0b, without eval()
    except ValueError:
        return "immediate number need 'int'"
    # The field is 16 bits wide.  Accept both the signed (-32768..32767) and
    # the unsigned (0..65535) reading; anything else would be silently
    # truncated, so report it instead.
    if imm < -2 ** 15 or imm > 2 ** 16 - 1:
        return "immediate number overflow"

    local_imm = format(imm & 0xFFFF, "016b")
    return local_opcode + fields["rs"] + fields["rt"] + local_imm
def decimal_to_26bit_binary_complement(decimal):
    """Return ``decimal`` as a 26-character two's-complement bit string.

    The value is masked to 26 bits, so the result is always exactly 26
    characters long.  Negative values used to come out shorter when their
    complement had leading zeros, and large positive values came out longer.
    """
    return format(decimal & (2 ** 26 - 1), "026b")
def JType_transfer(src: str) -> str:
    """Encode one J-type instruction (``j`` / ``jal``) as a 32-character bit string.

    Layout: op(6) target(26).

    Args:
        src: ``"<mnemonic> <target>"`` where the target is an integer literal;
            translate() is expected to have replaced labels by numbers already.

    Returns:
        The 32-bit encoding, or a human-readable error message (which never
        starts with '0' or '1').
    """
    src_list = src.split()
    if len(src_list) != 2:
        return "length is not matched"
    # A label that was never defined is still text at this point; report it
    # instead of evaluating it as a Python expression.
    try:
        x = int(src_list[1], 0)
    except ValueError:
        return "The jump target is not a defined label or a number"
    if x < -2 ** 25 or x > 2 ** 25 - 1:
        return "The address is too large to address"
    return opcode[src_list[0]] + decimal_to_26bit_binary_complement(x)
# Label table shared by read(), process() and f():
#   label name -> [[indices of lines that reference it], [indices of lines that define it]]
# read() creates the entries, process() fills in the index lists.
tmp = dict()
def read():
    """Load the assembly file and return its cleaned-up statements.

    For every line of ``file_path`` this
      * removes the ``#`` comment and surrounding whitespace,
      * skips lines that are empty afterwards,
      * registers each label (the text before ``:``) in the module-level
        ``tmp`` table with empty index lists,
      * splits ``"label: instr"`` into the two entries ``"label:"`` and
        ``"instr"``,
      * replaces a ``j`` / ``jal`` whose target does not start with a letter
        by the marker ``'error'``.

    Returns:
        list[str]: label lines (``"name:"``) and instruction lines, in file
        order.
    """
    statements = []
    with open(file_path, 'r', encoding="UTF-8") as f:
        for raw in f:
            # Strip the comment first so a ':' inside a comment is never
            # mistaken for a label definition.
            text = raw.split("#")[0].strip()
            if not text:
                continue
            label, colon, rest = text.partition(":")
            if colon:
                # Store the label without stray whitespace; process() and f()
                # look it up by the exact operand text.
                label = label.strip()
                tmp[label] = [[], []]
                statements.append(label + ":")
                text = rest.strip()
                if not text:
                    continue
            statements.append(text)

    for index, text in enumerate(statements):
        tokens = text.split()
        if tokens[0] in ('j', 'jal') and not tokens[-1][0].isalpha():
            statements[index] = 'error'
    return statements
def process(src_list: list) -> None:
    """Record where each label is referenced and where it is defined.

    For every line of ``src_list``:
      * if its last whitespace-separated token is a known label, the line's
        index is appended to ``tmp[label][0]`` (references);
      * otherwise, if the text before ``:`` is a known label, the index is
        appended to ``tmp[label][1]`` (definitions).

    Indices come from ``enumerate``.  Looking a line up with ``list.index``
    returns its first occurrence, so two identical lines (for example two
    ``j loop``) would both be recorded under the first one's index.

    Args:
        src_list: Statements as returned by read().
    """
    for index, line in enumerate(src_list):
        last_token = line.split()[-1]
        if last_token in tmp:
            tmp[last_token][0].append(index)
            continue
        label = line.split(":")[0]
        if label in tmp:
            tmp[label][1].append(index)
def f(src_list: list) -> list:
    """Drop label-definition lines and expand pseudo-instructions.

    Expansions:
      * ``nop``         -> ``addi $0, $0, 0``
      * ``li $x, imm``  -> ``lui $x, imm[31:16]`` + ``ori $x, $x, imm[15:0]``
        (``imm`` is taken as a 32-bit two's-complement value)
      * ``mv $a, $b``   -> ``addi $a, $b, 0``
      * ``not $a, $b``  -> ``xori $a, $b, -1``
      * ``neg $a, $b``  -> ``sub $a, $0, $b``

    Lines that reference a label are passed through untouched; their index is
    removed from ``tmp[label][0]`` as they are consumed.

    Args:
        src_list: Statements as returned by read(), already indexed by
            process().

    Returns:
        list[str]: real instructions only, still with symbolic labels.
    """
    ans = []
    for index, line in enumerate(src_list):
        last_token = line.split()[-1]
        tokens = line.replace(',', ' ').split()
        mnemonic = tokens[0]

        if last_token in tmp and index in tmp[last_token][0]:
            # Jump/branch referencing a label: translate() resolves it later.
            ans.append(line)
            tmp[last_token][0].remove(index)
        elif line.split(":")[0] in tmp:
            # Label definition: produces no machine word.
            continue
        elif line.strip() == 'nop':
            ans.append("addi $0, $0, 0")
        elif mnemonic == 'li':
            target = tokens[1]
            try:
                value = int(tokens[-1], 0)
            except ValueError:
                # Not a number: leave the line unexpanded.  'li' is not a real
                # instruction, so the encoder reports it instead of this
                # function crashing.
                ans.append(line)
            else:
                # Split the full 32-bit pattern.  Padding a 16-bit value to 32
                # bits would always load zero into the upper half.
                bits = format(value & 0xFFFFFFFF, "032b")
                ans.append("lui " + target + ",0b" + bits[:16])
                ans.append("ori " + target + ',' + target + ",0b" + bits[16:])
        elif mnemonic == 'mv':
            # Replace only the mnemonic, not later occurrences of the text.
            ans.append(line.replace('mv', "addi", 1) + ',0')
        elif mnemonic == 'not':
            ans.append(line.replace('not', "xori", 1) + ',-1')
        elif mnemonic == 'neg':
            ans.append("sub " + tokens[1] + ',$0,' + tokens[2])
        elif last_token not in tmp:
            ans.append(line)
    return ans
def translate(unfolded_list: list) -> list:
    """Replace symbolic labels in jump/branch lines by numbers.

    Relies on two module-level names that the ``__main__`` block sets up:
    ``j_command_dict`` (a deep copy of the label table, label ->
    [[reference indices], [definition indices]]) and ``signal`` (the
    definition indices of all labels).

    For each line whose last token is a key of ``j_command_dict``:
      * mnemonic in ``J`` (j / jal / jr): the target starts as the label's
        first definition index and is decremented once for every definition
        index (over all labels) that is smaller than the running value; the
        line becomes ``"<mnemonic> <target>"``.
      * any other mnemonic (branches): the offset is
        ``definition - reference - 1`` when that is positive, otherwise
        ``definition - reference``; it is then moved towards zero by the
        number of ``signal`` entries lying strictly between the two indices.
        The label text is replaced by the offset and the consumed reference
        index is popped.
    All other lines are copied unchanged.

    NOTE(review): the jump path does not pop the reference index while the
    branch path does - verify the result when one label is used by both a
    jump and a later branch.

    Args:
        unfolded_list: Instruction lines as returned by f().

    Returns:
        list[str]: the same lines with labels replaced by numbers.
    """
    # Convert jump labels into numbers.
    ans = []
    J = ["j", "jal", "jr"]
    for i in range(0, len(unfolded_list)):
        if unfolded_list[i].split()[-1] in j_command_dict.keys():
            if unfolded_list[i].split()[0].strip() in J:
                # First recorded definition index of the referenced label.
                x = j_command_dict[unfolded_list[i].split()[-1]][1][0]
                # v[-1] is each label's definition-index list.
                for v in j_command_dict.values():
                    for num in v[-1]:
                        if num < x:
                            x -= 1
                code_str = str(x)
                ans.append(unfolded_list[i].split()[0] + " " + code_str)
                continue
            else:
                # definition - reference - 1 if that is > 0, else definition - reference
                code_str = str(
                    j_command_dict[unfolded_list[i].split()[-1]][1][0] -
                    j_command_dict[unfolded_list[i].split()[-1]][0][
                        0] - 1 if j_command_dict[unfolded_list[i].split()[-1]][1][0] -
                                  j_command_dict[unfolded_list[i].split()[-1]][0][0] - 1 > 0 else
                    j_command_dict[unfolded_list[i].split()[-1]][1][0] -
                    j_command_dict[unfolded_list[i].split()[-1]][0][
                        0])
                # Local counter (not the module-level cnt): number of label
                # definitions strictly between the reference and the target.
                cnt = 0
                for location in signal:
                    a = min([j_command_dict[unfolded_list[i].split()[-1]][1][0],
                             j_command_dict[unfolded_list[i].split()[-1]][0][0]])
                    b = max([j_command_dict[unfolded_list[i].split()[-1]][1][0],
                             j_command_dict[unfolded_list[i].split()[-1]][0][0]])
                    if a < location < b:
                        cnt += 1
                code = int(code_str)
                # Shrink the offset's magnitude by the number of skipped labels.
                code = code - cnt if code > 0 else code + cnt
                code_str = str(code)
                new_command = unfolded_list[i].replace(unfolded_list[i].split()[-1], code_str)
                # This reference has been consumed; the next use of the label
                # takes the following index.
                j_command_dict[unfolded_list[i].split()[-1]][0].pop(0)
                ans.append(new_command)
        else:
            ans.append(unfolded_list[i])
    return ans
# ans1 = read()
# process(ans1[:])
# j_command_dict = copy.deepcopy(tmp)
# p = unfold(ans1)
# a = translate(p)
# print(a)
def bin_to_hex(src: str) -> str:
    """Convert a bit string to lower-case hexadecimal, zero-padded to 8 digits.

    Args:
        src: String of '0'/'1' characters (normally one 32-bit instruction).

    Returns:
        The value in hexadecimal without the ``0x`` prefix.

    Raises:
        ValueError: if ``src`` is not a binary number.
    """
    # int(src, 2) parses the digits directly; no eval() needed.
    return format(int(src, 2), "08x")
# Number of instructions encoded successfully so far; work() reports it as the
# memory depth.
cnt = 0
def getBinaryCodeList(src_list) -> list:
    """Encode every instruction line and return one output entry per line.

    Each entry is either the instruction as 8 hexadecimal digits or an error
    message.  An encoder result counts as machine code when it starts with
    '0' or '1'; every such result increments the module-level ``cnt``.
    Encoded words (and, for R-type lines, the raw result and the source line)
    are echoed to stdout.
    """
    global cnt
    encoders = {'R': RType_transfer, 'I': IType_transfer, 'J': JType_transfer}
    output = []
    for line in src_list:
        if line == 'error':
            output.append("The label must be a valid identifier")
            continue
        mnemonic = line.replace(',', ' ').split()[0]
        if mnemonic not in command_dict:
            output.append("Key Error!")
            continue
        kind = command_dict[mnemonic]['type']
        if kind not in encoders:
            # Unknown encoding family: contributes nothing to the output.
            continue
        encoded = encoders[kind](line)
        if kind == 'R':
            print(encoded)
            print(line)
        if not encoded.startswith(('0', '1')):
            # Encoder returned an error message; pass it through.
            output.append(encoded)
            continue
        word = bin_to_hex(encoded)
        print(word)
        output.append(word)
        cnt += 1
    return output
def work(src_list):
    """Write the encoded program to ``output_location`` as a Xilinx COE file.

    The file consists of a ``;`` comment line, the radix declaration and the
    initialization vector: one entry per line, separated by commas and
    terminated by a semicolon.

    Args:
        src_list: Entries as returned by getBinaryCodeList() (hexadecimal
            words, possibly mixed with error messages).
    """
    # The words are hexadecimal, so the radix is 16.  The instruction count
    # only belongs in the depth comment, not in the radix.
    head_str = (
        f"; This .COE file specifies the contents for a block memory of depth={cnt}, and width=32.\n"
        "memory_initialization_radix=16;\n"
        "memory_initialization_vector=\n"
    )
    with open(output_location, mode='w') as fp:
        fp.write(head_str)
        # Commas separate the entries; the vector itself ends with ';'.
        fp.write(",\n".join(src_list))
        fp.write(";\n")
if __name__ == '__main__':
    # 1. Load the source and index every label reference and definition.
    cleaned_lines = read()
    print(cleaned_lines)
    process(list(cleaned_lines))

    # 2. Snapshot the label table for translate(); f() consumes the original.
    j_command_dict = copy.deepcopy(tmp)
    print(j_command_dict)
    for label_entry in j_command_dict.values():
        # Collect the definition indices of all labels.
        signal.extend(label_entry[1])

    # 3. Expand pseudo-instructions, then resolve labels to numbers.
    expanded = f(cleaned_lines)
    print(expanded)
    resolved = translate(expanded)
    print(resolved)

    # 4. Encode and write the COE file.
    work(getBinaryCodeList(resolved))
更新于2024/01/21