MIPS汇编器设计

染汐_
已于 2024-01-21 13:57:11 修改
阅读量437
点赞数 8
文章标签： python
于 2023-12-26 21:34:22 首次发布
本文链接：https://blog.csdn.net/weixin_74173457/article/details/135231929
版权
题目描述

请使用Python/C++语言，实现一个MIPS的汇编器，完成将你常用到的汇编指令翻译为机器指令。
输入：汇编语言程序文件
输出：二进制或十六进制文件，采用Xilinx Vivado的COE文件格式
代码实现

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# created by 染汐
# URL:
# Time:
# __________    _____    _______  ____  ___.___ 
# \______   \  /  _  \   \      \ \   \/  /|   |
#  |       _/ /  /_\  \  /   |   \ \     / |   |
#  |    |   \/    |    \/    |    \/     \ |   |
#  |____|_  /\____|__  /\____|__  /___/\  \|___|
#         \/         \/         \/      \_/     
import copy

# Assembly source file opened by read().
file_path = r"D:\下载\mips.asm"
# Destination of the COE file written by work().
output_location = r"C:\Users\86130\Desktop\result.COE"
# R-type layout (op rs rt rd shamt funct): field widths 6 5 5 5 5 6 bits.
# Line indices of label definitions; filled in the __main__ block, read by translate().
signal = []
# Register name -> 5-bit register number (as a bit string).
# Covers all 32 general-purpose registers; "$zero" is an alias of "$0".
register = {
    "$0": "00000",
    "$zero": "00000",
    "$at": "00001",
    "$v0": "00010",
    "$v1": "00011",
    "$a0": "00100",
    "$a1": "00101",
    "$a2": "00110",
    "$a3": "00111",
    "$t0": "01000",
    "$t1": "01001",
    "$t2": "01010",
    "$t3": "01011",
    "$t4": "01100",
    "$t5": "01101",
    "$t6": "01110",
    "$t7": "01111",
    "$s0": "10000",
    "$s1": "10001",
    "$s2": "10010",
    "$s3": "10011",
    "$s4": "10100",
    "$s5": "10101",
    "$s6": "10110",
    "$s7": "10111",
    "$t8": "11000",
    "$t9": "11001",
    "$k0": "11010",
    "$k1": "11011",
    "$gp": "11100",
    "$sp": "11101",
    "$fp": "11110",
    "$ra": "11111",
}
# Pseudo-instructions the assembler is expected to support:
# nop            - no operation, equivalent to addi $0, $0, 0
# li $x, abcd    - load the 32-bit immediate abcd (two's complement) into $x
#                  using a lui-based instruction sequence
# mv $a, $b      - copy $b into $a, equivalent to addi $a, $b, 0
# not $a, $b     - bitwise-invert $b into $a, equivalent to xori $a, $b, -1
# neg $a, $b     - negate $b into $a, equivalent to sub $a, $0, $b

# Mnemonic -> 6-bit primary opcode (as a bit string).
# The coprocessor-1 mnemonics end in the digit "1" (bc1f, bc1t, lwc1, swc1);
# the historical spellings with a letter "l" are kept so existing lookups
# keep working.
opcode = {
    "R_Type": "000000",
    "bltz": "000001",
    "j": "000010",
    "jal": "000011",
    "beq": "000100",
    "bne": "000101",
    "blez": "000110",
    "bgtz": "000111",
    "addi": "001000",
    "addiu": "001001",
    "slti": "001010",
    "sltiu": "001011",
    "andi": "001100",
    "ori": "001101",
    "xori": "001110",
    "lui": "001111",
    "mfc0": "010000",
    "mtc0": "010000",
    "F_Type": "010001",
    "bc1f": "010001",
    "bc1t": "010001",
    "bclf": "010001",
    "bclt": "010001",
    "mul": "011100",
    "lb": "100000",
    "lh": "100001",
    "lw": "100011",
    "lbu": "100100",
    "lhu": "100101",
    "sb": "101000",
    "sh": "101001",
    "sw": "101011",
    "lwc1": "110001",
    "lwcl": "110001",
    "swc1": "111001",
    "swcl": "111001",
}
# R-type mnemonic -> 6-bit function code (as a bit string).
# Every R-type mnemonic listed in command_dict needs an entry here, otherwise
# RType_transfer cannot look up its function code.
RTypeInstruct = {
    "sll": "000000",
    "srl": "000010",
    "sra": "000011",
    "sllv": "000100",
    "srlv": "000110",
    "srav": "000111",
    "jr": "001000",
    "jalr": "001001",
    "syscall": "001100",
    "break": "001101",
    "mfhi": "010000",
    "mthi": "010001",
    "mflo": "010010",
    "mtlo": "010011",
    "mult": "011000",
    "multu": "011001",
    "div": "011010",
    "divu": "011011",
    "add": "100000",
    "addu": "100001",
    "sub": "100010",
    "subu": "100011",
    "and": "100100",
    "or": "100101",
    "xor": "100110",
    "nor": "100111",
    "slt": "101010",
    "sltu": "101011",
}

# Mnemonic -> instruction description:
#   'type'   - encoding family ('R', 'I' or 'J'), used to pick the encoder;
#   'opcode' - primary opcode as an integer;
#   'form'   - the operand fields in the order they are written in assembly.
# lbu/lhu are listed because the opcode table already defines them; without an
# entry here they would be rejected as unknown instructions.
command_dict = {
    'bltz': {'type': 'I', 'opcode': 1, 'form': ['rs', 'imm']},
    'bgez': {'type': 'I', 'opcode': 1, 'form': ['rs', 'imm']},
    'j': {'type': 'J', 'opcode': 2, 'form': ['imm']},
    'jal': {'type': 'J', 'opcode': 3, 'form': ['imm']},
    'beq': {'type': 'I', 'opcode': 4, 'form': ['rs', 'rt', 'imm']},
    'bne': {'type': 'I', 'opcode': 5, 'form': ['rs', 'rt', 'imm']},
    'blez': {'type': 'I', 'opcode': 6, 'form': ['rs', 'imm']},
    'bgtz': {"type": 'I', "opcode": 7, 'form': ['rs', 'imm']},
    'addi': {'type': 'I', 'opcode': 8, 'form': ['rt', 'rs', 'imm']},
    'addiu': {'type': 'I', 'opcode': 9, 'form': ['rt', 'rs', 'imm']},
    'slti': {'type': 'I', 'opcode': 10, 'form': ['rt', 'rs', 'imm']},
    'sltiu': {'type': 'I', 'opcode': 11, 'form': ['rt', 'rs', 'imm']},
    'andi': {'type': 'I', 'opcode': 12, 'form': ['rt', 'rs', 'imm']},
    'ori': {'type': 'I', 'opcode': 13, 'form': ['rt', 'rs', 'imm']},
    'xori': {'type': 'I', 'opcode': 14, 'form': ['rt', 'rs', 'imm']},
    'lui': {'type': 'I', 'opcode': 15, 'form': ['rt', 'imm']},
    'lb': {'type': 'I', 'opcode': 32, 'form': ['rt', 'imm', 'rs']},
    'lh': {'type': 'I', 'opcode': 33, 'form': ['rt', 'imm', 'rs']},
    'lw': {'type': 'I', 'opcode': 35, 'form': ['rt', 'imm', 'rs']},
    'lbu': {'type': 'I', 'opcode': 36, 'form': ['rt', 'imm', 'rs']},
    'lhu': {'type': 'I', 'opcode': 37, 'form': ['rt', 'imm', 'rs']},
    'sb': {'type': 'I', 'opcode': 40, 'form': ['rt', 'imm', 'rs']},
    'sh': {'type': 'I', 'opcode': 41, 'form': ['rt', 'imm', 'rs']},
    'sw': {'type': 'I', 'opcode': 43, 'form': ['rt', 'imm', 'rs']},

    'sll': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'srl': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'sra': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'shamt']},
    'sllv': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'srlv': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'srav': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rt', 'rs']},
    'jr': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'jalr': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mfhi': {'type': 'R', 'opcode': 0, 'form': ['rd']},
    'mthi': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mflo': {'type': 'R', 'opcode': 0, 'form': ['rd']},
    'mtlo': {'type': 'R', 'opcode': 0, 'form': ['rs']},
    'mult': {'type': 'R', 'opcode': 0, 'form': ['rs', "rt"]},
    'multu': {'type': 'R', 'opcode': 0, 'form': ['rs', "rt"]},
    'div': {'type': 'R', 'opcode': 0, 'form': ['rs', "rt"]},
    'divu': {'type': 'R', 'opcode': 0, 'form': ['rs', "rt"]},
    'add': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'addu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'sub': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'subu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'and': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'or': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'xor': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'nor': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'slt': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
    'sltu': {'type': 'R', 'opcode': 0, 'form': ['rd', 'rs', 'rt']},
}

# Pseudo-instruction table: every entry is tagged with type 'P'; only nop
# carries a fixed expansion string, the others are left empty here.
pseudoIS = {
    mnemonic: {'type': 'P', 'instruction': expansion}
    for mnemonic, expansion in (
        ('nop', 'addi $0, $0, 0'),
        ('li', ''),
        ('mv', ''),
        ('not', ''),
        ('neg', ''),
    )
}
# Per-register counter table, every register name starting at zero.
register_count = dict.fromkeys(
    (
        '$0', '$at', '$v0', '$v1',
        '$t0', '$t1', '$t2', '$t3', '$t4', '$t5', '$t6', '$t7',
        '$s0', '$s1', '$s2', '$s3', '$s4', '$s5', '$s6', '$s7',
        '$t8', '$t9', '$k0', '$k1', '$gp', '$sp', '$fp', '$ra',
    ),
    0,
)


def decimal_to_binary(decimal, width):
    """Return the low ``width`` bits of ``decimal`` as a zero-padded bit string.

    Negative values come out in two's-complement form because the value is
    masked with ``width`` one-bits before being formatted.
    """
    mask = int("1" * width, 2)
    low_bits = decimal & mask
    return format(low_bits, "b").zfill(width)


def mybin(n: str) -> str:
    """Return the low 16 bits of the integer literal ``n`` as a bit string.

    ``n`` may be a decimal literal (optionally negative) or a ``0x``/``0o``/
    ``0b`` prefixed literal. Negative values come out in two's-complement
    form. Returns ``None`` when ``n`` is not accepted by :func:`judge`.

    The literal is parsed with ``int(n, 0)`` rather than ``eval`` so that text
    taken from the assembly file is never executed as Python code.
    """
    if not judge(n):
        return None
    return format(int(n, 0) & 0xFFFF, "016b")


def judge(a: str) -> bool:
    """Return True when ``a`` is an integer literal the assembler accepts.

    Accepted shapes are unsigned decimal digits, ``-`` followed by decimal
    digits, or a ``0x``/``0o``/``0b`` prefixed literal. The text must also
    parse as an integer, so strings such as ``"0xZZ"`` or ``"0x10+1"`` (which
    merely start with a valid prefix) are rejected.
    """
    looks_numeric = (
        a.startswith(("0x", "0o", "0b"))
        or a.isdigit()
        or (a.startswith('-') and a[1:].isdigit())
    )
    if not looks_numeric:
        return False
    try:
        int(a, 0)
    except ValueError:
        return False
    return True


def RType_transfer(src: str) -> str:
    """Encode one R-type instruction as a 32-character bit string.

    Field order is op(6) rs(5) rt(5) rd(5) shamt(5) funct(6). Operands are
    matched to fields using the ``form`` list in ``command_dict``.

    Returns the bit string on success. On failure it returns a human-readable
    error message instead of raising; callers tell the two apart by checking
    whether the result starts with ``0`` or ``1``.
    """
    src_list = src.replace(',', ' ').replace(')', ' ').replace('(', ' ').split()
    if not src_list or src_list[0] not in command_dict:
        return "Key Error!"
    mnemonic = src_list[0]
    form = command_dict[mnemonic]["form"]
    if len(form) + 1 != len(src_list):
        return "length is not matched"
    if mnemonic not in RTypeInstruct:
        # Report the gap instead of crashing with KeyError.
        return "No function code for this instruction"

    fields = {"rs": "00000", "rt": "00000", "rd": "00000", "shamt": "00000"}

    # The shift amount is validated first so its messages take precedence
    # over register errors, as before.
    if "shamt" in form:
        shamt_token = src_list[form.index("shamt") + 1]
        if not judge(shamt_token):
            return "shamt need an immediate number"
        try:
            # int(..., 0) understands 0x/0o/0b prefixes and never executes
            # the operand text, unlike eval.
            shamt_value = int(shamt_token, 0)
        except ValueError:
            return "shamt need an immediate number"
        if shamt_value < 0 or shamt_value > 31:
            return "shamt overflow"
        fields["shamt"] = format(shamt_value, "05b")

    for field_name in ("rs", "rt", "rd"):
        if field_name in form:
            reg_token = src_list[form.index(field_name) + 1]
            if reg_token not in register:
                return "No register named this"
            fields[field_name] = register[reg_token]

    if mnemonic == "jalr" and "rd" not in form:
        # The one-operand form "jalr rs" links into $ra (register 31).
        fields["rd"] = "11111"

    return ("000000" + fields["rs"] + fields["rt"] + fields["rd"]
            + fields["shamt"] + RTypeInstruct[mnemonic])


def IType_transfer(src: str) -> str:
    """Encode one I-type instruction as a 32-character bit string.

    Field order is op(6) rs(5) rt(5) imm(16). Operands are matched to fields
    using the ``form`` list in ``command_dict``; parentheses are treated as
    separators so ``lw $t0, 4($sp)`` parses as ``rt imm rs``.

    The immediate must fit in 16 bits, either signed (-32768..32767) or
    unsigned (0..65535); anything else is reported as an overflow rather than
    being silently truncated.

    Returns the bit string on success, otherwise an error message (which never
    starts with ``0`` or ``1``).
    """
    src_list = src.replace(',', ' ').replace(')', ' ').replace('(', ' ').split()
    if not src_list or src_list[0] not in command_dict:
        return "Key Error!"
    mnemonic = src_list[0]
    form = command_dict[mnemonic]["form"]

    local_rs = "00000"
    local_rt = "00000"
    if mnemonic == "bgez":
        # bgez shares the REGIMM opcode with bltz and is selected by rt=00001.
        local_opcode = "000001"
        local_rt = "00001"
    elif mnemonic in opcode:
        local_opcode = opcode[mnemonic]
    else:
        return "Key Error!"

    if len(form) + 1 != len(src_list):
        return "length is not matched"
    if 'rt' in form:
        rt_token = src_list[form.index('rt') + 1]
        if rt_token not in register:
            return "No register named this"
        local_rt = register[rt_token]
    if 'rs' in form:
        rs_token = src_list[form.index('rs') + 1]
        if rs_token not in register:
            return "No register named this"
        local_rs = register[rs_token]

    imm_token = src_list[form.index('imm') + 1]
    if not judge(imm_token):
        return "immediate number need 'int'"
    try:
        # int(..., 0) understands 0x/0o/0b prefixes and never executes the
        # operand text, unlike eval.
        imm_value = int(imm_token, 0)
    except ValueError:
        return "immediate number need 'int'"
    if imm_value < -2 ** 15 or imm_value > 2 ** 16 - 1:
        return "immediate number overflow"
    local_imm = format(imm_value & 0xFFFF, "016b")

    return local_opcode + local_rs + local_rt + local_imm


def decimal_to_26bit_binary_complement(decimal):
    """Return ``decimal`` as a 26-character two's-complement bit string.

    The value is reduced to its low 26 bits, so the result is always exactly
    26 characters long, including for negative inputs and for inputs outside
    the 26-bit range.
    """
    return format(decimal & (2 ** 26 - 1), "026b")


def JType_transfer(src: str) -> str:
    """Encode one J-type instruction (``j`` / ``jal``) as a 32-character bit string.

    ``src`` is expected to look like ``"j 12"``: the mnemonic followed by an
    already-resolved numeric target. The result is the 6-bit opcode followed
    by the 26-bit target.

    Returns the bit string on success, otherwise an error message (which never
    starts with ``0`` or ``1``).
    """
    src_list = src.split()
    if len(src_list) < 2:
        return "length is not matched"
    try:
        # int(..., 0) understands 0x/0o/0b prefixes and never executes the
        # operand text, unlike eval; an unresolved label lands here too.
        x = int(src_list[1], 0)
    except ValueError:
        return "The jump target is not a resolved address"
    if x < -2 ** 25 or x > 2 ** 25 - 1:
        return "The address is too large to address"
    return opcode[src_list[0]] + decimal_to_26bit_binary_complement(x)


# Label table: label name -> [line indices that reference it, line indices that define it].
tmp = {}


def read():
    """Read the assembly file and return its cleaned-up lines.

    Processing steps:
      * comments (everything from ``#``) and surrounding whitespace are
        removed, and lines that end up empty are dropped;
      * every label (the text before ``:``) is registered in the module-level
        ``tmp`` table with empty reference/definition lists;
      * a ``j``/``jal`` whose target does not start with a letter is replaced
        by the marker string ``'error'``;
      * ``label: instruction`` is split into a ``label:`` line followed by the
        instruction on its own line.

    Comments are stripped before looking for ``:`` so that a colon inside a
    comment is not mistaken for a label, and label names are trimmed so that
    indented labels match the references to them.
    """
    cleaned = []
    with open(file_path, 'r', encoding="UTF-8") as f:
        for raw_line in f:
            line = raw_line.split("#")[0].strip()
            if not line:
                continue
            if ":" in line:
                tmp[line.split(":")[0].strip()] = [[], []]
            cleaned.append(line)

    for idx, line in enumerate(cleaned):
        if line.startswith('j ') or line.startswith("jal "):
            if not line.split()[-1][0].isalpha():
                cleaned[idx] = 'error'

    res = []
    for line in cleaned:
        if ':' not in line:
            res.append(line)
            continue
        parts = line.split(":")
        res.append(parts[0].strip() + ":")
        # Trim the instruction that shares a line with the label so later
        # exact comparisons (e.g. against 'nop') still match.
        trailing = parts[1].strip()
        if trailing:
            res.append(trailing)
    return res


def process(src_list: list) -> None:
    """Record where each label is referenced and where it is defined.

    For every line of ``src_list`` whose last whitespace-separated token is a
    known label, the line's position is appended to ``tmp[label][0]``
    (references). For every line whose text before ``:`` is a known label, the
    position is appended to ``tmp[label][1]`` (definitions).

    Positions come from ``enumerate`` rather than ``list.index`` so that two
    identical lines (for example the same branch written twice) are recorded
    at their own positions instead of both at the first occurrence.
    """
    for position, line in enumerate(src_list):
        tokens = line.split()
        if not tokens:
            continue
        last_token = tokens[-1]
        label_part = line.split(":")[0]
        if last_token in tmp:
            tmp[last_token][0].append(position)
        elif label_part in tmp:
            tmp[label_part][1].append(position)


def f(src_list: list) -> list:
    """Drop label lines and expand pseudo-instructions.

    Expansions:
      * ``nop``          -> ``addi $0, $0, 0``
      * ``li $x, imm``   -> ``lui $x, imm[31:16]`` then ``ori $x, $x, imm[15:0]``
        (``imm`` is taken as a 32-bit two's-complement value)
      * ``mv $a, $b``    -> ``addi $a, $b, 0``
      * ``not $a, $b``   -> ``xori $a, $b, -1``
      * ``neg $a, $b``   -> ``sub $a, $0, $b``

    Lines whose last token is a known label are kept unchanged (the label is
    resolved later); their position is also removed from ``tmp[label][0]``
    when present. A malformed ``li``/``neg`` is passed through unchanged so
    that the encoder reports it instead of this function raising.
    """
    ans = []
    for position, line in enumerate(src_list):
        tokens = line.replace(',', ' ').split()
        if not tokens:
            continue

        last_token = line.split()[-1]
        if last_token in tmp:
            # A label reference. Keep it even when its position was not
            # recorded, otherwise repeated identical branches would be lost.
            ans.append(line)
            if position in tmp[last_token][0]:
                tmp[last_token][0].remove(position)
            continue
        if line.split(":")[0] in tmp:
            # A label definition line produces no machine instruction.
            continue

        mnemonic = tokens[0]
        if tokens == ['nop']:
            ans.append("addi $0, $0, 0")
        elif mnemonic == 'li':
            expansion = None
            if len(tokens) == 3:
                try:
                    value = int(tokens[2], 0)
                except ValueError:
                    value = None
                if value is not None and -2 ** 31 <= value <= 2 ** 32 - 1:
                    bits = format(value & 0xFFFFFFFF, "032b")
                    target = tokens[1]
                    expansion = [
                        "lui " + target + ",0b" + bits[:16],
                        "ori " + target + ',' + target + ",0b" + bits[16:],
                    ]
            if expansion is None:
                ans.append(line)
            else:
                ans.extend(expansion)
        elif mnemonic == 'mv':
            # Only the mnemonic (first occurrence) is rewritten.
            ans.append(line.replace('mv', "addi", 1) + ',0')
        elif mnemonic == 'not':
            ans.append(line.replace('not', "xori", 1) + ',-1')
        elif mnemonic == 'neg':
            if len(tokens) < 3:
                ans.append(line)
            else:
                ans.append("sub " + tokens[1] + ',$0,' + tokens[2])
        else:
            ans.append(line)
    return ans


def translate(unfolded_list: list) -> list:
    """Replace label operands with numbers.

    Uses the module-level ``j_command_dict`` (label -> [reference line
    indices, definition line indices]) and ``signal`` (all label definition
    line indices). Line indices refer to the listing that still contains
    label-only lines, so each index is converted to an instruction index by
    subtracting the number of label lines above it.

    * ``j`` / ``jal`` / ``jr`` get the instruction index of the target.
    * Every other instruction ending in a label gets the offset of the target
      relative to the instruction that follows it.

    Each resolved reference consumes one entry of the label's reference list,
    jumps included, so later branches to the same label use their own line.
    Only the final operand is rewritten, so a label name that also occurs
    inside the mnemonic or a register name does not corrupt the instruction.

    NOTE(review): the indices are taken before pseudo-instruction expansion;
    since ``li`` expands to two instructions, targets located after an ``li``
    may be off by one per expansion -- confirm against the caller.
    """
    jump_mnemonics = ("j", "jal", "jr")

    def instruction_index(line_no):
        # Label-only lines above line_no do not occupy an instruction slot.
        return line_no - sum(1 for location in signal if location < line_no)

    ans = []
    for instruction in unfolded_list:
        tokens = instruction.split()
        if len(tokens) < 2 or tokens[-1] not in j_command_dict:
            ans.append(instruction)
            continue

        reference_lines, definition_lines = j_command_dict[tokens[-1]]
        if not definition_lines:
            # Label was never defined; leave the operand for the encoder to reject.
            ans.append(instruction)
            continue

        target = instruction_index(definition_lines[0])
        reference_line = reference_lines.pop(0) if reference_lines else None

        if tokens[0] in jump_mnemonics:
            ans.append(tokens[0] + " " + str(target))
            continue
        if reference_line is None:
            ans.append(instruction)
            continue

        # Branch offsets count from the instruction after the branch.
        offset = target - (instruction_index(reference_line) + 1)
        head = instruction.rsplit(None, 1)[0]
        ans.append(head + " " + str(offset))
    return ans


# ans1 = read()
# process(ans1[:])
# j_command_dict = copy.deepcopy(tmp)
# p = f(ans1)
# a = translate(p)
# print(a)


def bin_to_hex(src: str) -> str:
    """Convert a bit string to lowercase hex, left-padded with zeros to 8 digits.

    The bits are parsed with ``int(src, 2)`` so the text is never evaluated as
    Python code.
    """
    return format(int(src, 2), "x").rjust(8, '0')


# Number of successfully encoded instructions; incremented by getBinaryCodeList()
# and written into the COE header by work().
cnt = 0


def getBinaryCodeList(src_list) -> list:
    """Encode every instruction in ``src_list`` and return one entry per line.

    A successfully encoded instruction contributes its 8-digit hex word and
    bumps the module-level ``cnt``; a failed one contributes the encoder's
    error message. The marker ``'error'`` and unknown mnemonics are reported
    with fixed messages. Encoded words (and, for R-type, the raw bits and the
    source line) are echoed to stdout.
    """
    global cnt
    encoded = []
    for line in src_list:
        if line == 'error':
            encoded.append("The label must be a valid identifier")
            continue

        mnemonic = line.replace(',', ' ').split()[0]
        if mnemonic not in command_dict.keys():
            encoded.append("Key Error!")
            continue

        kind = command_dict[mnemonic]['type']
        if kind == 'R':
            bits = RType_transfer(line)
            print(bits)
            print(line)
        elif kind == 'I':
            bits = IType_transfer(line)
        elif kind == 'J':
            bits = JType_transfer(line)
        else:
            # Any other type yields no output entry.
            continue

        # Encoders return a bit string on success and a message on failure.
        if not bits.startswith(('0', '1')):
            encoded.append(bits)
            continue
        word = bin_to_hex(bits)
        print(word)
        encoded.append(word)
        cnt += 1
    return encoded


def work(src_list):
    """Write ``src_list`` to ``output_location`` as a COE memory image.

    The header declares radix 16 because the entries are hexadecimal words.
    Entries are separated by commas and the vector is terminated with a
    semicolon, as the COE format requires. The depth written in the header
    comment is the module-level ``cnt``.
    """
    header = (
        f"; This .COE file specifies the contents for a block memory of depth={cnt}, and width=32.\n"
        "memory_initialization_radix=16;\n"
        "memory_initialization_vector=\n"
    )
    with open(output_location, mode='w') as fp:
        fp.write(header)
        fp.write(",\n".join(src_list))
        fp.write(";\n")


if __name__ == '__main__':
    # 1. Load and clean the source; this also registers label names in tmp.
    ans1 = read()
    print(ans1)
    # 2. Record reference/definition positions of every label (on a copy of the list).
    process(ans1[:])
    # Snapshot the label table before f() removes entries from tmp;
    # translate() reads this module-level name.
    j_command_dict = copy.deepcopy(tmp)
    print(j_command_dict)
    # Collect the definition positions of all labels into signal.
    for i in list(j_command_dict.values()):
        signal += i[1]
    # 3. Remove label lines and expand pseudo-instructions.
    p = f(ans1)
    print(p)
    # 4. Replace label operands with numbers.
    src_list = translate(p)
    print(src_list)
    # 5. Encode to hex words and write the COE file.
    bin_code_list = getBinaryCodeList(src_list)
    work(bin_code_list)
更新于2024/01/21