逐次加一 + Bell 有向图的算符优先分析技术：从文件读取文法规则，生成算符优先矩阵，再分别利用逐次加一法和 Bell 有向图法构造优先函数 f(x) 和 g(x)，最后根据优先关系矩阵判断输入串是否为句子并给出识别过程。支持通过文件输入不同的文法。

最新推荐文章于 2024-09-16 20:08:29 发布
不归路&
最新推荐文章于 2024-09-16 20:08:29 发布
阅读量308
点赞数 5
文章标签：矩阵 java 前端编辑器 python pycharm
本文链接：https://blog.csdn.net/weixin_64559564/article/details/134521421
版权
本文介绍了如何利用逐次加一法和 Bell 有向图法，从文件中读取文法规则、生成算符优先矩阵，进而判断输入串是否为句子并给出识别过程。算法支持不同的文法，并为每个文法生成优先函数 f(x) 和 g(x)。
摘要由CSDN通过智能技术生成
本程序实现了基于逐次加一法和 Bell 有向图法的算符优先分析：先从文件读取文法规则并生成算符优先矩阵，再分别用逐次加一法和 Bell 有向图法构造优先函数 f(x) 和 g(x)，最后根据优先关系矩阵判断输入串是否为句子并给出识别过程。程序支持通过文件输入不同的文法，并针对每个文法生成相应的优先关系以及 f(x) 和 g(x)。
代码如下：
# Module-level grammar: maps each left-hand-side symbol to the list of its
# right-hand-side alternatives. Filled in by read_productions_from_file.
productions = {}
def calculate_first(productions):
    """Compute the FIRSTVT set of every non-terminal.

    Symbols are single characters; a character counts as a non-terminal
    when ``str.isupper()`` is true for it and as a terminal otherwise.

    Args:
        productions: dict mapping a non-terminal to its list of
            right-hand-side strings.

    Returns:
        A new dict mapping each non-terminal to the set of terminals that
        can appear first in a string derived from it, either at the very
        front or right after one leading non-terminal.
    """
    first_vt = {nonterminal: set() for nonterminal in productions}

    # Seed pass: terminals visible directly in each alternative.
    for nonterminal, alternatives in productions.items():
        for rhs in alternatives:
            head = rhs[0]
            if not head.isupper():
                first_vt[nonterminal].add(head)
            elif len(rhs) >= 2 and not rhs[1].isupper():
                first_vt[nonterminal].add(rhs[1])

    # Closure pass: A -> B... lets A inherit everything B has; repeat until
    # a full sweep adds nothing.
    grew = True
    while grew:
        grew = False
        for nonterminal, alternatives in productions.items():
            for rhs in alternatives:
                head = rhs[0]
                if not head.isupper():
                    continue
                merged = first_vt[nonterminal] | first_vt[head]
                if merged != first_vt[nonterminal]:
                    first_vt[nonterminal] = merged
                    grew = True

    return first_vt


def calculate_last(productions):
    """Compute the LASTVT set of every non-terminal.

    Symbols are single characters; a character counts as a non-terminal
    when ``str.isupper()`` is true for it and as a terminal otherwise.

    Args:
        productions: dict mapping a non-terminal to its list of
            right-hand-side strings.

    Returns:
        A new dict mapping each non-terminal to the set of terminals that
        can appear last in a string derived from it, either at the very
        end or right before one trailing non-terminal.
    """
    last_vt = {nonterminal: set() for nonterminal in productions}

    # Seed pass: terminals visible directly at the tail of each alternative.
    for nonterminal, alternatives in productions.items():
        for rhs in alternatives:
            tail = rhs[-1]
            if not tail.isupper():
                last_vt[nonterminal].add(tail)
            elif len(rhs) >= 2 and not rhs[-2].isupper():
                last_vt[nonterminal].add(rhs[-2])

    # Closure pass: A -> ...B lets A inherit everything B has; repeat until
    # a full sweep adds nothing.
    grew = True
    while grew:
        grew = False
        for nonterminal, alternatives in productions.items():
            for rhs in alternatives:
                tail = rhs[-1]
                if not tail.isupper():
                    continue
                target = last_vt[nonterminal]
                size_before = len(target)
                target |= last_vt[tail]
                if len(target) != size_before:
                    grew = True

    return last_vt
def generate_parsing_table(productions, LTP, FTP):
    """Build the operator-precedence relation table.

    Relation codes stored in the table:
        0 - no relation (error)
        1 - left symbol takes precedence (``>``, reduce)
        2 - left symbol yields precedence (``<``, shift)
        3 - equal precedence (``=``)

    Args:
        productions: dict mapping a non-terminal to its list of
            right-hand-side strings (one character per symbol, upper-case
            characters are non-terminals).
        LTP: dict mapping each non-terminal to its LASTVT set.
        FTP: dict mapping each non-terminal to its FIRSTVT set.

    Returns:
        dict keyed by ``(left_terminal, right_terminal)`` holding a relation
        code for every pair of terminals, plus the end marker ``#``.
    """
    # Collect every terminal that occurs anywhere in the grammar, not only
    # those in FIRSTVT/LASTVT: a terminal in the middle of a right-hand side
    # (e.g. 'y' in "xAyBz") still needs its own row, column and '#' entries.
    terminals = set()
    for key, alternatives in productions.items():
        terminals |= LTP[key] | FTP[key]
        for rhs in alternatives:
            terminals.update(ch for ch in rhs if not ch.isupper())

    table = {(a, b): 0 for a in terminals for b in terminals}

    for alternatives in productions.values():
        for rhs in alternatives:
            for i in range(1, len(rhs)):
                prev, cur = rhs[i - 1], rhs[i]
                if prev.isupper() and not cur.isupper():
                    # ...Ea : every terminal that can end E takes precedence over a.
                    for ch in LTP[prev]:
                        table[(ch, cur)] = 1
                elif cur.isupper() and not prev.isupper():
                    # ...aE : a yields precedence to every terminal that can start E.
                    for ch in FTP[cur]:
                        table[(prev, ch)] = 2
                elif not prev.isupper() and not cur.isupper():
                    # ...ab : directly adjacent terminals have equal precedence.
                    table[(prev, cur)] = 3

            for i in range(len(rhs) - 2):
                # ...aEb : terminals separated by one non-terminal are equal.
                if (not rhs[i].isupper() and rhs[i + 1].isupper()
                        and not rhs[i + 2].isupper()):
                    table[(rhs[i], rhs[i + 2])] = 3

    # The end marker yields to every terminal on its right and every terminal
    # takes precedence over it; '#' against '#' carries no relation.
    for terminal in terminals:
        table[("#", terminal)] = 2
        table[(terminal, "#")] = 1
    table[("#", "#")] = 0

    return table


def pop_elements(lst, i, j):
    """Return a copy of ``lst`` without the elements at indices ``i`` to ``j`` inclusive.

    The input sequence is not modified.
    """
    kept_before = lst[:i]
    kept_after = lst[j + 1:]
    return kept_before + kept_after


def find_matching_rule(substring, productions):
    """Tell whether ``substring`` has the shape of some production's right-hand side.

    A right-hand side matches when it has the same length as ``substring``
    and every position agrees: a non-terminal in the rule (upper-case
    character) accepts any non-terminal in ``substring``, while a terminal
    must be identical. Every position has to agree; a single matching
    position is not enough.

    Args:
        substring: candidate phrase, one character per symbol.
        productions: dict mapping a non-terminal to its list of
            right-hand-side strings.

    Returns:
        True if at least one right-hand side matches, otherwise False.
        An empty ``substring`` never matches.
    """
    if not substring:
        return False

    def symbols_agree(rule_symbol, phrase_symbol):
        # Non-terminals are interchangeable; terminals must be the same character.
        if rule_symbol.isupper():
            return phrase_symbol.isupper()
        return rule_symbol == phrase_symbol

    for alternatives in productions.values():
        for right in alternatives:
            if len(right) == len(substring) and all(
                    map(symbols_agree, right, substring)):
                return True
    return False


def process_sentence(sentence, L, grammar=None):
    """Run the operator-precedence shift/reduce recognizer on one sentence.

    The stack starts with the end marker ``#``; the input is the characters
    of ``sentence`` followed by ``#``. Every reduced phrase is replaced on
    the stack by the placeholder non-terminal ``"N"``.

    Args:
        sentence: string to recognise, one character per symbol.
        L: relation table keyed by ``(left, right)`` with codes
            0 (none), 1 (``>``), 2 (``<``) and 3 (``=``).
        grammar: productions used to validate each reduced phrase; defaults
            to the module-level ``productions``.

    Returns:
        ``"right sentence"`` when the input reduces to a single
        non-terminal, otherwise ``"error sentence"``. Symbol pairs that are
        missing from ``L`` are treated as "no relation", so unknown
        characters are rejected instead of raising ``KeyError``.
    """
    rules = productions if grammar is None else grammar

    def relation(left, right):
        # A pair absent from the table is handled like an explicit 0 entry.
        return L.get((left, right), 0)

    def top_terminal(symbols):
        # Index of the topmost terminal: the top itself, or the element just
        # below it when the top is a (placeholder) non-terminal.
        top = len(symbols) - 1
        return top - 1 if symbols[top].isupper() else top

    stack = ["#"]
    for R in list(sentence) + ["#"]:
        # Reduce for as long as the topmost terminal takes precedence over R.
        while relation(stack[top_terminal(stack)], R) == 1:
            j = top_terminal(stack)
            # Walk left over terminals until one yields (<) to its right
            # neighbour Q; everything above index j is the phrase to reduce.
            while True:
                Q = stack[j]
                j -= 1
                if j >= 0 and stack[j].isupper():
                    j -= 1
                if j < 0:
                    # Ran off the bottom of the stack without finding '<'.
                    return "error sentence"
                if relation(stack[j], Q) == 2:
                    break

            phrase = "".join(stack[j + 1:])
            if not find_matching_rule(phrase, rules):
                return "error sentence"
            stack[j + 1:] = ["N"]

            # Only '#' and one non-terminal left with the input exhausted.
            if len(stack) == 2 and R == "#":
                return "right sentence"

        if relation(stack[top_terminal(stack)], R) == 0:
            return "error sentence"
        stack.append(R)  # shift on '<' or '='

    # Input ran out without ever reaching the accepting configuration.
    return "error sentence"


def read_productions_from_file(file_path):
    """Load grammar rules from a text file into the module-level ``productions``.

    Each non-blank line has the form ``LEFT->ALT1|ALT2|...``. Whitespace
    around the left-hand side and around every alternative is ignored, empty
    alternatives are skipped, and several lines with the same left-hand side
    are merged (duplicates dropped) instead of the last line winning.

    Args:
        file_path: path of the grammar file.

    Returns:
        The module-level ``productions`` dict, updated with the rules read
        from this file (left-hand sides already present are replaced).

    Raises:
        ValueError: if a non-blank line has no ``->`` or no left-hand side.
    """
    global productions
    parsed = {}
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                continue
            left, arrow, right = line.partition('->')
            left = left.strip()
            if not arrow or not left:
                raise ValueError(
                    f"line {line_number}: expected 'LEFT->RIGHT', got {line!r}")
            alternatives = parsed.setdefault(left, [])
            for alternative in right.split('|'):
                alternative = alternative.strip()
                if alternative and alternative not in alternatives:
                    alternatives.append(alternative)
    # Collect into a fresh dict first so that reading the same file twice
    # does not duplicate alternatives in the shared grammar.
    productions.update(parsed)
    return productions


def process_sentences_from_file(file_path, L):
    """Recognise every line of a file and print one verdict per line.

    Each line is stripped of surrounding whitespace, passed to
    ``process_sentence`` together with the relation table ``L``, and echoed
    as ``"<sentence> <result>"``.

    Args:
        file_path: path of the file holding one sentence per line.
        L: relation table handed through to ``process_sentence``.
    """
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            candidate = raw_line.strip()
            verdict = process_sentence(candidate, L)
            print(" ".join((candidate, verdict)))

from tabulate import tabulate


def print_parsing_table(L):
    """Print the precedence relations between terminals as a grid.

    Rows and columns are the terminals of ``L`` in sorted order, without the
    end marker ``#``. Each cell shows ``>``, ``<`` or ``=``; pairs with no
    relation get an empty cell so the remaining cells stay in their column.

    Args:
        L: relation table keyed by ``(left, right)`` with codes
            0 (none), 1 (``>``), 2 (``<``) and 3 (``=``).

    Raises:
        ValueError: if ``#`` never occurs as a right-hand symbol in ``L``.
    """
    symbols = {1: ">", 2: "<", 3: "="}
    term = sorted({pair[1] for pair in L})
    term.remove("#")

    rows = [
        # First cell labels the row; unknown or missing pairs render as blank.
        [left] + [symbols.get(L.get((left, right), 0), "") for right in term]
        for left in term
    ]
    print(tabulate(rows, headers=term, tablefmt="grid"))


def construct_operator_precedence_matrix(L):
    """Derive precedence functions f and g by repeated incrementing.

    Both functions start at 1 for every symbol and are raised until every
    relation of the table holds numerically:
    ``a < b`` requires ``f(a) < g(b)``, ``a > b`` requires ``f(a) > g(b)``
    and ``a = b`` requires ``f(a) == g(b)``.

    Args:
        L: relation table keyed by ``(left, right)`` with codes
            0 (none), 1 (``>``), 2 (``<``) and 3 (``=``).

    Returns:
        Tuple ``(F, G)`` of dicts mapping every symbol of ``L`` to its f and
        g value.

    Raises:
        ValueError: if the relations cannot be expressed by precedence
            functions (the values would otherwise keep growing forever).
    """
    operators = set()
    for t1, t2 in L:
        operators.update((t1, t2))

    F = dict.fromkeys(operators, 1)
    G = dict.fromkeys(operators, 1)

    # With n symbols there are 2n function values; when consistent functions
    # exist, the smallest solution never exceeds 2n, so crossing that bound
    # means the constraints are contradictory.
    limit = 2 * len(operators)

    changed = True
    while changed:
        changed = False
        for (t1, t2), relation in L.items():
            if relation == 2 and F[t1] >= G[t2]:
                G[t2] = F[t1] + 1
            elif relation == 1 and F[t1] <= G[t2]:
                F[t1] = G[t2] + 1
            elif relation == 3 and F[t1] != G[t2]:
                F[t1] = G[t2] = max(F[t1], G[t2])
            else:
                continue
            changed = True
            if F[t1] > limit or G[t2] > limit:
                raise ValueError(
                    "precedence functions do not exist for this relation table")

    return F, G


def bellG(L, term=None):
    """Print precedence functions derived with the Bell directed-graph method.

    A graph with one ``f`` node and one ``g`` node per terminal is built as
    an adjacency matrix B (``f`` nodes first, then ``g`` nodes):
    ``a > b`` or ``a = b`` adds the edge ``f_a -> g_b``; ``a < b`` or
    ``a = b`` adds the edge ``g_b -> f_a``. B* is the reflexive-transitive
    closure of B, and each function value is the number of ones in the
    corresponding row of B*.

    Args:
        L: relation table keyed by ``(left, right)`` with codes
            0 (none), 1 (``>``), 2 (``<``) and 3 (``=``).
        term: ordered terminals to include; defaults to every symbol of
            ``L`` except ``#``, in sorted order.
    """
    if term is None:
        term = sorted({symbol for pair in L for symbol in pair if symbol != "#"})
    else:
        term = list(term)

    count = len(term)
    size = 2 * count
    labels = term + term

    matrix = [[0] * size for _ in range(size)]
    for i, left in enumerate(term):
        for j, right in enumerate(term):
            relation = L.get((left, right), 0)
            if relation in (1, 3):
                matrix[i][j + count] = 1
            if relation in (2, 3):
                matrix[j + count][i] = 1

    print("构造矩阵B：")
    print(tabulate(matrix, headers=labels, showindex=labels, tablefmt="grid"))
    print()

    # Every node reaches itself.
    for node in range(size):
        matrix[node][node] = 1
    # Warshall closure with boolean cells: entries are kept at 0/1 so that an
    # already-reachable intermediate node is never overlooked.
    for via in range(size):
        for src in range(size):
            if matrix[src][via]:
                for dst in range(size):
                    if matrix[via][dst]:
                        matrix[src][dst] = 1

    print("构造矩阵B*：")
    print(tabulate(matrix, headers=labels, showindex=labels, tablefmt="grid"))
    print()

    # Function value = number of nodes reachable from the node (itself included).
    f = ["f()"] + [str(sum(row)) for row in matrix[:count]]
    g = ["g()"] + [str(sum(row)) for row in matrix[count:]]
    print("统计每行个数构造函数：")
    print(tabulate((f, g), headers="", tablefmt="grid"))


def main(sentences_file_path='sentences.txt'):
    """Run the whole analysis on the grammar held in the global ``productions``.

    Prints, in order: the grammar, the FIRSTVT and LASTVT sets, the
    precedence relation table, the precedence functions obtained by repeated
    incrementing, the verdict for every sentence of the test file, and the
    precedence functions obtained with the Bell directed-graph method.

    Args:
        sentences_file_path: file with one test sentence per line.
    """
    # Echo the grammar that is actually loaded rather than a fixed sample.
    print("产生式为：")
    for left, alternatives in productions.items():
        print(f"{left}->{'|'.join(alternatives)}")
    print()

    FTP = calculate_first(productions)
    print(FTP)

    LTP = calculate_last(productions)
    print(LTP)

    L = generate_parsing_table(productions, LTP, FTP)
    print("优先关系为：\n")
    F, G = construct_operator_precedence_matrix(L)
    print("Parsing Table L:")
    print_parsing_table(L)

    print("优先函数为：")
    # Rows are built explicitly so the layout does not depend on how
    # tabulate interprets a pair of dicts.
    symbols = sorted(F)
    function_rows = [
        ["f"] + [F[symbol] for symbol in symbols],
        ["g"] + [G[symbol] for symbol in symbols],
    ]
    print(tabulate(function_rows, headers=[""] + symbols, tablefmt="grid"))

    print("判断结果为：\n")
    process_sentences_from_file(sentences_file_path, L)

    print("bell有向图生成fg\n")
    bellG(L)


if __name__ == "__main__":
    # Load the grammar once: the call fills the module-level ``productions``
    # and returns it, so the same call also provides the value echoed here.
    file_path = 'grammar.txt'
    print(read_productions_from_file(file_path))
    main()