本程序实现了算符优先分析:先从文件读取文法规则,生成算符优先关系矩阵;再分别利用逐次加一法和 Bell 有向图法,由优先关系构造优先函数 f(x) 和 g(x);最后根据优先关系矩阵判断输入串是不是句子,并给出识别过程。程序支持通过文件输入不同的文法,并根据不同的文法产生不同的规则,生成相应的 f(x) 和 g(x)。
代码如下:
# Module-level grammar: maps each left-hand-side symbol to the list of its
# right-hand-side alternatives (strings). It is filled in place by
# read_productions_from_file() and read by process_sentence().
productions = {}
def calculate_first(productions):
    """Compute the FIRSTVT set (leading terminals) of every non-terminal.

    A symbol is treated as a non-terminal when ``str.isupper()`` is true for
    it; every other character is a terminal.

    Args:
        productions: dict mapping a non-terminal to a list of right-hand
            sides, each a string of single-character symbols.

    Returns:
        dict mapping every key of ``productions`` to a set of terminals.

    Raises:
        KeyError: if a right-hand side starts with a non-terminal that has
            no entry in ``productions``.
    """
    first = {head: set() for head in productions}

    # Seed step: A -> a...  or  A -> Ba...  puts ``a`` into FIRSTVT(A).
    for head, alternatives in productions.items():
        for body in alternatives:
            if not body:
                # An empty alternative (e.g. produced by a trailing '|')
                # contributes nothing; indexing it would raise IndexError.
                continue
            if not body[0].isupper():
                first[head].add(body[0])
            elif len(body) >= 2 and not body[1].isupper():
                first[head].add(body[1])

    # Closure step: A -> B...  makes FIRSTVT(B) a subset of FIRSTVT(A).
    # Repeat until a full pass adds nothing.
    changed = True
    while changed:
        changed = False
        for head, alternatives in productions.items():
            for body in alternatives:
                if body and body[0].isupper():
                    inherited = first[body[0]]
                    if not inherited <= first[head]:
                        first[head] |= inherited
                        changed = True
    return first
def calculate_last(productions):
    """Compute the LASTVT set (trailing terminals) of every non-terminal.

    A symbol is treated as a non-terminal when ``str.isupper()`` is true for
    it; every other character is a terminal.

    Args:
        productions: dict mapping a non-terminal to a list of right-hand
            sides, each a string of single-character symbols.

    Returns:
        dict mapping every key of ``productions`` to a set of terminals.

    Raises:
        KeyError: if a right-hand side ends with a non-terminal that has no
            entry in ``productions``.
    """
    last = {head: set() for head in productions}

    # Seed step: A -> ...a  or  A -> ...aB  puts ``a`` into LASTVT(A).
    for head, alternatives in productions.items():
        for body in alternatives:
            if not body:
                # An empty alternative (e.g. produced by a trailing '|')
                # contributes nothing; indexing it would raise IndexError.
                continue
            if not body[-1].isupper():
                last[head].add(body[-1])
            elif len(body) >= 2 and not body[-2].isupper():
                last[head].add(body[-2])

    # Closure step: A -> ...B  makes LASTVT(B) a subset of LASTVT(A).
    # Repeat until a full pass adds nothing.
    changed = True
    while changed:
        changed = False
        for head, alternatives in productions.items():
            for body in alternatives:
                if body and body[-1].isupper():
                    inherited = last[body[-1]]
                    if not inherited <= last[head]:
                        last[head] |= inherited
                        changed = True
    return last
def generate_parsing_table(productions, LTP, FTP):
    """Build the operator-precedence relation table of a grammar.

    The table maps ``(a, b)`` pairs of terminals to a relation code:
    ``0`` no relation, ``1`` for ``a > b``, ``2`` for ``a < b`` and ``3`` for
    ``a = b``. The end marker ``"#"`` is added with ``# < a`` and ``a > #``
    for every terminal ``a``; ``("#", "#")`` is ``0``.

    Args:
        productions: dict mapping a non-terminal to a list of right-hand
            sides (strings of single-character symbols; upper-case
            characters are non-terminals).
        LTP: dict mapping each non-terminal to its LASTVT set.
        FTP: dict mapping each non-terminal to its FIRSTVT set.

    Returns:
        dict keyed by ``(terminal, terminal)`` tuples with the codes above.

    Raises:
        KeyError: if a right-hand side uses a non-terminal missing from
            ``LTP``/``FTP``.
    """
    # Collect every terminal. Terminals are also taken straight from the
    # right-hand sides because a terminal in the interior of a body (the
    # ``b`` in ``aAbBc``) is in no FIRSTVT/LASTVT set, yet still needs its
    # "#" row and column.
    terminals = set()
    for head, alternatives in productions.items():
        terminals |= LTP[head] | FTP[head]
        for body in alternatives:
            terminals.update(sym for sym in body if not sym.isupper())

    table = {(a, b): 0 for a in terminals for b in terminals}

    for alternatives in productions.values():
        for body in alternatives:
            # Adjacent (non-terminal, terminal) and (terminal, non-terminal)
            # pairs yield the ">" and "<" relations.
            for idx in range(1, len(body)):
                prev, cur = body[idx - 1], body[idx]
                if prev.isupper() and not cur.isupper():  # ...E)
                    for ch in LTP[prev]:
                        table[(ch, cur)] = 1
                if cur.isupper() and not prev.isupper():  # ...+T
                    for ch in FTP[cur]:
                        table[(prev, ch)] = 2
            # "=" holds between two terminals that are adjacent (ab) ...
            for idx in range(len(body) - 1):
                if not body[idx].isupper() and not body[idx + 1].isupper():
                    table[(body[idx], body[idx + 1])] = 3
            # ... or separated by exactly one non-terminal (aAb).
            for idx in range(len(body) - 2):
                if (not body[idx].isupper() and body[idx + 1].isupper()
                        and not body[idx + 2].isupper()):
                    table[(body[idx], body[idx + 2])] = 3

    # The end marker yields to everything on its right and is dominated by
    # everything on its left.
    for sym in terminals:
        table[("#", sym)] = 2
        table[(sym, "#")] = 1
    table[("#", "#")] = 0
    return table
def pop_elements(lst, i, j):
    """Return a copy of ``lst`` without the elements at positions i..j inclusive.

    The input sequence is not modified; the result is the part before
    index ``i`` followed by the part after index ``j``.
    """
    kept_before = lst[:i]
    kept_after = lst[j + 1:]
    return kept_before + kept_after
def find_matching_rule(substring, productions):
    """Tell whether ``substring`` has the shape of some right-hand side.

    A candidate right-hand side matches when it has the same length as
    ``substring`` and EVERY position agrees:

    * where the rule has an operand (an upper-case symbol or ``'i'``), the
      substring must also have an operand (any upper-case symbol or ``'i'``);
    * where the rule has any other terminal, the substring must have exactly
      that terminal.

    Args:
        substring: the candidate handle taken from the parse stack.
        productions: dict mapping a non-terminal to its right-hand sides.

    Returns:
        True if at least one right-hand side matches, otherwise False. An
        empty ``substring`` never matches.
    """
    if not substring:
        return False

    def is_operand(symbol):
        # NOTE(review): 'i' is lumped together with non-terminals, exactly as
        # the original comparison did; confirm this is wanted for grammars
        # where 'i' is an ordinary terminal.
        return symbol.isupper() or symbol == 'i'

    def same_shape(right):
        if len(right) != len(substring):
            return False
        for have, want in zip(substring, right):
            if is_operand(want):
                if not is_operand(have):
                    return False
            elif have != want:
                return False
        return True

    # A single agreeing position is not enough: the whole handle must line
    # up with the rule, otherwise e.g. "N+N" would be accepted for "T*F".
    return any(
        same_shape(right)
        for rights in productions.values()
        for right in rights
    )
def process_sentence(sentence, L, grammar=None):
    """Run the operator-precedence shift/reduce recogniser on ``sentence``.

    The stack starts as ``["#"]`` and the input is ``sentence`` followed by
    ``"#"``. For each input symbol ``R`` the topmost terminal on the stack is
    compared with ``R`` through the relation table: ``1`` (>) triggers
    reductions, ``2``/``3`` (< or =) shift ``R``, ``0`` rejects. Every
    reduction replaces the handle by the placeholder non-terminal ``"N"``.

    Args:
        sentence: iterable of single-character symbols to recognise.
        L: relation table keyed by ``(terminal, terminal)`` with codes
            0 (none), 1 (>), 2 (<), 3 (=). Pairs missing from the table are
            treated as 0.
        grammar: productions used to validate handles; defaults to the
            module-level ``productions``.

    Returns:
        ``"right sentence"`` if the input is accepted, otherwise
        ``"error sentence"``.
    """
    if grammar is None:
        grammar = productions

    def relation(below, above):
        # An unknown symbol has no relation to anything, which makes the
        # sentence invalid rather than crashing with KeyError.
        return L.get((below, above), 0)

    stack = ["#"]
    top = 0  # index of the last element of ``stack``

    for R in [*sentence, "#"]:
        # j indexes the topmost terminal: step over a non-terminal on top.
        if stack[top] != "#" and stack[top].isupper():
            j = top - 1
        else:
            j = top

        if relation(stack[j], R) != 1:
            if relation(stack[j], R) == 0:
                return "error sentence"
            top += 1
            stack.append(R)  # shift
            continue

        # stack[j] > R: reduce until R can be shifted or the input is accepted.
        while True:
            # Walk left over terminals until the "<" that opens the handle.
            while True:
                Q = stack[j]
                j -= 1
                if j >= 0 and stack[j].isupper():
                    j -= 1
                if j < 0:
                    # Ran off the bottom of the stack without finding the
                    # start of a handle; never wrap around via negative
                    # indexing.
                    return "error sentence"
                if relation(stack[j], Q) == 2:
                    break

            substring = "".join(stack[j + 1: top + 1])
            if not find_matching_rule(substring, grammar):
                return "error sentence"
            stack = pop_elements(stack, j + 1, top)
            top = j + 1
            stack.append("N")

            # Only "# N" left and the input is at its end marker: accepted.
            if top == 1 and R == "#":
                return "right sentence"

            current = relation(stack[j], R)
            if current == 0:
                return "error sentence"
            if current != 1:
                top += 1
                stack.append(R)  # shift, then fetch the next input symbol
                break
            # current == 1: another reduction is pending for the same R.

    # Input exhausted without reaching the accepting configuration.
    return "error sentence"
def read_productions_from_file(file_path):
    """Read a grammar file and merge it into the module-level ``productions``.

    Each non-blank line has the form ``LEFT->ALT1|ALT2|...``. Whitespace
    inside an alternative is removed (every symbol is a single non-blank
    character), empty alternatives are dropped, and several lines with the
    same left-hand side are combined instead of the last one silently
    replacing the earlier ones. A left-hand side defined in the file
    replaces any previous entry for it in ``productions``, so reading the
    same file twice gives the same result.

    Args:
        file_path: path of the grammar file (UTF-8, an optional BOM is
            ignored).

    Returns:
        The module-level ``productions`` dict, updated in place.

    Raises:
        ValueError: if a non-blank line has no ``->`` or no left-hand side.
        OSError: if the file cannot be opened.
    """
    global productions
    parsed = {}
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                continue
            # partition() splits on the first arrow only, so a right-hand
            # side may itself contain the characters "->".
            left, arrow, right = line.partition('->')
            left = left.strip()
            if not arrow or not left:
                raise ValueError(
                    f"line {line_number}: expected 'LEFT->RIGHT', got {line!r}"
                )
            alternatives = parsed.setdefault(left, [])
            for alternative in right.split('|'):
                alternative = "".join(alternative.split())
                if alternative and alternative not in alternatives:
                    alternatives.append(alternative)
    productions.update(parsed)
    return productions
def process_sentences_from_file(file_path, L):
    """Analyse every line of a file as a sentence and print the verdicts.

    Each line is stripped of surrounding whitespace, passed to
    ``process_sentence`` together with the relation table ``L``, and echoed
    followed by a space and the returned result string.
    """
    with open(file_path, 'r') as source:
        for raw_line in source:
            candidate = raw_line.strip()
            verdict = process_sentence(candidate, L)
            print(f"{candidate} {verdict}")
from tabulate import tabulate
def print_parsing_table(L):
    """Print the operator-precedence relations as a grid.

    Rows and columns are the terminals that occur as the second component
    of a key of ``L``, sorted, without the end marker ``"#"``. Codes 1, 2
    and 3 are shown as ``>``, ``<`` and ``=``; a pair with no relation
    (code 0 or missing from ``L``) is shown as a blank cell so that every
    row keeps one cell per column.

    Args:
        L: relation table keyed by ``(terminal, terminal)``.
    """
    glyphs = {1: ">", 2: "<", 3: "="}
    term = sorted({pair[1] for pair in L})
    if "#" in term:
        term.remove("#")

    rows = []
    for row_symbol in term:
        # Always emit a cell, even when there is no relation; skipping it
        # would shift the remaining cells of the row to the left.
        cells = [glyphs.get(L.get((row_symbol, col_symbol), 0), " ")
                 for col_symbol in term]
        rows.append([row_symbol] + cells)

    # The first column holds the row labels and gets an empty header.
    print(tabulate(rows, headers=[""] + term, tablefmt="grid"))
def construct_operator_precedence_matrix(L):
    """Derive precedence functions f and g by successive increments.

    Every symbol starts with ``f = g = 1``. The table is then swept
    repeatedly, raising values until ``f(a) > g(b)`` holds for every
    ``a > b`` (code 1), ``f(a) < g(b)`` for every ``a < b`` (code 2) and
    ``f(a) == g(b)`` for every ``a = b`` (code 3).

    Args:
        L: relation table keyed by ``(terminal, terminal)`` with codes
            0 (none), 1 (>), 2 (<), 3 (=).

    Returns:
        Tuple ``(F, G)`` of dicts mapping each symbol to its f / g value.

    Raises:
        ValueError: if the relations admit no precedence functions. With n
            symbols there are 2n values, so a consistent assignment never
            needs a value above 2n; exceeding it means the constraints are
            cyclic and the sweep would otherwise never terminate.
    """
    operators = set()
    for t1, t2 in L:
        operators.update((t1, t2))

    F = dict.fromkeys(operators, 1)
    G = dict.fromkeys(operators, 1)
    limit = 2 * len(operators)

    changed = True
    while changed:
        changed = False
        for (t1, t2), relation in L.items():
            if relation == 2 and F[t1] >= G[t2]:
                G[t2] = F[t1] + 1
            elif relation == 1 and F[t1] <= G[t2]:
                F[t1] = G[t2] + 1
            elif relation == 3 and F[t1] != G[t2]:
                F[t1] = G[t2] = max(F[t1], G[t2])
            else:
                continue
            changed = True
            if F[t1] > limit or G[t2] > limit:
                raise ValueError(
                    "no precedence functions exist for this relation table"
                )
    return F, G
def bellG(L, term=None):
    """Build precedence functions with Bell's directed-graph method and print them.

    For n terminals the graph has 2n nodes: node ``i`` is f of terminal i
    and node ``n + j`` is g of terminal j. ``a > b`` adds the edge
    f_a -> g_b, ``a < b`` adds g_b -> f_a and ``a = b`` adds both. The value
    of a function is the number of nodes reachable from its node, the node
    itself included. The adjacency matrix B, its reflexive-transitive
    closure B* and the resulting f/g rows are printed.

    Args:
        L: relation table keyed by ``(terminal, terminal)`` with codes
            0 (none), 1 (>), 2 (<), 3 (=). Missing pairs count as 0.
        term: terminals to include, in display order. Defaults to every
            symbol occurring in ``L`` except ``"#"``, sorted, so the
            function works for any grammar rather than one fixed symbol
            list.
    """
    if term is None:
        term = sorted({symbol for pair in L for symbol in pair} - {"#"})
    else:
        term = list(term)

    n = len(term)
    size = 2 * n
    labels = term + term

    matrix = [[0] * size for _ in range(size)]
    for i, left in enumerate(term):
        for j, right in enumerate(term):
            relation = L.get((left, right), 0)
            if relation in (1, 3):   # left > right or left = right
                matrix[i][j + n] = 1
            if relation in (2, 3):   # left < right or left = right
                matrix[j + n][i] = 1

    print("构造矩阵B:")
    print(tabulate(matrix, headers=labels, showindex=labels, tablefmt="grid"))
    print()

    # Warshall closure. Entries are kept strictly 0/1: summing rows and then
    # testing ``== 1`` would skip nodes reached along more than one path.
    for via in range(size):
        for src in range(size):
            if matrix[src][via]:
                for dst in range(size):
                    if matrix[via][dst]:
                        matrix[src][dst] = 1
    # Every node reaches itself.
    for node in range(size):
        matrix[node][node] = 1

    print("构造矩阵B*:")
    print(tabulate(matrix, headers=labels, showindex=labels, tablefmt="grid"))
    print()

    f = ["f()"] + [str(sum(row)) for row in matrix[:n]]
    g = ["g()"] + [str(sum(row)) for row in matrix[n:]]
    print("统计每行个数构造函数:")
    print(tabulate((f, g), headers="", tablefmt="grid"))
def main():
    """Run the whole demonstration on the grammar held in ``productions``.

    Prints the grammar, its FIRSTVT/LASTVT sets, the relation table, the
    precedence functions obtained by successive increments, the verdict for
    every sentence in ``sentences.txt`` and finally the precedence
    functions obtained with Bell's graph method.
    """
    # Show the grammar that is actually loaded instead of a fixed text.
    print("产生式为:")
    for left, rights in productions.items():
        print(f"{left}->{'|'.join(rights)}")
    print()

    # FIRSTVT sets
    FTP = calculate_first(productions)
    print(FTP)
    # LASTVT sets
    LTP = calculate_last(productions)
    print(LTP)

    # Relation table
    L = generate_parsing_table(productions, LTP, FTP)
    print(f'测试1{L}')
    print("优先关系为:\n")

    # Precedence functions by successive increments
    F, G = construct_operator_precedence_matrix(L)

    print("Parsing Table L:")
    print(f"ceshi1{L}")
    print_parsing_table(L)

    print("优先函数为:")
    # One explicit row per function with one column per symbol. Passing the
    # two dicts directly with a plain string as ``headers`` is not an input
    # form tabulate supports for dict rows.
    symbols = sorted(F)
    function_rows = [
        ["f"] + [F[symbol] for symbol in symbols],
        ["g"] + [G[symbol] for symbol in symbols],
    ]
    print(tabulate(function_rows, headers=[""] + symbols, tablefmt="grid"))

    sentences_file_path = 'sentences.txt'
    print("判断结果为:\n")
    # Analyse every test sentence of the file
    process_sentences_from_file(sentences_file_path, L)

    print("bell有向图生成fg\n")
    bellG(L)
if __name__ == "__main__":
    # Load the grammar once and echo it; reading and parsing the same file
    # a second time just to print the result is unnecessary.
    file_path = 'grammar.txt'
    print(read_productions_from_file(file_path))
    main()