实验目的
通过预测分析算法的设计与实现,加深对自上而下语法分析方法的理解,尤其是对自上而下分析条件的理解。
实验内容
输入文法及待分析的输入串,输出其预测分析过程及结果。
实验报告
1)文本预处理:
从文件中读取文法,将文法处理成后续可利用的字典形式,同时记录终结符和非终结符。这部分涉及两个函数,分别是data_input():从文本中读取文法、和getGrammar(x):处理文法
def data_input(): #读取文法
with open("input.txt", 'r+', encoding="utf-8") as f:
temp = f.readlines()
for i in temp:
line = str(i.strip("\n"))
if line[0] not in non_ter :
non_ter.append(line[0])
grammarElement.setdefault(line[0],line[5:] )
else:
grammarElement[line[0]] += "|"+line[5:]
for i in temp:
line = str(i.strip("\n")).replace(" -> ","")
for j in line:
if j not in non_ter and j not in terSymblo:
terSymblo.append(j)
terSymblo.remove('ε')
def getGrammar(x): #文本预处理
data = []
if '|' in grammarElement[x]:
data = grammarElement[x].split('|')
else:
data.append(grammarElement[x])
formules = []
for i in range(len(data)):
patten = re.compile("[*+\-a-zA-Zε()]'?")
temp = re.findall(patten, data[i])
if temp:
temp = "".join(temp)
formules.append(temp)
grammarElement[x]=formules
(2)求first集合:
def getFirst(T): #求first集
if T in terSymblo or T == 'ε':
firstSET[T] = T
return T
x = firstSET.get(T,None)
if x:
return x
else:
tempFirst = ""
for prodForm in grammarElement[T]:
tempFirst += getFirst(prodForm[0])
if len(prodForm) >= 2 and prodForm[1] in non_ter and prodForm[0] in non_ter and 'ε' in grammarElement[prodForm[0]]:
tempFirst += getFirst(prodForm[1])
firstSET.update({T: tempFirst})
return tempFirst
(3)求follow集合
def get_follow(T): #求follow集
formule = []
top = []
for i in grammarElement.keys():
for j in grammarElement[i]:
formule.append(j)
top.append(i)
for i in formule:
for j in range(len(i)):
if T == i[j]:
if j == len(i) - 1 : #非终结符在文法末尾
for e in followSET[top[formule.index(i)]]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
break
else: #非终结符不在文法末尾
if i[j+1] in terSymblo and i[j+1] not in followSET[T]:
followSET[T] += i[j+1]
break
else: #后面跟着的是非终结符
next = i[j+1]
if 'ε' not in firstSET[next]:
for e in firstSET[next]:
if e not in followSET[T]:
followSET[T] += e
else:
for e in firstSET[next]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
for e in followSET[top[formule.index(i)]]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
(4)求预测分析表
本程序中预测分析表采用二维字典的形式进行存储,首先要编写一个二维数组的设置函数
def addtodict2(thedict, key_a, key_b, val): #设置二维字典的函数
if key_a in thedict.keys():
thedict[key_a].update({key_b: val})
else:
thedict.update({key_a:{key_b: val}})
之后是预测分析表的构造:
def analy(): #预测分析表
for i in non_ter:
for j in terSymblo:
if j in firstSET[i]:
if len(grammarElement[i])==1:
addtodict2(data,i,j,grammarElement[i][0])
else:
for k in grammarElement[i]:
if j in k:
addtodict2(data,i,j,k)
elif j in followSET[i]:
addtodict2(data,i,j,'ε')
else:
addtodict2(data,i,j,'error')
print("\t\t",end="")
for i in terSymblo:
print(i.ljust(8),end="")
print()
for i in data.keys():
print(i.ljust(8),end="")
for j in data[i]:
if data[i][j] != 'error':
temp = i+'->'+data[i][j]
print(temp.ljust(8),end="")
else:
print('error'.ljust(8),end="")
print()
(5)主程序
主程序中要控制各分函数的运行,以及进行分析过程和展示数据过程
print("预处理:")
for i in grammarElement.keys():
print(i+"->",end="")
print(grammarElement[i])
#print(firstSET)
print("FIRST集合:")
for i in non_ter:
print(i+" : "+firstSET[i])
#print(followSET)
print("FOLLOW集合:")
for i in non_ter:
print(i+" : "+followSET[i])
data= dict()
print("预测分析表:")
analy()
#输入句子
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
stack = []
stack.append('#')
stack.append(Start)
ind = 0
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
try:
while (1):
print("".join(stack).ljust(16), end="")
top = stack.pop()
now = instr[ind]
temp = ""
for j in range(ind, len(instr)):
temp += instr[j]
print(temp.ljust(20), end="")
if now == top:
if now == "#":
print()
print("句子符合文法")
flag = 1
break
else:
ind += 1
print()
continue
else:
line = data[top][now]
if line != 'ε' and line != 'error':
temp = top + "->" + line
print(temp.ljust(16))
line = list(line)
for i in range(len(line)):
s = line.pop()
stack.append(s)
continue
elif line == 'ε':
temp = top + "->" + line
print(temp.ljust(16))
continue
else:
print()
continue
except:
print("\n句子不符合文法")
最终程序
最终程序
import re
grammarElement = {}
terSymblo = ['#']
non_ter = []
Start = 'S'
allSymbol = [] #所有符号
firstSET = {} # 各产生式右部的FIRST集
followSET = {} # 各产生式左部的FOLLOW集
def getGrammar(x): #文本预处理
data = []
if '|' in grammarElement[x]:
data = grammarElement[x].split('|')
else:
data.append(grammarElement[x])
formules = []
for i in range(len(data)):
patten = re.compile("[,*+^\-a-zA-Zε()]'?")
temp = re.findall(patten, data[i])
if temp:
temp = "".join(temp)
formules.append(temp)
grammarElement[x]=formules
def getFirst(T): #求first集
if T in terSymblo or T == 'ε':
firstSET[T] = T
return T
x = firstSET.get(T,None)
if x:
return x
else:
tempFirst = ""
for prodForm in grammarElement[T]:
tempFirst += getFirst(prodForm[0])
if len(prodForm) >= 2 and prodForm[1] in non_ter and prodForm[0] in non_ter and 'ε' in grammarElement[prodForm[0]]:
tempFirst += getFirst(prodForm[1])
tempFirst = "".join(set(list(tempFirst)))
firstSET.update({T: tempFirst})
return tempFirst
def get_follow(T): #求follow集
formule = []
top = []
for i in grammarElement.keys():
for j in grammarElement[i]:
formule.append(j)
top.append(i)
for i in formule:
for j in range(len(i)):
if T == i[j]:
if j == len(i) - 1 : #非终结符在文法末尾
for e in followSET[top[formule.index(i)]]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
break
else: #非终结符不在文法末尾
if i[j+1] in terSymblo and i[j+1] not in followSET[T]:
followSET[T] += i[j+1]
break
else: #后面跟着的是非终结符
next = i[j+1]
if 'ε' not in firstSET[next]:
for e in firstSET[next]:
if e not in followSET[T]:
followSET[T] += e
else:
for e in firstSET[next]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
for e in followSET[top[formule.index(i)]]:
if e != 'ε' and e not in followSET[T]:
followSET[T] += e
def addtodict2(thedict, key_a, key_b, val): #设置二维字典
if key_a in thedict.keys():
thedict[key_a].update({key_b: val})
else:
thedict.update({key_a:{key_b: val}})
def analy(): #预测分析表
for i in non_ter:
for j in terSymblo:
if j in firstSET[i]:
if len(grammarElement[i])==1:
addtodict2(data,i,j,grammarElement[i][0])
else:
for k in grammarElement[i]:
if j in k:
addtodict2(data,i,j,k)
elif j in followSET[i]:
addtodict2(data,i,j,'ε')
else:
addtodict2(data,i,j,'error')
print("\t\t",end="")
for i in terSymblo:
print(i.ljust(8),end="")
print()
for i in data.keys():
print(i.ljust(8),end="")
for j in data[i]:
if data[i][j] != 'error':
temp = i+'->'+data[i][j]
print(temp.ljust(8),end="")
else:
print('error'.ljust(8),end="")
print()
def data_input(): #读取文法
with open("input2.txt", 'r+', encoding="utf-8") as f:
temp = f.readlines()
for i in temp:
line = str(i.strip("\n"))
if line[0] not in non_ter :
non_ter.append(line[0])
grammarElement.setdefault(line[0],line[5:] )
else:
grammarElement[line[0]] += "|"+line[5:]
for i in temp:
line = str(i.strip("\n")).replace(" -> ","")
for j in line:
if j not in non_ter and j not in terSymblo:
terSymblo.append(j)
terSymblo.remove('ε')
data_input()
for i in non_ter:
getGrammar(i)
sym = non_ter+terSymblo
for i in sym:
if i ==Start:
followSET.setdefault(i,"#")
else:
followSET.setdefault(i,"")
for i in non_ter:
getFirst(i)
firstSET.setdefault(')',"")
for n in range(100):
for i in non_ter:
get_follow(i)
print("预处理:")
for i in grammarElement.keys():
print(i+"->",end="")
print(grammarElement[i])
#print(firstSET)
print("FIRST集合:")
for i in non_ter:
print(i+" : "+firstSET[i])
#print(followSET)
print("FOLLOW集合:")
for i in non_ter:
print(i+" : "+followSET[i])
data= dict()
print("预测分析表:")
analy()
#输入句子
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
stack = []
stack.append('#')
stack.append(Start)
ind = 0
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
try:
while (1):
print("".join(stack).ljust(16), end="")
top = stack.pop()
now = instr[ind]
temp = ""
for j in range(ind, len(instr)):
temp += instr[j]
print(temp.ljust(20), end="")
if now == top:
if now == "#":
print()
print("句子符合文法")
flag = 1
break
else:
ind += 1
print()
continue
else:
line = data[top][now]
if line != 'ε' and line != 'error':
temp = top + "->" + line
print(temp.ljust(16))
line = list(line)
for i in range(len(line)):
s = line.pop()
stack.append(s)
continue
elif line == 'ε':
temp = top + "->" + line
print(temp.ljust(16))
continue
else:
print()
continue
except:
print("\n句子不符合文法")
测试
测试文法:
S -> a
S -> ^
S -> (T)
T -> St
t -> ,St
t -> ε
运行
完成