编译原理实验二:预测分析算法的设计与实现(python)

实验目的

通过预测分析算法的设计与实现,加深对自上而下语法分析方法的理解,尤其是对自上而下分析条件的理解。

实验内容

输入文法及待分析的输入串,输出其预测分析过程及结果。

实验报告

1)文本预处理:
从文件中读取文法,将文法处理成后续可利用的字典形式,同时记录终结符和非终结符。这部分涉及两个函数,分别是data_input():从文本中读取文法、和getGrammar(x):处理文法

def data_input(): #读取文法
    with open("input.txt", 'r+', encoding="utf-8") as f:
        temp = f.readlines()
    for i in temp:
        line = str(i.strip("\n"))
        if line[0] not in non_ter :
            non_ter.append(line[0])
            grammarElement.setdefault(line[0],line[5:] )
        else:
            grammarElement[line[0]] += "|"+line[5:]
    for i in temp:
        line = str(i.strip("\n")).replace(" -> ","")
        for j in line:
            if j not in non_ter and j not in terSymblo:
                terSymblo.append(j)
    terSymblo.remove('ε')

def getGrammar(x): #文本预处理
    data = []
    if '|' in grammarElement[x]:
        data = grammarElement[x].split('|')
    else:
        data.append(grammarElement[x])
    formules = []
    for i in range(len(data)):
        patten = re.compile("[*+\-a-zA-Zε()]'?")
        temp = re.findall(patten, data[i])
        if temp:
            temp  = "".join(temp)
            formules.append(temp)
    grammarElement[x]=formules

(2)求first集合:

def getFirst(T): #求first集
    if T in terSymblo or T == 'ε':
        firstSET[T] = T
        return T
    x = firstSET.get(T,None)
    if x:
        return x
    else:
        tempFirst = ""
        for prodForm in grammarElement[T]:
            tempFirst += getFirst(prodForm[0])
            if len(prodForm) >= 2 and prodForm[1] in non_ter and prodForm[0] in non_ter and 'ε' in grammarElement[prodForm[0]]:
                tempFirst += getFirst(prodForm[1])
        firstSET.update({T: tempFirst})
        return tempFirst

(3)求follow集合

def get_follow(T): #求follow集
    formule = []
    top = []
    for i  in grammarElement.keys():
        for j in grammarElement[i]:
            formule.append(j)
            top.append(i)
    for i in formule:
        for j in range(len(i)):
            if T == i[j]:
                if j == len(i) - 1  :  #非终结符在文法末尾
                    for e in followSET[top[formule.index(i)]]:
                        if e != 'ε'  and e not in followSET[T]:
                            followSET[T] += e
                    break
                else: #非终结符不在文法末尾
                    if i[j+1] in terSymblo and i[j+1] not in followSET[T]:
                        followSET[T] += i[j+1]
                        break
                    else: #后面跟着的是非终结符
                        next = i[j+1]
                        if 'ε' not in firstSET[next]:
                            for e in firstSET[next]:
                                if e not in followSET[T]:
                                    followSET[T] += e
                        else:
                            for e in firstSET[next]:
                                if e !=  'ε' and e not in followSET[T]:
                                    followSET[T] += e
                            for e in followSET[top[formule.index(i)]]:
                                if e != 'ε' and e not in followSET[T]:
                                    followSET[T] += e

(4)求预测分析表
本程序中预测分析表采用二维字典的形式进行存储,首先要编写一个二维数组的设置函数

def addtodict2(thedict, key_a, key_b, val): #设置二维字典的函数
    if key_a in thedict.keys():
        thedict[key_a].update({key_b: val})
    else:
        thedict.update({key_a:{key_b: val}})

之后是预测分析表的构造:

def analy():   #预测分析表
    for i in non_ter:
        for j in terSymblo:
            if j in firstSET[i]:
                if len(grammarElement[i])==1:
                    addtodict2(data,i,j,grammarElement[i][0])
                else:
                    for k in grammarElement[i]:
                        if j in k:
                            addtodict2(data,i,j,k)
            elif j in followSET[i]:
                addtodict2(data,i,j,'ε')
            else:
                addtodict2(data,i,j,'error')
    print("\t\t",end="")
    for i in terSymblo:
        print(i.ljust(8),end="")
    print()
    for i in data.keys():
        print(i.ljust(8),end="")
        for j in data[i]:
            if data[i][j] != 'error':
                temp = i+'->'+data[i][j]
                print(temp.ljust(8),end="")
            else:
                print('error'.ljust(8),end="")
        print()

(5)主程序
主程序中要控制各分函数的运行,以及进行分析过程和展示数据过程

print("预处理:")
for i in grammarElement.keys():
    print(i+"->",end="")
    print(grammarElement[i])
#print(firstSET)
print("FIRST集合:")
for i in non_ter:
    print(i+" : "+firstSET[i])
#print(followSET)
print("FOLLOW集合:")
for i in non_ter:
    print(i+" : "+followSET[i])
data= dict()
print("预测分析表:")
analy()
#输入句子
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
stack = []
stack.append('#')
stack.append(Start)
ind = 0
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
try:
    while (1):
        print("".join(stack).ljust(16), end="")
        top = stack.pop()
        now = instr[ind]
        temp = ""
        for j in range(ind, len(instr)):
            temp += instr[j]
        print(temp.ljust(20), end="")

        if now == top:
            if now == "#":
                print()
                print("句子符合文法")
                flag = 1
                break
            else:
                ind += 1
                print()
                continue
        else:
            line = data[top][now]
            if line != 'ε' and line != 'error':
                temp = top + "->" + line
                print(temp.ljust(16))
                line = list(line)
                for i in range(len(line)):
                    s = line.pop()
                    stack.append(s)
                continue
            elif line == 'ε':
                temp = top + "->" + line
                print(temp.ljust(16))
                continue
            else:
                print()
                continue
except:
    print("\n句子不符合文法")

最终程序

最终程序

import re

grammarElement = {}
terSymblo = ['#']
non_ter = []
Start = 'S'
allSymbol = [] #所有符号
firstSET = {} # 各产生式右部的FIRST集
followSET = {} # 各产生式左部的FOLLOW集

def getGrammar(x): #文本预处理
    data = []
    if '|' in grammarElement[x]:
        data = grammarElement[x].split('|')
    else:
        data.append(grammarElement[x])
    formules = []
    for i in range(len(data)):
        patten = re.compile("[,*+^\-a-zA-Zε()]'?")
        temp = re.findall(patten, data[i])
        if temp:
            temp  = "".join(temp)
            formules.append(temp)
    grammarElement[x]=formules

def getFirst(T): #求first集
    if T in terSymblo or T == 'ε':
        firstSET[T] = T
        return T
    x = firstSET.get(T,None)
    if x:
        return x
    else:
        tempFirst = ""
        for prodForm in grammarElement[T]:
            tempFirst += getFirst(prodForm[0])
            if len(prodForm) >= 2 and prodForm[1] in non_ter and prodForm[0] in non_ter and 'ε' in grammarElement[prodForm[0]]:
                tempFirst += getFirst(prodForm[1])
        tempFirst = "".join(set(list(tempFirst)))
        firstSET.update({T: tempFirst})
        return tempFirst

def get_follow(T): #求follow集
    formule = []
    top = []
    for i  in grammarElement.keys():
        for j in grammarElement[i]:
            formule.append(j)
            top.append(i)
    for i in formule:
        for j in range(len(i)):
            if T == i[j]:
                if j == len(i) - 1  :  #非终结符在文法末尾
                    for e in followSET[top[formule.index(i)]]:
                        if e != 'ε'  and e not in followSET[T]:
                            followSET[T] += e
                    break
                else: #非终结符不在文法末尾
                    if i[j+1] in terSymblo and i[j+1] not in followSET[T]:
                        followSET[T] += i[j+1]
                        break
                    else: #后面跟着的是非终结符
                        next = i[j+1]
                        if 'ε' not in firstSET[next]:
                            for e in firstSET[next]:
                                if e not in followSET[T]:
                                    followSET[T] += e
                        else:
                            for e in firstSET[next]:
                                if e !=  'ε' and e not in followSET[T]:
                                    followSET[T] += e
                            for e in followSET[top[formule.index(i)]]:
                                if e != 'ε' and e not in followSET[T]:
                                    followSET[T] += e



def addtodict2(thedict, key_a, key_b, val): #设置二维字典
    if key_a in thedict.keys():
        thedict[key_a].update({key_b: val})
    else:
        thedict.update({key_a:{key_b: val}})

def analy():   #预测分析表
    for i in non_ter:
        for j in terSymblo:
            if j in firstSET[i]:
                if len(grammarElement[i])==1:
                    addtodict2(data,i,j,grammarElement[i][0])
                else:
                    for k in grammarElement[i]:
                        if j in k:
                            addtodict2(data,i,j,k)
            elif j in followSET[i]:
                addtodict2(data,i,j,'ε')
            else:
                addtodict2(data,i,j,'error')
    print("\t\t",end="")
    for i in terSymblo:
        print(i.ljust(8),end="")
    print()
    for i in data.keys():
        print(i.ljust(8),end="")
        for j in data[i]:
            if data[i][j] != 'error':
                temp = i+'->'+data[i][j]
                print(temp.ljust(8),end="")
            else:
                print('error'.ljust(8),end="")
        print()

def data_input(): #读取文法
    with open("input2.txt", 'r+', encoding="utf-8") as f:
        temp = f.readlines()
    for i in temp:
        line = str(i.strip("\n"))
        if line[0] not in non_ter :
            non_ter.append(line[0])
            grammarElement.setdefault(line[0],line[5:] )
        else:
            grammarElement[line[0]] += "|"+line[5:]
    for i in temp:
        line = str(i.strip("\n")).replace(" -> ","")
        for j in line:
            if j not in non_ter and j not in terSymblo:
                terSymblo.append(j)
    terSymblo.remove('ε')

data_input()
for i in non_ter:
    getGrammar(i)
sym = non_ter+terSymblo
for i in sym:
    if i ==Start:
        followSET.setdefault(i,"#")
    else:
        followSET.setdefault(i,"")
for i in non_ter:
    getFirst(i)
firstSET.setdefault(')',"")
for n in range(100):
    for i in non_ter:
        get_follow(i)

print("预处理:")
for i in grammarElement.keys():
    print(i+"->",end="")
    print(grammarElement[i])
#print(firstSET)
print("FIRST集合:")
for i in non_ter:
    print(i+" : "+firstSET[i])
#print(followSET)
print("FOLLOW集合:")
for i in non_ter:
    print(i+" : "+followSET[i])
data= dict()
print("预测分析表:")
analy()
#输入句子
instr = input("请输入需要进行分析的语句:\n")
instr+="#"
stack = []
stack.append('#')
stack.append(Start)
ind = 0
print("分析过程:\n符号栈\t\t\t输入串\t\t\t\t所用产生式")
try:
    while (1):
        print("".join(stack).ljust(16), end="")
        top = stack.pop()
        now = instr[ind]
        temp = ""
        for j in range(ind, len(instr)):
            temp += instr[j]
        print(temp.ljust(20), end="")

        if now == top:
            if now == "#":
                print()
                print("句子符合文法")
                flag = 1
                break
            else:
                ind += 1
                print()
                continue
        else:
            line = data[top][now]
            if line != 'ε' and line != 'error':
                temp = top + "->" + line
                print(temp.ljust(16))
                line = list(line)
                for i in range(len(line)):
                    s = line.pop()
                    stack.append(s)
                continue
            elif line == 'ε':
                temp = top + "->" + line
                print(temp.ljust(16))
                continue
            else:
                print()
                continue
except:
    print("\n句子不符合文法")

测试

测试文法:


S -> a
S -> ^
S -> (T)
T -> St
t -> ,St
t -> ε

运行
在这里插入图片描述
在这里插入图片描述
完成

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Chiaki_0ff

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值