正规式转NFA由以下几种基础的情况组成:
(1)单个字符
(2)并运算
(3)连接运算
(4)闭包运算
针对非操作符以及各种操作符的详细处理:
1' 当遇到左括号'('时:直接将其压入符号栈中即可;
2' 当遇到右括号')'时:依次弹出符号栈中的符号直到遇到'('为止。在依次弹出符号栈中的符号时对NFA栈中的NFA元素的操作是:弹出NFA栈顶的两个元素,进行相应的符号操作后合成一个新的NFA并压入栈中;
3' 当遇到或操作'|'时:此操作符的优先级最低,在压入栈时需要对符号栈中'('以上的符号进行判断,对于优先级高于或操作的连接操作需要将其先弹出后进行连接操作,直到栈中不存在连接操作后再将'|'压入符号栈中;
4' 当遇到闭包操作'*'时:此操作符的优先级最高,无须将其压入符号栈中,直接将NFA栈中的栈顶NFA弹出栈后进行闭包操作后再将新的NFA压入NFA栈;
5' 当遇到隐含的连接操作'.'时:该操作符是隐含在正规式中的,如:ab,a(b|c)*。因此在扫描过程中,需要对是否添加连接符进行判断。其有以下三种情况:当遇到非运算符时,需要对其后面的符号进行判断,若遇到左括号或非运算符时,则需要往符号栈中添加连接符'.';当遇到闭包运算符'*'时,需要判断其右边的符号,若非'|'和')'则需要在符号栈中添加连接符'.';当遇到右括号')'时需要对其右边的符号进行判断,若遇到'('或非运算字符时需要加入连接符'.';
在处理完正规式中的字符后,若符号栈中仍有符号存在,则依次弹出符号栈中的符号和NFA栈中的NFA,不断进行计算后得到最终的NFA结果。
代码:
from stack import Stack
from queue import Queue
class NFANode:
    """A single NFA state.

    Attributes:
        stateNum: numeric identifier of this state.
        pathChar: label stored on the node; the printing code in this file
            shows it as the label of every transition that *enters* the node.
        nextNodes: successor states, in the order their edges were added.
    """

    def __init__(self, n: int, c: str) -> None:
        self.stateNum = n
        self.pathChar = c
        # A fresh list per instance so nodes never share successor lists.
        self.nextNodes = []

    def __repr__(self) -> str:
        # Only successor numbers are shown: NFAs built by closure contain
        # cycles, so printing nested nodes would recurse forever.
        successors = [node.stateNum for node in self.nextNodes]
        return (
            f"NFANode(stateNum={self.stateNum!r}, "
            f"pathChar={self.pathChar!r}, nextNodes={successors!r})"
        )
class NFA:
    """An NFA fragment, identified by its entry and exit nodes.

    Attributes:
        headNode: the node where the fragment is entered.
        tailNode: the node where the fragment is left.
    """

    def __init__(self, headNode: "NFANode", tailNode: "NFANode") -> None:
        self.headNode = headNode
        self.tailNode = tailNode

    def __repr__(self) -> str:
        return f"NFA(headNode={self.headNode!r}, tailNode={self.tailNode!r})"
# Numbers newly created nodes, starting from 0.
class GetStatusNum:
    """Hand out consecutive state numbers: 0, 1, 2, ...

    Attributes:
        statusNum: the number most recently returned by ``getNum``, or -1
            if none has been issued yet. ``statusNum + 1`` is therefore the
            count of numbers issued so far.
    """

    def __init__(self) -> None:
        self.statusNum = -1

    def getNum(self) -> int:
        """Return the next unused state number."""
        self.statusNum += 1
        return self.statusNum
# The four basic regex-to-NFA constructions: single character, closure,
# concatenation and union.
class GenerateNFAMethod:
    """Build NFA fragments for each basic regular-expression construct.

    Every construction allocates one new head node and one new tail node
    (head numbered first) and wires them around the operand fragment(s).

    Attributes:
        nul: label used for empty (epsilon) transitions.
        gsn: number generator; must provide ``getNum()``.
    """

    def __init__(self, gsn):
        self.nul = 'E'  # empty word
        self.gsn = gsn

    def _newEnds(self, tailChar):
        """Create a (head, tail) pair; the head is always epsilon-labelled."""
        headNode = NFANode(self.gsn.getNum(), self.nul)
        tailNode = NFANode(self.gsn.getNum(), tailChar)
        return headNode, tailNode

    def MeetCharacter(self, c):
        """Return the fragment ``head --c--> tail`` for a single character."""
        headNode, tailNode = self._newEnds(c)
        headNode.nextNodes.append(tailNode)
        return NFA(headNode, tailNode)

    # Closure
    def Closure(self, oldNFA):
        """Return the fragment matching zero or more repetitions of oldNFA."""
        headNode, tailNode = self._newEnds(self.nul)
        oldHead = oldNFA.headNode
        oldTail = oldNFA.tailNode
        # Leaving the operand: either finish or loop back for another pass.
        oldTail.nextNodes.append(tailNode)
        oldTail.nextNodes.append(oldHead)
        # Entering: either run the operand or skip it entirely.
        headNode.nextNodes.append(oldHead)
        headNode.nextNodes.append(tailNode)
        return NFA(headNode, tailNode)

    # Concatenation
    def And(self, firstNFA, secondNFA):
        """Return the fragment matching firstNFA followed by secondNFA."""
        headNode, tailNode = self._newEnds(self.nul)
        secondNFA.tailNode.nextNodes.append(tailNode)
        firstNFA.tailNode.nextNodes.append(secondNFA.headNode)
        headNode.nextNodes.append(firstNFA.headNode)
        return NFA(headNode, tailNode)

    # Union
    def OR(self, firstNFA, secondNFA):
        """Return the fragment matching either firstNFA or secondNFA."""
        headNode, tailNode = self._newEnds(self.nul)
        firstNFA.tailNode.nextNodes.append(tailNode)
        secondNFA.tailNode.nextNodes.append(tailNode)
        headNode.nextNodes.append(firstNFA.headNode)
        headNode.nextNodes.append(secondNFA.headNode)
        return NFA(headNode, tailNode)
class GrammarToNFA:
    """Convert a regular expression into an NFA with a two-stack scan.

    Syntax handled by ``ToNFA``: ``(`` and ``)`` for grouping, ``|`` for
    union, ``*`` for closure, and implicit concatenation between adjacent
    operands. Every other character is treated as a literal.
    """

    # An implicit concatenation is never inserted in front of these.
    _NO_CONCAT_BEFORE = ('|', ')', '*')

    @staticmethod
    def _popNFA(NFAStack):
        """Remove and return the top NFA; reject input lacking an operand."""
        if NFAStack.Empty():
            raise ValueError("malformed regular expression: missing operand")
        nfa = NFAStack.peek()
        NFAStack.pop()
        return nfa

    def _reduce(self, operator, NFAStack, gnm):
        """Combine the two topmost NFAs with a binary operator and push it."""
        # The top of the stack is the right-hand operand.
        secondNFA = self._popNFA(NFAStack)
        firstNFA = self._popNFA(NFAStack)
        if operator == '.':
            NFAStack.push(gnm.And(firstNFA, secondNFA))
        else:
            NFAStack.push(gnm.OR(firstNFA, secondNFA))

    def _pushImplicitConcat(self, regExp, i, operatorStack):
        """Push '.' when the character after position i starts an operand."""
        if i != len(regExp) - 1 and regExp[i + 1] not in self._NO_CONCAT_BEFORE:
            operatorStack.push('.')

    def ToNFA(self, regExp):
        """Build the NFA for ``regExp``.

        Args:
            regExp: the regular expression to convert.

        Returns:
            A three-element list ``[nfa, stateCount, char_list]``: the
            resulting NFA, the number of states created, and the distinct
            literal characters in order of first appearance.

        Raises:
            ValueError: if the expression is empty, has unbalanced
                parentheses, or an operator lacks an operand.
        """
        operatorStack = Stack()
        NFAStack = Stack()
        gnm = GenerateNFAMethod(GetStatusNum())
        char_list = []  # distinct literal characters of the expression

        for i, ch in enumerate(regExp):
            if ch == '(':
                operatorStack.push('(')
            elif ch == '|':
                # '|' has the lowest precedence: finish every pending
                # concatenation of the current group before pushing it.
                while (not operatorStack.Empty()
                       and operatorStack.peek() == '.'):
                    self._reduce('.', NFAStack, gnm)
                    operatorStack.pop()
                operatorStack.push('|')
            elif ch == '*':
                # '*' has the highest precedence: apply it immediately.
                NFAStack.push(gnm.Closure(self._popNFA(NFAStack)))
                self._pushImplicitConcat(regExp, i, operatorStack)
            elif ch == ')':
                # Reduce everything back to the matching '('.
                while True:
                    if operatorStack.Empty():
                        raise ValueError(
                            "malformed regular expression: unmatched ')'")
                    operator = operatorStack.peek()
                    operatorStack.pop()
                    if operator == '(':
                        break
                    self._reduce(operator, NFAStack, gnm)
                self._pushImplicitConcat(regExp, i, operatorStack)
            else:
                if ch not in char_list:
                    char_list.append(ch)
                NFAStack.push(gnm.MeetCharacter(ch))
                self._pushImplicitConcat(regExp, i, operatorStack)

        # Apply whatever operators are still pending.
        while not operatorStack.Empty():
            operator = operatorStack.peek()
            operatorStack.pop()
            if operator == '(':
                raise ValueError(
                    "malformed regular expression: unmatched '('")
            self._reduce(operator, NFAStack, gnm)

        if NFAStack.Empty():
            raise ValueError("malformed regular expression: empty expression")
        return [NFAStack.peek(), gnm.gsn.statusNum + 1, char_list]
def main():
    """Read a regular expression, build its NFA and print every transition.

    The first line printed is the entry into the start state; after that,
    states are visited breadth-first and each outgoing edge is printed as
    ``source--label-->target`` followed by blank lines.
    """
    regExp = str(input("请输入正则式:"))
    nfa, stateCount, _charList = GrammarToNFA().ToNFA(regExp)
    start = nfa.headNode

    print(f"--{start.pathChar}-->{start.stateNum}")
    print("\n")

    # Breadth-first traversal over every node of the NFA. Nodes are marked
    # when enqueued, so each one enters the queue exactly once.
    visit = [False] * stateCount
    Q = Queue()
    visit[start.stateNum] = True
    Q.put(start)
    while not Q.empty():
        pre_node = Q.get()
        status = pre_node.stateNum
        for suf_node in pre_node.nextNodes:
            print(f"{status}--{suf_node.pathChar}-->{suf_node.stateNum}")
            print("\n")
            if not visit[suf_node.stateNum]:
                visit[suf_node.stateNum] = True
                Q.put(suf_node)


if __name__ == '__main__':
    main()
测试:
请输入正则式:ABC
--E-->8
8--E-->0
0--A-->1
1--E-->6
6--E-->2
2--B-->3
3--E-->4
4--C-->5
5--E-->7
7--E-->9
上述结果的NFA如下图: