前言
学校编译原理实验课,题目是确定有穷自动机DFA最小化,在网上找了很久没有一个看得懂并且可以使用的Python代码,于是自己写了个能运行的。
原理
说人话就是:不断遍历每一个划分,判断划分中的状态是不是等价,如果不等价就拆开,一直拆到不能再拆为止,得到最简形式。例如 [[a,b,c], [d,e]],如果 d 和 e 不等价,那么拆分为 [[a,b,c], [d], [e]]。那么如何判断两个状态是否等价呢?我们需要遍历所有符号:假如对于符号 0,d 通过 0 到达 b(即 d[0] -> b),b 属于划分 [a,b,c];而 e 通过符号 0 到达 d(即 e[0] -> d),d 属于划分 [d,e]。d 和 e 的到达状态属于不同划分,所以它们不等价。假如对于所有符号,它们的到达状态都在同一个划分,那么它们就等价。(ps:可以看到我的代码中有一个 find_index 函数,就是用来寻找状态在二维列表中的索引;我通过字典来记录哪个状态对应哪个索引,索引相同就说明在同一个划分,也就是等价的)
步骤
- 确定等价状态对
- 合并等价状态
- 更新转换函数和终止状态
代码
#! /usr/bin/env python
# -*- coding: utf-8 -*-#-
from typing import Dict, List
from graphviz import Digraph
from pathlib import Path
class mini_dfa:
    """Load a DFA description from a text file and minimize it.

    Minimization works by partition refinement: states start out split into
    accepting / non-accepting groups, and a group is split further whenever
    two of its members reach different groups on the same input symbol.

    File layout read by :meth:`loads` (one item per line):

    1. comma-separated state names
    2. comma-separated input symbols
    3. transitions, ``;``-separated, each written as ``start,end,symbol``
    4. the start state
    5. comma-separated accepting states

    Note: unreachable states are not removed by :meth:`minimize`.
    """

    def __init__(self, path: str):
        """Remember *path*; nothing is read until :meth:`loads` is called."""
        # transitions[state][symbol] -> target state
        self.transitions: Dict[str, dict] = {}
        self.states: List[str] = []
        self.symbols: List[str] = []
        # Name of the single start state (a string, not a list).
        self.starts: str = None
        self.ends: List[str] = []
        self.file_path = Path(path)

    def loads(self) -> None:
        """Parse the DFA file, render the original DFA, and fill the attributes.

        Raises:
            FileNotFoundError: the file does not exist.
            ValueError: the file has fewer than five lines, or a transition
                is not a ``start,end,symbol`` triple.
        """
        if not self.file_path.exists():
            # FileNotFoundError is still an Exception, so existing handlers work.
            raise FileNotFoundError("file doesn't exist")
        with open(self.file_path, 'r') as r:
            lines = [line.rstrip('\n') for line in r]
        if len(lines) < 5:
            raise ValueError(
                "DFA file needs 5 lines: states, symbols, transitions, start, ends"
            )

        self.states = lines[0].split(',')
        self.symbols = lines[1].split(',')
        self.starts = lines[3]
        # An empty fifth line means "no accepting states", not one state named ''.
        self.ends = [name for name in lines[4].split(',') if name]

        transitions = []
        for item in lines[2].split(';'):
            if not item:
                # Tolerate a trailing ';' or an empty transition line.
                continue
            fields = item.split(',')
            if len(fields) != 3:
                raise ValueError(
                    f"malformed transition {item!r}; expected 'start,end,symbol'"
                )
            transitions.append(fields)

        mini_dfa.show_dfa(self.states, self.starts, self.ends, transitions)

        # Convert the triple list into a nested dict; rebuilt from scratch so
        # that calling loads() twice does not merge stale entries.
        self.transitions = {}
        for start, end, symbol in transitions:
            self.transitions.setdefault(start, {})[symbol] = end

    @staticmethod
    def show_dfa(states: list, starts: list, ends: list, transitions: List[list]) -> None:
        """Render the DFA with graphviz and open the resulting image.

        Args:
            states: all state names.
            starts: start state(s); a single name given as ``str`` is accepted
                and treated as a one-element list.
            ends: accepting state names.
            transitions: ``[start, end, symbol]`` triples.
        """
        if isinstance(starts, str):
            # A bare string would turn `state in starts` into a substring test.
            starts = [starts]
        dfa = Digraph('DFA', format='png')
        for state in states:
            # Accepting states get a double circle even when they are also the
            # start state; the start state is highlighted in red.
            attrs = {'shape': 'doublecircle' if state in ends else 'circle'}
            if state in starts:
                attrs['color'] = 'red'
            dfa.node(state, **attrs)
        for edge in transitions:
            dfa.edge(edge[0], edge[1], label=edge[2])
        dfa.attr(rankdir='LR')
        dfa.view()

    @staticmethod
    def find_index(state, p_list) -> 'int | str | None':
        """Return the index of the sub-list of *p_list* that contains *state*.

        The placeholder ``'*'`` (used for "no transition") is returned
        unchanged. ``None`` is returned when *state* is in no sub-list.
        """
        if state == '*':
            return state
        for index, s_list in enumerate(p_list):
            if state in s_list:
                return index
        return None

    def _split_once(self, partitions: list) -> bool:
        """Split the first divisible group of *partitions* in place.

        Returns True if a split happened, False when the partition is stable.
        """
        # One dict per pass replaces repeated linear scans of the partition.
        block_of = {s: i for i, block in enumerate(partitions) for s in block}
        for pos, block in enumerate(partitions):
            for symbol in self.symbols:
                groups = {}
                for state in block:
                    row = self.transitions.get(state, {})
                    # '*' marks a missing transition. Real targets map to an
                    # int index (or None when the target is undeclared), so a
                    # state literally named '*' cannot collide with the marker.
                    key = block_of.get(row[symbol]) if symbol in row else '*'
                    groups.setdefault(key, []).append(state)
                if len(groups) > 1:
                    # Members disagree on this symbol: replace the group by
                    # its pieces and let the caller restart the scan.
                    del partitions[pos]
                    partitions.extend(groups.values())
                    return True
        return False

    def minimize(self) -> list:
        """Minimize the DFA.

        Returns:
            ``[states, starts, ends, transitions]`` of the minimized DFA, in
            the argument order of :meth:`show_dfa`. Each equivalence class is
            represented by its first member; ``transitions`` holds
            ``[start, end, symbol]`` triples between representatives.

        Raises:
            ValueError: a transition of a representative state targets a
                state that is not listed in ``self.states``.
        """
        accepting = set(self.ends)
        non_end_states = [s for s in self.states if s not in accepting]
        # Only keep accepting states that are actually declared.
        end_states = [s for s in self.states if s in accepting]
        # Drop empty groups (all states accepting, or none) so that every
        # group has a representative.
        partitions = [group for group in (non_end_states, end_states) if group]

        # Refine until no group can be split any further.
        while self._split_once(partitions):
            pass

        block_of = {s: i for i, block in enumerate(partitions) for s in block}

        # The first state of each equivalence class is its representative.
        minimized_states = [block[0] for block in partitions]
        # The start state maps to the representative of its own class, which
        # is not necessarily the start state itself.
        start_block = block_of.get(self.starts)
        minimized_starts = (
            [] if start_block is None else [partitions[start_block][0]]
        )
        minimized_ends = [s for s in minimized_states if s in accepting]

        minimized_transforms = []
        for state in minimized_states:
            for symbol, end in self.transitions.get(state, {}).items():
                idx = block_of.get(end)
                if idx is None:
                    raise ValueError(
                        f"transition {state!r} --{symbol!r}--> {end!r} "
                        "targets an undeclared state"
                    )
                # Redirect the edge to the representative of the target class.
                minimized_transforms.append([state, partitions[idx][0], symbol])
        return [minimized_states, minimized_starts, minimized_ends, minimized_transforms]
if __name__ == "__main__":
    # Read 'dfa.txt', minimize the DFA, then draw the minimized automaton.
    automaton = mini_dfa('dfa.txt')
    automaton.loads()
    min_states, min_starts, min_ends, min_edges = automaton.minimize()
    mini_dfa.show_dfa(min_states, min_starts, min_ends, min_edges)