CYK 算法 Python 实现
原理参考:https://blog.csdn.net/ssaalkjhgf/article/details/80435676
实现过程
类说明
CNF
由一个字典初始化,输入为规范的范式
find_product
方法实现输入一个变元组合返回其生成变元
CYK
由一个CNF初始化,find
方法用于判断字符串是否在CNF中
并保存推导过程,即动态规划数组
Input
实现自己的输入函数,一直等待输入直到输入EOF(Ctrl + Z)
并对输入字符串进行简单处理,切分
输入字符串格式为:A -> BC|AD|b
其中空格可含可不含
Visualization
可视化类,将推导过程输出到.csv
文件中
将.csv
文件使用Excel打开便可查看完整推导过程
代码中已有完整注释和使用说明
代码
'''
Used for judging a string whether be in the CFL (presented in CNF) using CYK algorithm
Author: JOke-Lin
Time: 2019-4-27
Vision: Python 3.7
Introduction:
For reading CNF you should use the format "A -> BC|a|b" (the blank space is allowed except "->")
The first character will be considered as CFL's Start Symbol
And end inputing by input "Ctrl + Z"
For testing strings, you can input as many as strings you want
And exit by using "ctrl + Z"
For Output, the output format is the .csv file whcih should be checked by Excel
The output file name can be changed in codes (the default is "result.csv")
Attention:
When you test a new string, you must closed the last .csv file
Otherwise you will meet a I/O Error because the read-write conflict
'''
class CNF:
def __init__(self, cnf: dict):
''' cnf: [str:list] '''
self.cnf = cnf
self.S = list(cnf.keys())[0]
def find_product(self, target: str):
''' find the productor '''
res = []
for key, value in self.cnf.items():
if target in value:
res.append(key)
return res
class CYK:
def __init__(self, cnf: CNF):
self.cnf = cnf
def find(self, target: str):
''' judge if the target is in the CNF '''
self.dp_mat = [[""]*len(target) for i in range(len(target))]
# initialize
for i in range(len(target)):
self.dp_mat[i][i] = list(set(self.cnf.find_product(target[i])))
for length in range(2, len(target) + 1):
# i,j are the position in dp_mat
for i in range(len(target) - length + 1):
j = i + length - 1
temp = []
# split the sub_target
for k in range(i, j):
list1 = self.dp_mat[i][k]
list2 = self.dp_mat[k+1][j]
temp += self.find_product(list1, list2)
self.dp_mat[i][j] = list(set(temp))
if self.cnf.S in self.dp_mat[0][len(target) - 1]:
return True
return False
def find_product(self, list1: list, list2: list):
''' find all productions from Cartesian product of two lists '''
res = []
for i in range(len(list1)):
for j in range(len(list2)):
temp = self.cnf.find_product(list1[i]+list2[j])
if temp != None:
res += temp
return res
class Input:
''' my input class for reading the CNF '''
def getDictItemFromInput(self, s: str):
s = s.replace(' ', '') # delete extra blank spacing
temp = s.split("->")
return temp[0], temp[1].split("|")
def getInput(self):
''' return a dict CNF '''
res = {}
while True:
try:
s = input()
dict_item = self.getDictItemFromInput(s)
res[dict_item[0]] = dict_item[1]
except:
break
return res
class Visualization:
'''
make the deduction visualized
use dp_mat from class CYK to initialize
and creat .csv file to show in Excel
'''
def __init__(self, data: list):
self.data = [[""]*len(data) for i in range(len(data))]
for i in range(len(data)):
temp = data[i][i:]
for j in range(len(data) - i):
self.data[len(data) - j - 1][i] = temp[j]
def seeAsCsv(self, filename: str):
''' write data to filename as .csv '''
with open(filename, "w") as f:
for i in range(len(self.data)):
line = ""
for j in range(len(self.data[i])):
temp = ""
if type(self.data[i][j]) == str:
temp = self.data[i][j]
else:
temp = "\"X{},{}=".format(j+1,len(self.data)-i)+"{"
for k in range(len(self.data[i][j])):
temp = temp + self.data[i][j][k]
if k != len(self.data[i][j]) - 1:
temp = temp + ","
temp = temp + "}\""
line = line + temp + ","
f.write(line + "\n")
if __name__ == "__main__":
print("Please Input the CNF: (The First Character must be Start symbol until Ctrl+Z )")
myinput = Input()
# get the CNF read until miss ctrl + Z
data_dict = myinput.getInput()
# creat my CYK algorithm
cyk = CYK(CNF(data_dict))
print("input the string: (until Ctrl+Z)")
# test the input string until read the ctrl + z
while True:
try:
target_str = input()
if cyk.find(target_str):
print("Yes")
else:
print("No")
visual = Visualization(cyk.dp_mat)
try:
visual.seeAsCsv("result.csv")
except:
print("Write Error: Please Close Last csv file")
except:
break