后面写的有些随便,过于冗杂
# 用来完成编译原理第一章节词法分析实验报告
# 新增正则表达式
import re
class Stack:#构建栈
def __init__(self):
self.st=[]
def push(self,info):#入栈
self.st.append(info)
def pop(self):#出栈
self.st.pop()
def return_string(self):#返回字符串
return ''.join(self.st)
def clear(self):#清除栈
self.st.clear()
k_array=['if','for','while','printf','then']#关键字表,单词种别码为1
s_array=[[';',',','(',')','[',']'],['+','-','*','/'],['<','<=','=','>','>=','<>']]#0 for 分界符;种类码为2
# 1 for 算数运算符;种类码为3
#2 for 关系运算符;单词种别码为4
id_array=['i','n','a']#存放标识符,单词种别码为6
ci_array=[]#存放常数,单词种类码为5
class word_analyse:
def __init__(self):#初始化
self.sentence=''
f = open("context.txt", 'r')
while True:
line = f.readline()
if line:
self.sentence=self.sentence+line
else:
break
f.close()
def read(self):#读取输入语句,将分好的单词寄存在nametoken,类型分在typetoken,位置放在position中
self.nametoken = [] # 输入单词
self.typetoken = [] # 和nametoken对应的单词种别
self.position=[] #存放单词的位置
stack=Stack()
while(len(self.sentence)!=0):
if(self.sentence[0].isalpha()):
med = 0
while (self.sentence[med].isalnum()):#根据第一个字母来寻找关键字or标识符
stack.push(self.sentence[med])
med=med+1
if stack.return_string() in k_array:
self.nametoken.append(stack.return_string())
self.typetoken.append(1)
elif stack.return_string() in id_array:
self.nametoken.append(stack.return_string())
self.typetoken.append(6)
else:
id_array.append(stack.return_string())
self.nametoken.append(stack.return_string())
self.typetoken.append(6)
stack.clear()
self.sentence = self.sentence[med:]
continue
elif self.sentence[0].isdigit():#根据第一个字符为数字来使用栈来判断数字的合法性
med=0
while(not self.sentence[med].isspace()):
stack.push(self.sentence[med])
med=med+1
reg = re.compile(r'^[-+]?[0-9]+\.[0-9]+$')
res = reg.match(stack.return_string())
if res is True:
self.nametoken.append(stack.return_string())
self.typetoken.append(5)
elif 'e+' in stack.return_string() or 'e-' in stack.return_string():
self.nametoken.append(stack.return_string())
self.typetoken.append(5)
else:
self.nametoken.append(stack.return_string())
self.typetoken.append('Error')
stack.clear()
self.sentence = self.sentence[med:]
continue
elif self.sentence[0:2]=='/*': #消除注释
while(self.sentence[0:2]!='*/'):
self.sentence=self.sentence[1:]
self.sentence=self.sentence[2:]
continue
elif self.sentence[0].isspace():#清理空格
self.sentence=self.sentence[1:]
continue
elif (not self.sentence[0].isalpha()) and (not self.sentence[0].isdigit()):
if self.sentence[0] in s_array[0]:#关于分界符,即时多个分界符在一起无需栈来分辨是否多个栈在一起
self.nametoken.append(self.sentence[0])
self.typetoken.append(2)
self.sentence = self.sentence[1:]
continue
elif self.sentence[0] in s_array[1]:#关于算数运算符,有多个算数运算符在一起的情况
med = 0
while (self.sentence[med] in s_array[1]):
stack.push(self.sentence[med])
med = med + 1
if stack.return_string() not in s_array[1]:
self.nametoken.append(stack.return_string())
self.typetoken.append('Error')
else:
self.nametoken.append(stack.return_string())
self.typetoken.append(3)
stack.clear()
self.sentence = self.sentence[med:]
continue
elif self.sentence[0] in s_array[2]:#关于关系运算符,单一or多个
med = 0
while (self.sentence[med] in s_array[2]):
stack.push(self.sentence[med])
med = med + 1
if stack.return_string() not in s_array[2]:
self.nametoken.append(stack.return_string())
self.typetoken.append('Error')
else:
self.nametoken.append(stack.return_string())
self.typetoken.append(4)
stack.clear()
self.sentence = self.sentence[med:]
continue
else:#剩下的基本上就是error了
self.nametoken.append(self.sentence[0])
self.typetoken.append('Error')
self.sentence = self.sentence[1:]
continue
def get_position(self):#字符集合获取相关的位置
x,y=1,0
for i in range(len(self.nametoken)):
if self.nametoken[i]==';':
y=y+1
self.position.append((x, y))
x=x+1
y=1
else:y=y+1
self.position.append((x,y))
def return_type_name(type):#反过来通过种类符号来推出其的单词汉语
dy = {1: '关键字', 2: '分界符', 3: '算数运算符', 4: '关系运算符', 5: '常数', 6: '标识符', 'Error': 'Error'}
return dy[type]
def show():#
test = word_analyse()
test.read()
test.get_position()
show_time = {}
def return_binary(a, b):
if a=='Error':
return a
else:
return (a, b)
for i in range(len(test.nametoken)):
show_time.setdefault(i, []).append(test.nametoken[i])
show_time.setdefault(i, []).append(return_binary(test.typetoken[i], test.nametoken[i]))
show_time.setdefault(i, []).append(return_type_name(test.typetoken[i]))
show_time.setdefault(i, []).append(test.position[i])
print('单词,二元序列,类型,位置(行,列)')
for i in range(len(test.nametoken)):
print(show_time[i])
if __name__ =="__main__":
show()