用 Python 语言实现对 C 语言子集的源程序进行词法分析。通过输入源程序,从左到右对字符串进行扫描和分解,依次输出各个单词的内部编码及单词符号自身值;若遇到错误则显示“Error”,然后跳过错误部分继续显示;同时进行标识符登记符号表的管理。
以下是实现词法分析设计的主要工作:
1)从源程序文件中读入字符。
2)统计行数和列数,用于错误单词的定位。
3)删除空格类字符,包括回车、制表符、空格。
4)按拼写规则识别单词,并用(内码,属性)二元式表示(属性值——token 的机内表示)。
5)如果发现错误则报告出错。
6)根据需要决定是否填写标识符表,供以后各阶段使用。
代码:
class Analysis:
    """Lexical analyser (scanner) for a small subset of C.

    ``scan`` splits a chunk of source text into tokens and appends one
    ``[token_text, category_name, (row, col)]`` entry per token to
    ``self.result``.  Category names are the keys of ``self.dic``.  Text that
    cannot be recognised is recorded under ``"ERROR"`` and scanning resumes
    right after it, so no input character is silently dropped.

    Helper contract: ``Digit``, ``Mark``, ``Char``, ``String`` and
    ``Operator`` each read one token from the *start* of ``inString``, append
    the characters they consume to ``self.token`` and report the end position
    as ``pos + <number of characters consumed>``.
    """

    def __init__(self):
        # Reserved words of the supported C subset.
        self.guanjianzi = ["extern", "float", "for", "goto", "if", "int", "long",
                           "break", "case", "char", "const", "continue",
                           "do", "double", "else", "enum",
                           "return", "short", "signed", "sizeof", "static",
                           "struct", "switch", "unsigned", "void",
                           "while"]
        # Every operator/delimiter spelling; used to decide whether the
        # character following a number or identifier legitimately ends it.
        self.guanxifenjiefu = ["+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
                               "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
                               "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
                               "}", "\\", ".", ":", "!"]
        # Delimiters (always a single character).
        self.fenjiefu = [";", "(", ")", ",", "#", "[", "]", "{", "}", "\\"]
        # Operators reported as "关系运算符".
        self.guanxifu = ["<", "<=", ">", ">=", "=", "==", "!=", "^", "&", "&&", "|", "||", "<<", ">>", "!"]
        # Operators reported as "算术运算符".
        self.yunsuanfu = ["+", "-", "*", "/", "%", "~", "+=", "*=", "/=", "-=",
                          "++", "--"]
        self.token = ""  # text of the token currently being assembled
        self.result = []  # accumulated [token, category, (row, col)] entries
        # Category name -> internal numeric code (not consulted by the
        # methods of this class).
        self.dic = {'关键字': 2, '标识符': 1, "常数": 3, "算术运算符": 4, "关系运算符": 5, "字符串常量": 6,
                    "字符常量": 7, "分界符": 8, "ERROR": 9}

    def Reserve(self, target):
        """Return True if *target* is a reserved keyword."""
        return target in self.guanjianzi

    def Digit(self, inString, pos):
        """Read a numeric constant from the start of *inString*.

        Accepts digits, at most one ``.`` (only before an exponent marker) and
        at most one ``e``/``E`` exponent marker.

        Returns:
            ``(ok, end)``.  ``ok`` is True when at least one digit was read and
            the constant is followed by end of input, whitespace or an
            operator/delimiter character.  ``end`` is *pos* plus the number of
            characters consumed.
        """
        ok = False
        end = pos
        for ch in inString:
            has_exponent = "e" in self.token or "E" in self.token
            if ch.isdigit():
                ok = True
            elif ch == "." and "." not in self.token and not has_exponent:
                pass
            elif ch in ("e", "E") and not has_exponent:
                # Both spellings are checked together; the original
                # `a or b and c` condition let 'e' repeat indefinitely.
                pass
            else:
                ok = ok and (ch in self.guanxifenjiefu or ch.isspace())
                break
            self.token += ch
            end += 1
        return ok, end

    def Mark(self, inString, pos):
        """Read an identifier/keyword from the start of *inString*.

        Consumes letters, digits and underscores.

        Returns:
            ``(ok, end)``.  ``ok`` is True when at least one character was read
            and the word is followed by end of input, whitespace or an
            operator/delimiter character.  ``end`` is *pos* plus the number of
            characters consumed.
        """
        ok = False
        end = pos
        for ch in inString:
            if ch.isalpha() or ch.isdigit() or ch == "_":
                self.token += ch
                end += 1
                ok = True
            else:
                # Stop at the first foreign character so the token is always a
                # contiguous prefix of the input.
                ok = ok and (ch in self.guanxifenjiefu or ch.isspace())
                break
        return ok, end

    def Char(self, inString, pos):
        """Read a character constant such as ``'a'`` or ``'\\n'``.

        *inString* is expected to start with the opening single quote.

        Returns:
            ``(True, pos + length)`` with the whole literal appended to
            ``self.token`` on success; otherwise ``(False, pos + 1)`` with only
            the opening quote consumed, so the caller can report it and go on.
        """
        opening = inString[:1]
        self.token += opening
        # A backslash escape occupies two characters, anything else one.
        body_len = 2 if inString[1:2] == "\\" else 1
        close_at = 1 + body_len
        body = inString[1:close_at]
        well_formed = (
            len(body) == body_len
            and body != "'"
            and inString[close_at:close_at + 1] == "'"
        )
        if well_formed:
            self.token += inString[1:close_at + 1]
            return True, pos + close_at + 1
        return False, pos + len(opening)

    def String(self, inString, pos):
        """Read a double-quoted string literal from the start of *inString*.

        Any character may appear inside the literal; a backslash escapes the
        character that follows it.

        Returns:
            ``(True, end)`` when the closing quote was found, otherwise
            ``(False, end)`` with the rest of the input consumed.  ``end`` is
            *pos* plus the number of characters consumed.
        """
        opening = inString[:1]
        self.token += opening
        end = pos + len(opening)
        escaped = False
        for ch in inString[1:]:
            self.token += ch
            end += 1
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                return True, end
        return False, end

    def Operator(self, inString, pos):
        """Read the longest known operator from the start of *inString*.

        Candidates are the entries of ``self.yunsuanfu`` and ``self.guanxifu``.
        If none matches, a single character is consumed so the caller always
        makes progress.

        Returns:
            *pos* plus the number of characters consumed.
        """
        known = self.yunsuanfu + self.guanxifu
        longest = max((len(op) for op in known), default=1)
        for size in range(longest, 0, -1):
            candidate = inString[:size]
            if candidate in known:
                self.token += candidate
                return pos + len(candidate)
        fallback = inString[:1]
        self.token += fallback
        return pos + len(fallback)

    def scan(self, input, row, col):
        """Tokenise *input* and append the tokens to ``self.result``.

        Args:
            input: text to analyse (converted with ``str``).  The name shadows
                the builtin but is kept for keyword-argument compatibility.
            row: row number stored with every token found in *input*.
            col: column number stored with every token found in *input*.

        Whitespace between tokens is skipped; empty or blank input adds
        nothing.  Unrecognised characters are recorded as ``"ERROR"``.
        """
        text = str(input)
        length = len(text)
        start = 0
        # Iterative on purpose: one loop pass per token instead of one
        # recursive call per token.
        while start < length:
            ch = text[start]
            if ch.isspace():
                start += 1
                continue

            self.token = ""
            rest = text[start:]
            if ch.isdigit():
                ok, end = self.Digit(rest, 0)
                kind = "常数" if ok else "ERROR"
            elif ch.isalpha() or ch == "_":
                # A bad character after the word is reported on its own in the
                # next pass, so the word itself is always recorded.
                _, end = self.Mark(rest, 0)
                kind = "关键字" if self.Reserve(self.token) else "标识符"
            elif ch == '"':
                ok, end = self.String(rest, 0)
                kind = "字符串常量" if ok else "ERROR"
            elif ch == "'":
                ok, end = self.Char(rest, 0)
                kind = "字符常量" if ok else "ERROR"
            elif ch in self.yunsuanfu or ch in self.guanxifu:
                end = self.Operator(rest, 0)
                if self.token in self.yunsuanfu:
                    kind = "算术运算符"
                elif self.token in self.guanxifu:
                    kind = "关系运算符"
                else:
                    kind = "ERROR"
            elif ch in self.fenjiefu:
                # Each delimiter is its own token, so "();" is three tokens.
                self.token = ch
                end = 1
                kind = "分界符"
            else:
                self.token = ch
                end = 1
                kind = "ERROR"

            self.result.append([self.token, kind, (row, col)])
            # Guard against a helper that consumed nothing.
            start += max(end, 1)
if __name__ == "__main__":
    # Read one line of C source, tokenise it chunk by chunk and print every
    # recorded token entry.
    analysis = Analysis()
    # split() with no argument treats any run of whitespace (spaces, tabs) as
    # one separator and never yields empty strings; split(' ') produced an
    # empty chunk for every doubled space.
    words = input("请输入代码:").split()
    # Only a single line is read, so the row is always 1; col is the 1-based
    # index of the whitespace-separated chunk within that line.
    for col, word in enumerate(words, start=1):
        analysis.scan(word, 1, col)
    for res in analysis.result:
        print(res)