# A simple lexical analyzer for C source code, implemented in Python.
# Lookup tables (two sets and a dict) that the scanned text is matched against.
import sys
# Multi-character lexemes recognised as a unit: keywords, two-character
# operators, the comment delimiters and "include".
map_key = {
    # keywords
    "begin",
    "if",
    "then",
    "while",
    "do",
    "end",
    "int",
    "main",
    "else",
    "float",
    "double",
    "return",
    "cout",
    # two-character operators
    "<=",
    ">=",
    "==",
    "!=",
    "<<",
    ">>",
    # comment opener, preprocessor word, comment closer
    "/*",
    "include",
    "*/",
}
# Single-character symbols: brackets, arithmetic/comparison operators and
# punctuation.
map_sym = {
    "(", ")", "{", "}",
    "+", "-", "*", "=", "/",
    ">", "<",
    ";", ":", ",", "#",
}
# Encoding table: maps every lexeme in map_key / map_sym to its numeric code.
# NOTE(review): "/" and "=" both map to 25 -- possibly a typo; confirm the
# intended code for "/".
map_data = {
    # keywords
    "begin": 1,
    "if": 2,
    "then": 3,
    "while": 4,
    "do": 5,
    "end": 6,
    "int": 7,
    "main": 8,
    "else": 50,
    "float": 51,
    "double": 52,
    "return": 12,
    "cout": 13,
    # two-character operators and the comment opener
    "<=": 38,
    ">=": 37,
    "==": 21,
    "!=": 40,
    "<<": 53,
    ">>": 54,
    "/*": 55,
    # single-character symbols
    "(": 26,
    ")": 27,
    "{": 28,
    "}": 29,
    "+": 22,
    "-": 23,
    "*": 24,
    "=": 25,
    "/": 25,
    ">": 35,
    "<": 36,
    ";": 34,
    ":": 33,
    ",": 32,
    "#": 0,
    # preprocessor word and comment closer
    "include": 56,
    "*/": 57,
}
def data_map(a, b):  # a: items to look up, b: collection they are tested against
    """Print each element of ``a`` that is contained in ``b`` with its code.

    The code is read from the module-level ``map_data`` table, so every
    matching element must be a key of that table.
    """
    for token in a:
        if token in b:
            print(token, map_data[token])
# Number test based on exception handling. str.isdigit() is not used because
# it only accepts pure digit strings and returns False for floating-point
# text such as "3.14".
def data_is_number(a):
    """Return True if ``a`` can be parsed by ``float()``, otherwise False.

    Anything ``float()`` accepts counts as a number, so besides "12" and
    "3.14" this also includes forms such as "1e5", "inf" and "nan".

    Args:
        a: The string to test.

    Returns:
        bool: True when ``float(a)`` succeeds, False when it raises
        ``ValueError``.
    """
    try:
        float(a)
    except ValueError:
        # Return an explicit False rather than falling through to None.
        return False
    return True
def data_start(a, b):
    """Strip leading symbols from chunk ``a``, printing each one with its code.

    Args:
        a: A whitespace-delimited chunk of source text.
        b: Iterable of symbol strings to look for at the start of ``a``;
            each must be a key of the module-level ``map_data`` table.

    Returns:
        str: ``a`` unchanged if it starts with "/*"; the string "*/" if it
        ends with "*/"; otherwise ``a`` with any matching leading symbols
        removed (e.g. "#include" -> "include").
    """
    if a.startswith('/*'):
        return a
    if a.endswith('*/'):
        return '*/'
    for symbol in b:
        if a.startswith(symbol):
            print(symbol, map_data[symbol])
            # Drop only the matched prefix. str.replace() would also delete
            # every later occurrence of the symbol inside the chunk, so those
            # symbols would never be reported.
            a = a[len(symbol):]
    return a
# Load the C program text to be analysed from "cc.txt" as a list of lines.
# The with-statement closes the file, so no explicit close() is needed.
with open("cc.txt", mode="r", encoding='utf-8') as f:
    data = list(f)
# Scan every line: split it into whitespace-delimited chunks, then report
# keywords/operators (codes from map_data), digits (code 10) and letters
# (code 20) found in each chunk.
for line in data:
    # split() with no separator drops the trailing newline and all
    # surrounding whitespace.
    for chunk in line.split():
        # Peel off leading symbols first; this handles the "#include" form.
        chunk = data_start(chunk, map_sym)
        if chunk in map_key:
            print(chunk, map_data[chunk])
            continue

        # Handle forms like "main()": report a keyword found at the start of
        # the chunk and drop only that prefix, so later text is not lost.
        for keyword in map_key:
            if chunk.startswith(keyword):
                chunk = chunk[len(keyword):]
                print(keyword, map_data[keyword])

        # Classify the remaining text one character at a time.
        for pos, ch in enumerate(chunk):
            if data_is_number(ch):
                # Empty string when the digit is the last character, which
                # avoids an IndexError at the end of the chunk.
                following = chunk[pos + 1:pos + 2]
                # bytes.isalpha() is ASCII-only and False for empty input.
                if following.encode('utf-8').isalpha():
                    # A digit immediately followed by a letter is reported
                    # as a lexical error and stops the program.
                    print(ch, 10)
                    print('该程序存在词法错误')
                    # NOTE(review): exits with status 0 even though an error
                    # was reported -- confirm whether non-zero is wanted.
                    sys.exit(0)
                print(ch, 10)
            elif ch in map_sym:
                print(ch, map_data[ch])
            elif ch.encode('utf-8').isalpha():
                print(ch, 20)