文法如下:
具体题目参见张莉老师的《编译原理及编译程序构造》第3.4节“词法分析程序的设计与实现”。
代码来了:
import sys
# Read the whole program text from stdin and cut it into whitespace-separated
# items; the scanner below walks these items one at a time.
# NOTE: the name `list` shadows the builtin, but it is kept because the
# scanning loop further down iterates over it under this name.
list = sys.stdin.read().split()
# Keyword table: each reserved word maps to its token code (1..5, in order).
dict_key = {
    word: code
    for code, word in enumerate(('BEGIN', 'END', 'IF', 'THEN', 'ELSE'), start=1)
}

# Operator / delimiter table: token codes run from 22 ('+') to 32 ('=').
dict_op = {
    symbol: code
    for code, symbol in enumerate(
        ('+', '-', '*', '/', '(', ')', ',', ';', ':', ':=', '='),
        start=22,
    )
}

# Character classes used by the scanner: lower-case letters, upper-case
# letters, both combined (upper-case first), and the decimal digits.
letter_u = [chr(code) for code in range(ord('a'), ord('z') + 1)]
letter_s = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
letter = letter_s + letter_u
digit = [chr(code) for code in range(ord('0'), ord('9') + 1)]
def check_of(token):
    """Return True when the decimal literal ``token`` exceeds 2147483647.

    ``token`` is a string of decimal digits. Leading zeros do not count
    towards the value. An empty string raises ``ValueError``, as ``int('')``
    does.
    """
    # Drop leading zeros, but keep one character so that "000" still reads
    # as "0" and an empty token still reaches int() and raises.
    digits = token.lstrip('0') or token[:1]
    # 2147483647 has 10 digits, so anything longer overflows. Deciding by
    # length also avoids calling int() on huge literals, which raises
    # ValueError beyond the interpreter's digit limit on recent Pythons.
    if len(digits) > 10:
        return True
    return int(digits) > 2147483647
def _emit_pending(token, is_number):
    """Print the token accumulated so far; do nothing when it is empty.

    Numbers are printed as ``21 <value>`` (or ``21 OF`` when ``check_of``
    reports overflow), reserved words as their bare code from ``dict_key``,
    and every other word as ``20 <name>``.
    """
    if not token:
        return
    if is_number:
        print('21', 'OF' if check_of(token) else token)
    elif token in dict_key:
        # Keywords are looked up on every flush, so `IF(` yields the keyword
        # code for IF and not an identifier.
        print(dict_key[token])
    else:
        print('20', token)


flag = 0  # 1 while inside a /* ... */ comment (may span several items), else 0
err = 0   # becomes 1 once an unrecognised character has been reported
for item in list:
    token = ''
    flag_t = 0  # 1 while `token` holds a number; 0 when it is empty or a word
    i = 0
    while i < len(item):
        step = 1  # characters consumed this round; 2 for ':=', '/*' and '*/'
        t = item[i]
        if flag == 1:
            # Inside a comment everything is skipped except the closing '*/'.
            if t == '*' and item[i + 1:i + 2] == '/':
                flag = 0
                step = 2
        elif t in letter:
            if flag_t == 1:
                # A letter directly after digits ends the number.
                _emit_pending(token, True)
                token = ''
                flag_t = 0
            token += t
        elif t in digit:
            # A digit starts a number only when no word is in progress;
            # otherwise it simply extends the current identifier.
            if not token:
                flag_t = 1
            token += t
        elif t in dict_op:
            # An operator terminates whatever was being collected.
            _emit_pending(token, flag_t == 1)
            token = ''
            flag_t = 0
            if t == '/' and item[i + 1:i + 2] == '*':
                # Comment opener: nothing is printed for it.
                flag = 1
                step = 2
            elif t == ':' and item[i + 1:i + 2] == '=':
                print(dict_op[':='])
                step = 2
            else:
                print(dict_op[t])
        else:
            # Unknown character: flush the pending token, report, and stop.
            _emit_pending(token, flag_t == 1)
            token = ''
            print('-1', t)
            err = 1
            break
        i += step
    if err == 1:
        break
    # End of the item: whatever is still pending is a complete token.
    _emit_pending(token, flag_t == 1)
if flag == 1:
    print('-1 incomplete comment')
GitHub代码在这:github