词法分析设计python实现

用 Python 语言实现对 C 语言子集的源程序进行词法分析。通过输入源程序,
从左到右对字符串进行扫描和分解,依次输出各个单词的内部编码及单词符号自身值;
若遇到错误则显示 “Error”,然后跳过错误部分继续显示;
同时进行标识符登记符号表的管理。
以下是实现词法分析设计的主要工作:
1 )从源程序文件中读入字符。
2 )统计行数和列数用于错误单词的定位。
3 )删除空格类字符,包括回车、制表符、空格。
4 )按拼写单词,并用(内码,属性)二元式表示。 ( 属性值 ——token 的机内
表示 )
5 )如果发现错误则报告出错。
6 )根据需要决定是否填写标识符表,供以后各阶段使用。
代码:
class Analysis:
    """Lexical analyser for a small subset of C.

    ``scan`` splits a piece of source text into tokens and appends one
    ``[text, category, (row, col)]`` entry per token to ``self.result``.
    The category is one of the keys of ``self.dic``.

    The helper methods (``Digit``, ``Mark``, ``Char``, ``String`` and
    ``Operator``) append the characters of the token they recognise to
    ``self.token``.  They also return a position counter whose meaning is
    kept as it was for backward compatibility; ``scan`` itself does not
    use it and relies on ``len(self.token)`` to find where the next token
    starts.
    """

    def __init__(self):
        # Reserved words of the C subset.
        self.guanjianzi = [ "extern", "float", "for", "goto", "if", "int", "long",
                            "break", "case", "char", "const", "continue",
                            "do", "double", "else", "enum",
                             "return", "short", "signed", "sizeof", "static",
                            "struct", "switch",  "unsigned", "void",
                             "while"]

        # Every operator/delimiter symbol; Digit and Mark use it to decide
        # whether the character that ended a token is a legal terminator.
        self.guanxifenjiefu = ["+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
                                    "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
                                    "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
                                    "}", "\\", ".", ":", "!"]
        # Single-character delimiters.
        self.fenjiefu = [";", "(", ")", ",", "#", "[", "]", "{", "}", "\\"]
        # Operators that scan reports in the "关系运算符" category.
        self.guanxifu = ["<", "<=", ">", ">=", "=", "==", "!=", "^", "&", "&&", "|", "||", "<<", ">>", "!"]
        # Operators that scan reports in the "算术运算符" category.
        self.yunsuanfu = ["+", "-", "*", "/", "%", "~", "+=", "*=", "/=", "-=",
                         "++", "--"]

        self.token = ""  # text of the token currently being built
        self.result = []  # every token found so far, in source order
        # Category name -> internal code (not read by the class itself).
        self.dic = {'关键字': 2, '标识符': 1, "常数": 3, "算术运算符": 4, "关系运算符": 5, "字符串常量": 6,
                    "字符常量": 7, "分界符": 8, "ERROR": 9}

    def Reserve(self, target):
        """Return True when *target* is a reserved word."""
        return target in self.guanjianzi

    def Digit(self, inString, pos):
        """Collect a numeric constant from the start of *inString*.

        Digits, at most one decimal point (only before an exponent marker)
        and at most one exponent marker (``e`` or ``E``) are appended to
        ``self.token``.

        Returns ``(flag, pos)``.  *flag* is True when collection stopped at
        an operator, delimiter or whitespace character, or reached the end
        of *inString* after at least one digit; it is False when any other
        character (for example a letter) ended the number.  *pos* is the
        incoming value advanced by one for every character examined,
        including the character that ended the number.
        """
        flag = False
        for ch in inString:
            pos += 1
            has_exponent = 'e' in self.token or 'E' in self.token
            if ch.isdigit():
                self.token += ch
                flag = True
            elif ch == '.' and '.' not in self.token and not has_exponent:
                self.token += ch
            elif ch in ('e', 'E') and not has_exponent:
                # Parenthesised on purpose: a second exponent marker must
                # not be accepted, whichever case it is written in.
                self.token += ch
            else:
                flag = ch in self.guanxifenjiefu or ch.isspace()
                break
        return flag, pos

    def Mark(self, inString, pos):
        """Collect an identifier or keyword from the start of *inString*.

        Letters, digits and underscores are appended to ``self.token``;
        collection stops at the first character of any other kind.

        Returns ``(flag, pos)``.  *flag* is True when collection stopped at
        an operator, delimiter or whitespace character, or reached the end
        of *inString* after at least one character; otherwise False.  *pos*
        is the incoming value advanced by one for every character examined,
        including the character that ended the identifier.
        """
        flag = False
        for ch in inString:
            pos += 1
            if ch.isalpha() or ch.isdigit() or ch == '_':
                self.token += ch
                flag = True
            else:
                flag = ch in self.guanxifenjiefu or ch.isspace()
                break
        return flag, pos

    def Char(self, inString, pos):
        """Collect a character constant from the start of *inString*.

        *inString* is expected to start with a single quote.  Accepted
        forms are ``'c'`` (any character except a quote or backslash) and
        the escaped form ``'\\c'``.  On success the whole literal is
        appended to ``self.token``; on failure only the opening quote is.

        Returns ``(flag, pos)``.  *flag* tells whether a well-formed
        literal was found.  The returned position is ``pos + len(inString)``
        when *inString* has fewer than three characters, otherwise the
        length examined (3, or 4 for an escaped literal) regardless of the
        incoming *pos*.
        """
        if not inString:
            return False, pos
        self.token += inString[0]
        if len(inString) < 3:
            return False, pos + len(inString)
        if inString[1] == '\\':
            # Escaped form: quote, backslash, one character, quote.
            if len(inString) >= 4 and inString[3] == "'":
                self.token += inString[1:4]
                return True, 4
            return False, 3
        if inString[1] != "'" and inString[2] == "'":
            self.token += inString[1:3]
            return True, 3
        return False, 3

    def String(self, inString, pos):
        """Collect a string constant from the start of *inString*.

        *inString* is expected to start with a double quote.  Every
        character up to and including the closing quote is appended to
        ``self.token``; a backslash escapes the character after it, so
        ``\\"`` does not end the string.

        Returns ``(flag, pos)``.  *flag* is True when the closing quote was
        found.  *pos* is the incoming value advanced by one for every
        character examined after the opening quote.
        """
        if not inString:
            return False, pos
        self.token += inString[0]
        escaped = False
        for ch in inString[1:]:
            pos += 1
            self.token += ch
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                return True, pos
        return False, pos

    def Operator(self, inString, pos):
        """Collect the longest known operator at the start of *inString*.

        The operator is looked up in ``self.yunsuanfu`` and
        ``self.guanxifu`` and appended to ``self.token``.  When nothing
        matches, the first character alone is taken so the token is never
        empty and the caller can report it.

        Returns *pos* unchanged when *inString* has at most one character;
        otherwise *pos* advanced by the operator's length plus one for the
        character after it, if there is one.
        """
        if not inString:
            return pos
        known = self.yunsuanfu + self.guanxifu
        matches = [op for op in known if inString.startswith(op)]
        best = max(matches, key=len, default=inString[0])
        self.token += best
        if len(inString) == 1:
            return pos
        return pos + min(len(best) + 1, len(inString))

    def scan(self, input, row, col):
        """Split *input* into tokens and append them to ``self.result``.

        input: the text to analyse; surrounding and separating whitespace
            is ignored.  (The name shadows the builtin but is kept so
            keyword callers keep working.)
        row, col: position stored with every token produced by this call.

        Malformed numbers, unterminated or malformed literals and
        characters that belong to no category are recorded with the
        "ERROR" category, after which analysis continues with the text
        that follows.
        """
        rest = str(input).strip()
        while rest:
            self.token = ""
            head = rest[0]
            if head.isdigit():
                ok, _ = self.Digit(rest, 0)
                kind = "常数" if ok else "ERROR"
            elif head.isalpha() or head == '_':
                self.Mark(rest, 0)
                kind = "关键字" if self.Reserve(self.token) else "标识符"
            elif head == '"':
                ok, _ = self.String(rest, 0)
                kind = "字符串常量" if ok else "ERROR"
            elif head == "'":
                ok, _ = self.Char(rest, 0)
                kind = "字符常量" if ok else "ERROR"
            elif head in self.yunsuanfu or head in self.guanxifu:
                self.Operator(rest, 0)
                if self.token in self.yunsuanfu:
                    kind = "算术运算符"
                elif self.token in self.guanxifu:
                    kind = "关系运算符"
                else:
                    kind = "ERROR"
            elif head in self.fenjiefu:
                # A delimiter is always exactly one character, so adjacent
                # delimiters such as "()" are simply separate tokens.
                self.token = head
                kind = "分界符"
            else:
                self.token = head
                kind = "ERROR"
            self.result.append([self.token, kind, (row, col)])
            # Every branch above leaves at least one character in
            # self.token, so this always makes progress.
            rest = rest[len(self.token):].lstrip()


if __name__ == "__main__":
    analysis = Analysis()
    # split() with no argument splits on any run of whitespace and never
    # yields empty strings, so repeated spaces cannot hand scan() an empty
    # word.
    content = input("请输入代码:").split()
    # Only one line is read, so the row is always 1; the "column" stored with
    # each token is the 1-based index of the word it came from.
    for col, word in enumerate(content, start=1):
        analysis.scan(word, 1, col)
    for res in analysis.result:
        print(res)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值