python基础及实现词法分析器的基本实现

最新推荐文章于 2024-08-19 20:41:37 发布

ji_r

最新推荐文章于 2024-08-19 20:41:37 发布

阅读量1.4w

点赞数 4

文章标签：词法分析

本文链接：https://blog.csdn.net/kobe_jr/article/details/80442431

版权

python基础：

1、list的使用，即列表。定义：list(列表名) = []；如下还有对他的遍历，这里的word算是定义了一个变量去存储res[i]的值：

# -*- coding: cp936 -*-

if __name__=="__main__":
    res = []
    res = ["hello", "you", "are", "good"]

    i = 0

#对列表的遍历
    for word in res:
        print("第", i ,"位置的元素", word)

        i = i + 1

2、缩进问题: 一般默认为4个空格的缩进，可以使用tab键或者空格，但是注意如果使用混乱，可能出现unexcepted indent之类的错误。

3、注释：使用#号，如果是多行注释则是''' XXX '''（头尾三个单引号，注意中英文的区别）。

4、函数定义：def 函数名；或者利用函数对象的__name__属性： if __name__=="__main__"。

5、正则表达式的使用：

import re ：使用python自己的正则表达式模块，其中有对应的方法。（下面编写了不少正则表达式，可以参考）。这里举常见的一个例子：根据空格分隔字符串：

# -*- coding: cp936 -*-

import re

if __name__=="__main__":

    temp = 'aaa bbb   ccc ddd    eee'

    res = re.split('\s+', temp)

    print(res)

6、读写文件（使用的是with/ 还可以使用open- read- close）：

# -*- coding: cp936 -*-

if __name__=="__main__":

    filename = 'D:\demo.txt'

    filename2 = 'D:\demo1.txt'
    with open(filename, "r") as fin:
        
        res = [line.strip() for line in fin]
        print(res)

        #for line in fin:
             #print(line)

    with open(filename2, "a") as fo:
        #这里写在txt文件式一行，因为它是按行读的。
        for line in res:
            if line:
                fo.write(line)
            else:
                continue

词法分析：

是编译原理中第一步需要做的事，将给定的程序或者其他读取，根据词的类别不同，进行分类。即可以理解将一段程序分成一个个的词。

先读取test.c的内容，存进一个列表中，然后通过和数个正则表达式进行match匹配，从而进行分类。这里分类还是存在一些小的问题，欢迎提出更优的解决方案。谢谢!

待读的代码：

#include <studio.h>
int main(int argc, char const *argv[])
{
	char *str = "String123";
	//This is comment
	/**
	printf("NULL\n");
	*/
	
	int floatnum = 123.4.56;
    // If是语法错误，不做词法错误处理
	If(6.4 <= 3E-1){
		printf("Yes\n");}	
	return 0;
}

参考代码：

#!/usr/bin/python
# -*- coding: cp936 -*-
#ex2.py


import re

class Token(object):

    #初始化
    def __init__(this):
         #存储分词的列表
        this.results = []

        #行号
        this.lineno = 1

        #关键字
        this.keywords = ['auto', 'struct', 'if', 'else', 'for', 'do', 'while', 'const',
            'int', 'double', 'float', 'long', 'char', 'short', 'unsigned',
            'switch', 'break', 'defalut', 'continue', 'return', 'void', 'static',
            'auto', 'enum', 'register', 'typeof', 'volatile', 'union', 'extern']
        '''
    regex中：*表示从0-， +表示1-， ？表示0-1。对应的需要转义
    { 表示限定符表达式开始的地方 \{
    () 标记一个子表达式的开始和结束位置。子表达式可以获取共以后使用：\( \)
    r表示原生字符串。
    '''  

        Keyword = r'(?P<Keyword>(auto){1}|(double){1}|(int){1}|(if){1}|' \
		          r'(#include){1}|(return){1}|(char){1}|(stdio\.h){1}|(const){1})'
        #运算符
        Operator = r'(?P<Operator>\+\+|\+=|\+|--|-=|-|\*=|/=|/|%=|%)'

         #分隔符/界符
        Separator = r'(?P<Separator>[,:\{}:)(<>])'
        
         #数字: 例如：1 1.9
        Number = r'(?P<Number>\d+[.]?\d+)'

         #变量名 不能使用关键字命名
        ID = r'(?P<ID>[a-zA-Z_][a-zA-Z_0-9]*)'

         #方法名 {1} 重复n次
        Method = r'(?P<Method>(main){1}|(printf){1})'

         #错误  \S 匹配任意不是空白符的字符
        #Error = r'(?P<Error>.*\S+)'
        Error = r'\"(?P<Error>.*)\"'

         #注释  ^匹配行的开始 .匹配换行符以外的任意字符 \r回车符 \n换行符
        Annotation = r'(?P<Annotation>/\*(.|[\r\n])*/|//[^\n]*)'

         #进行组装，将上述正则表达式以逻辑的方式进行拼接, 按照一定的逻辑顺序
         # compile函数用于编译正则表达式，生成一个正则表达式对象
        this.patterns = re.compile('|'.join([Annotation, Keyword, Method, ID, Number, Separator, Operator, Error]))

       
    #读文件
    def read_file(this, filename):
         with open(filename, "r") as f_input:
               return [line.strip() for line in f_input]

    #结果写入文件
    def write_file(this, lines, filename = 'D:/results.txt'):
        with open(filename, "a") as f_output:
                for line in lines:
                    if line:
                        f_output.write(line)
                    else:
                        continue

    def get_token(this, line):

        #finditer : 在字符串中找到正则表达式所匹配的所有字串， 并把他们作为一个迭代器返回
        for match in re.finditer(this.patterns, line):

            #group()：匹配的整个表达式的字符 # yield 关键字：类似return ，返回的是一个生成器，generator
            yield (match.lastgroup, match.group())

    def run(this, line, flag=True):
        for token in this.get_token(line):
            if flag:
                print ("line %3d :" % this.lineno, token)
            '''
            else:
                yield "line %3d :" % this.lineno + str(token) + "\n"
            '''

    def printrun(this, line, flag = True):
        for token in this.get_token(line):
            if flag:
                print ("lines x: ", token)


if __name__=='__main__':
         token = Token()
         filepath = "D:/Test.c"

         lines = token.read_file(filepath)

         for line in lines:
             token.run(line, True)

             #写入指定文件中
             #token.write_file(token.run(line, False), "D:/results.txt")
             token.lineno += 1