// lexicals_analyzer.cpp
#include <cstdio>
#include <iostream>
#include <cstdlib>
#include <windows.h>
using namespace std;
class lexicals_analyzer
{
private:
	static const int MAX_LEXICALS_LENGTH = 200; // growth unit (bytes) of the whole-text buffer
	static const int MAX_TOCKEN_LENGTH = 50;    // growth unit (bytes) of the single-token buffer
	static const int GAP = 10;                  // grow a buffer when its free margin falls to this
	static const int single_tockens_num = 20;   // size of the single-character token table
	char single_tockens[single_tockens_num];    // single-character tokens (operators, brackets, ...)
	int larger_times;        // current growth multiplier of `lexicals`
	int tocken_larger_times; // current growth multiplier of `tocken`
	int cur_cont;            // number of characters currently stored in `lexicals`
	char *lexicals;          // normalized text: tokens separated by single spaces
	char *tocken;            // scratch buffer whose address get_next_tocken() returns
	char *ptocken;           // read cursor into `lexicals`
	FILE *fp;                // input file; closed before the constructor returns

	// The class owns raw malloc'd buffers, so copying would double-free.
	// Declared but not defined (pre-C++11 "non-copyable" idiom).
	lexicals_analyzer(const lexicals_analyzer&);
	lexicals_analyzer& operator=(const lexicals_analyzer&);

	// Append one character to *buf, growing it when the free margin
	// shrinks to GAP.  Aborts on allocation failure instead of writing
	// through a possibly-null pointer (the old code only printed a
	// message and carried on).
	void add(char **buf, int *pcur_cont, const char ch)
	{
		// `<=` rather than `==`: never miss the trigger point.
		if (larger_times * MAX_LEXICALS_LENGTH - *pcur_cont <= GAP)
		{
			++larger_times;
			// realloc into a temporary: assigning straight back would
			// leak the old block on failure.
			char *grown = (char*)realloc(*buf, larger_times * MAX_LEXICALS_LENGTH);
			if (grown == NULL)
			{
				std::cerr << "lexicals_analyzer: out of memory" << std::endl;
				exit(1);
			}
			*buf = grown;
		}
		(*buf)[(*pcur_cont)++] = ch;
	}

	// True when ch is one of the single-character tokens.
	bool text_single(const char ch)
	{
		for (int i = 0; i < single_tockens_num; ++i)
		{
			if (ch == single_tockens[i])
				return true;
		}
		return false;
	}

	// Append a separating space unless the buffer is empty or already
	// ends with one.  The `cur_cont > 0` guard fixes the old
	// out-of-bounds read of lexicals[-1] at the start of the file.
	void add_separator()
	{
		if (cur_cont > 0 && lexicals[cur_cont - 1] != ' ')
			add(&lexicals, &cur_cont, ' ');
	}

	// Copy a quoted literal (string or character) verbatim, honouring
	// backslash escapes so an escaped quote does not terminate it.
	// On entry `c` holds the opening quote.  Returns false when EOF is
	// hit inside the literal.  The escape state toggles, so "\\\\"
	// terminates correctly (the old flag logic got this wrong).
	bool copy_quoted(int &c)
	{
		const char quote = (char)c;
		bool escaped = false;
		add(&lexicals, &cur_cont, (char)c); // opening quote
		while ((c = getc(fp)) != EOF)
		{
			if (!escaped && c == quote) // unescaped closing quote
			{
				add(&lexicals, &cur_cont, (char)c);
				add(&lexicals, &cur_cont, ' ');
				return true;
			}
			// A backslash escapes the next char; two in a row cancel out.
			escaped = (!escaped && c == '\\');
			add(&lexicals, &cur_cont, (char)c);
		}
		return false;
	}
public:
	// Reads file_path, strips // and /* */ comments, and normalizes the
	// text so every token is separated by exactly one space.
	// Exits the process if the file cannot be opened.
	lexicals_analyzer(const char *file_path)
	{
		{
			single_tockens[0] = '.'; single_tockens[1] = ','; single_tockens[2] = '!';
			single_tockens[3] = '='; single_tockens[4] = '+'; single_tockens[5] = '-';
			single_tockens[6] = '*'; single_tockens[7] = '/'; single_tockens[8] = '%';
			single_tockens[9] = '&'; single_tockens[10] = '~'; single_tockens[11] = '(';
			single_tockens[12] = ')'; single_tockens[13] = '|'; single_tockens[14] = '['; // [13] was a duplicate '-'; '|' pairs with '&'
			single_tockens[15] = ']'; single_tockens[16] = '{'; single_tockens[17] = '}';
			single_tockens[18] = '>'; single_tockens[19] = '<';
		}
		larger_times = 1;
		lexicals = (char*)malloc(larger_times * MAX_LEXICALS_LENGTH);
		tocken_larger_times = 1;
		tocken = (char*)malloc(tocken_larger_times * MAX_TOCKEN_LENGTH);
		if (lexicals == NULL || tocken == NULL)
		{
			std::cerr << "lexicals_analyzer: out of memory" << std::endl;
			exit(1);
		}
		if ((fp = fopen(file_path, "r")) == NULL)
		{
			std::cerr << "Open file wrong" << std::endl;
			exit(1);
		}
		cur_cont = 0;
		// int, NOT char: getc() returns EOF (-1), which a char cannot
		// reliably represent — the old `char temp` either never saw EOF
		// (unsigned char) or confused it with 0xFF (signed char).
		int temp;
		while ((temp = getc(fp)) != EOF)
		{
			if (temp == '\n' || temp == '\t' || temp == ' ' || temp == ';') // whitespace / statement end
			{
				add_separator();
			}
			else if (temp == '\"' || temp == '\'') // string or character literal
			{
				if (!copy_quoted(temp)) // EOF inside an unterminated literal
					break;
			}
			else if (temp == '/') // comment, or a plain '/'
			{
				temp = getc(fp);
				if (temp == '/') // line comment: discard to end of line
				{
					while ((temp = getc(fp)) != EOF && temp != '\n');
					add_separator();
				}
				else if (temp == '*') // block comment: discard up to "*/"
				{
					bool star = false; // true when the previous char was '*'
					while ((temp = getc(fp)) != EOF)
					{
						if (temp == '*')
							star = true;
						else if (star && temp == '/')
							break;
						else
							star = false;
					}
					add_separator();
				}
				else // lone '/': keep it as ordinary text (even at EOF — the old code dropped it)
				{
					add(&lexicals, &cur_cont, '/');
					if (temp == EOF)
						break;
					add(&lexicals, &cur_cont, (char)temp);
				}
			}
			else if (text_single((char)temp)) // single-character token: space-pad it
			{
				add_separator();
				add(&lexicals, &cur_cont, (char)temp);
				add(&lexicals, &cur_cont, ' ');
			}
			else // ordinary character of a multi-character token
			{
				add(&lexicals, &cur_cont, (char)temp);
			}
		}
		fclose(fp);
		fp = NULL;
		lexicals[cur_cont] = '\0'; // `add` keeps a GAP margin, so this never overruns
		ptocken = lexicals;        // start the token cursor at the beginning
	}
	// Release the owned buffers (the old code leaked both).
	~lexicals_analyzer()
	{
		free(lexicals);
		free(tocken);
	}
	// Returns the next space-delimited token, or NULL when the input is
	// exhausted.  The pointer aliases an internal buffer reused by the
	// next call — copy it if you need to keep it.
	char *get_next_tocken()
	{
		int i = 0;
		for (; *ptocken == ' '; ++ptocken);  // skip separators
		for (; *ptocken != '\0' && *ptocken != ' '; ++ptocken, ++i)
		{
			// Grow the token buffer, keeping room for the terminator.
			// (Replaces the old `memset(tocken, 0, sizeof(tocken))`,
			// which only zeroed sizeof(char*) bytes.)
			if (MAX_TOCKEN_LENGTH * tocken_larger_times - i <= 5)
			{
				++tocken_larger_times;
				char *grown = (char*)realloc(tocken, tocken_larger_times * MAX_TOCKEN_LENGTH);
				if (grown == NULL)
				{
					std::cerr << "lexicals_analyzer: out of memory" << std::endl;
					exit(1);
				}
				tocken = grown;
			}
			tocken[i] = *ptocken;
		}
		tocken[i] = '\0';
		if (tocken[0] == '\0') // nothing but separators left
			return NULL;
		return tocken;
	}
	int get_l_length() // length of the normalized text
	{
		return cur_cont;
	}
};
// Written while studying compiler principles — rough, needs improvement; suggestions welcome.
// main
#include "lexicals_analyzer.cpp"
using namespace std;
int main()
{
lexicals_analyzer la("a.cpp");
char *a;
while ((a = la.get_next_tocken()) != NULL)
{
cout << a << endl;
}
system("pause");
return 0;
}