// lexicals_analyzer.cpp
#include <cstdio>
#include <iostream>
#include <cstdlib>
#include <windows.h>
using namespace std;
class lexicals_analyzer
{
private:
	static const int MAX_LEXICALS_LENGTH = 200; // growth unit (bytes) of the whole-text buffer
	static const int MAX_TOCKEN_LENGTH = 50;    // growth unit (bytes) of the single-token buffer
	static const int GAP = 10;                  // grow a buffer when its free margin falls to this
	static const int single_tockens_num = 20;   // size of the single-character token table
	char single_tockens[single_tockens_num];    // single-character tokens (operators, brackets, ...)
	int larger_times;        // current growth multiplier of `lexicals`
	int tocken_larger_times; // current growth multiplier of `tocken`
	int cur_cont;            // number of characters currently stored in `lexicals`
	char *lexicals;          // normalized text: tokens separated by single spaces
	char *tocken;            // scratch buffer whose address get_next_tocken() returns
	char *ptocken;           // read cursor into `lexicals`
	FILE *fp;                // input file; closed before the constructor returns

	// The class owns raw malloc'd buffers, so copying would double-free.
	// Declared but not defined (pre-C++11 "non-copyable" idiom).
	lexicals_analyzer(const lexicals_analyzer&);
	lexicals_analyzer& operator=(const lexicals_analyzer&);

	// Append one character to *buf, growing it when the free margin
	// shrinks to GAP.  Aborts on allocation failure instead of writing
	// through a possibly-null pointer (the old code only printed a
	// message and carried on).
	void add(char **buf, int *pcur_cont, const char ch)
	{
		// `<=` rather than `==`: never miss the trigger point.
		if (larger_times * MAX_LEXICALS_LENGTH - *pcur_cont <= GAP)
		{
			++larger_times;
			// realloc into a temporary: assigning straight back would
			// leak the old block on failure.
			char *grown = (char*)realloc(*buf, larger_times * MAX_LEXICALS_LENGTH);
			if (grown == NULL)
			{
				std::cerr << "lexicals_analyzer: out of memory" << std::endl;
				exit(1);
			}
			*buf = grown;
		}
		(*buf)[(*pcur_cont)++] = ch;
	}

	// True when ch is one of the single-character tokens.
	bool text_single(const char ch)
	{
		for (int i = 0; i < single_tockens_num; ++i)
		{
			if (ch == single_tockens[i])
				return true;
		}
		return false;
	}

	// Append a separating space unless the buffer is empty or already
	// ends with one.  The `cur_cont > 0` guard fixes the old
	// out-of-bounds read of lexicals[-1] at the start of the file.
	void add_separator()
	{
		if (cur_cont > 0 && lexicals[cur_cont - 1] != ' ')
			add(&lexicals, &cur_cont, ' ');
	}

	// Copy a quoted literal (string or character) verbatim, honouring
	// backslash escapes so an escaped quote does not terminate it.
	// On entry `c` holds the opening quote.  Returns false when EOF is
	// hit inside the literal.  The escape state toggles, so "\\\\"
	// terminates correctly (the old flag logic got this wrong).
	bool copy_quoted(int &c)
	{
		const char quote = (char)c;
		bool escaped = false;
		add(&lexicals, &cur_cont, (char)c); // opening quote
		while ((c = getc(fp)) != EOF)
		{
			if (!escaped && c == quote) // unescaped closing quote
			{
				add(&lexicals, &cur_cont, (char)c);
				add(&lexicals, &cur_cont, ' ');
				return true;
			}
			// A backslash escapes the next char; two in a row cancel out.
			escaped = (!escaped && c == '\\');
			add(&lexicals, &cur_cont, (char)c);
		}
		return false;
	}
public:
	// Reads file_path, strips // and /* */ comments, and normalizes the
	// text so every token is separated by exactly one space.
	// Exits the process if the file cannot be opened.
	lexicals_analyzer(const char *file_path)
	{
		{
			single_tockens[0] = '.'; single_tockens[1] = ','; single_tockens[2] = '!';
			single_tockens[3] = '='; single_tockens[4] = '+'; single_tockens[5] = '-';
			single_tockens[6] = '*'; single_tockens[7] = '/'; single_tockens[8] = '%';
			single_tockens[9] = '&'; single_tockens[10] = '~'; single_tockens[11] = '(';
			single_tockens[12] = ')'; single_tockens[13] = '|'; single_tockens[14] = '['; // [13] was a duplicate '-'; '|' pairs with '&'
			single_tockens[15] = ']'; single_tockens[16] = '{'; single_tockens[17] = '}';
			single_tockens[18] = '>'; single_tockens[19] = '<';
		}
		larger_times = 1;
		lexicals = (char*)malloc(larger_times * MAX_LEXICALS_LENGTH);
		tocken_larger_times = 1;
		tocken = (char*)malloc(tocken_larger_times * MAX_TOCKEN_LENGTH);
		if (lexicals == NULL || tocken == NULL)
		{
			std::cerr << "lexicals_analyzer: out of memory" << std::endl;
			exit(1);
		}
		if ((fp = fopen(file_path, "r")) == NULL)
		{
			std::cerr << "Open file wrong" << std::endl;
			exit(1);
		}
		cur_cont = 0;
		// int, NOT char: getc() returns EOF (-1), which a char cannot
		// reliably represent — the old `char temp` either never saw EOF
		// (unsigned char) or confused it with 0xFF (signed char).
		int temp;
		while ((temp = getc(fp)) != EOF)
		{
			if (temp == '\n' || temp == '\t' || temp == ' ' || temp == ';') // whitespace / statement end
			{
				add_separator();
			}
			else if (temp == '\"' || temp == '\'') // string or character literal
			{
				if (!copy_quoted(temp)) // EOF inside an unterminated literal
					break;
			}
			else if (temp == '/') // comment, or a plain '/'
			{
				temp = getc(fp);
				if (temp == '/') // line comment: discard to end of line
				{
					while ((temp = getc(fp)) != EOF && temp != '\n');
					add_separator();
				}
				else if (temp == '*') // block comment: discard up to "*/"
				{
					bool star = false; // true when the previous char was '*'
					while ((temp = getc(fp)) != EOF)
					{
						if (temp == '*')
							star = true;
						else if (star && temp == '/')
							break;
						else
							star = false;
					}
					add_separator();
				}
				else // lone '/': keep it as ordinary text (even at EOF — the old code dropped it)
				{
					add(&lexicals, &cur_cont, '/');
					if (temp == EOF)
						break;
					add(&lexicals, &cur_cont, (char)temp);
				}
			}
			else if (text_single((char)temp)) // single-character token: space-pad it
			{
				add_separator();
				add(&lexicals, &cur_cont, (char)temp);
				add(&lexicals, &cur_cont, ' ');
			}
			else // ordinary character of a multi-character token
			{
				add(&lexicals, &cur_cont, (char)temp);
			}
		}
		fclose(fp);
		fp = NULL;
		lexicals[cur_cont] = '\0'; // `add` keeps a GAP margin, so this never overruns
		ptocken = lexicals;        // start the token cursor at the beginning
	}
	// Release the owned buffers (the old code leaked both).
	~lexicals_analyzer()
	{
		free(lexicals);
		free(tocken);
	}
	// Returns the next space-delimited token, or NULL when the input is
	// exhausted.  The pointer aliases an internal buffer reused by the
	// next call — copy it if you need to keep it.
	char *get_next_tocken()
	{
		int i = 0;
		for (; *ptocken == ' '; ++ptocken);  // skip separators
		for (; *ptocken != '\0' && *ptocken != ' '; ++ptocken, ++i)
		{
			// Grow the token buffer, keeping room for the terminator.
			// (Replaces the old `memset(tocken, 0, sizeof(tocken))`,
			// which only zeroed sizeof(char*) bytes.)
			if (MAX_TOCKEN_LENGTH * tocken_larger_times - i <= 5)
			{
				++tocken_larger_times;
				char *grown = (char*)realloc(tocken, tocken_larger_times * MAX_TOCKEN_LENGTH);
				if (grown == NULL)
				{
					std::cerr << "lexicals_analyzer: out of memory" << std::endl;
					exit(1);
				}
				tocken = grown;
			}
			tocken[i] = *ptocken;
		}
		tocken[i] = '\0';
		if (tocken[0] == '\0') // nothing but separators left
			return NULL;
		return tocken;
	}
	int get_l_length() // length of the normalized text
	{
		return cur_cont;
	}
};
// Written while studying compiler principles — rough, needs improvement; suggestions welcome.
// main
#include "lexicals_analyzer.cpp"
using namespace std;
int main()
{
lexicals_analyzer la("a.cpp");
char *a;
while ((a = la.get_next_tocken()) != NULL)
{
cout << a << endl;
}
system("pause");
return 0;
}