词法分析器C/C++实现
实验目的
设计、编制并调试一个词法分析程序,加深对词法分析原理的理解。
实验内容
对C语言的标识符,关键字,数字,运算符等进行分析
源代码从文件读取
源代码如下
测试输入C/C++程序
#include<iostream>
#include<string>
#include<cmath>
#include<cstring>
int main()
{
int T,a,b,c,ans;
cin>>T;
while(T--)
{
cin>>a>>b>>c;
ans=0;
1@23
if(c%gcd(a,b)!=0)
{
cout<<0<<endl;
}
else
{
for(int i=1;i<=(c-b)/a;i++)
{
if((c-a*i)/b*b==(c-a*i))
ans++;
}
cout<<ans<<endl;
}
}
return 0;
}
词法分析程序
Main.cpp
#include <cstdio>
#include <iostream>
#include <cstring>
#include <string>
#define _CRT_SECURE_NO_WARNINGS
#pragma warning(disable: 4996)
#define _LEXAN_H_
// Reserved-word table consulted by Base::keyword().
// The array is declared with 50 rows of at most 12 characters each, but only
// the 31 entries listed here are initialised; the remaining rows are
// zero-filled, i.e. they hold empty strings.
// "true" and "false" are part of the table; LexAn::wordkind() reports those two
// as booleans rather than as plain keywords.
static char keywords[50][13] = { "short", "int", "long", "float", "double", "char", "struct",
"union", "enum", "typedef", "const", "unsigned", "signed",
"extern", "static", "void", "if", "else", "switch", "case",
"for", "do", "while","goto", "continue", "break", "default",
"sizeof", "return", "true", "false" };
// Character- and word-classification helpers used by the lexer.
// Every predicate returns 1 for "yes" and 0 for "no".
class Base
{
public:
    // Index (0 or 1) selecting which of the derived lexer's two line buffers
    // is currently being filled and scanned.
    int buffernum;

    Base() : buffernum(0) {}
    virtual ~Base();

    int charkind(char c);       // coarse class code of a single character
    int spaces(char c);         // is c a delimiter, so that a blank next to it can be dropped?
    int characters(char c);     // is c an ASCII letter?
    int keyword(char str[]);    // is str a reserved word?
    int signwords(char str[]);  // is str an identifier?
    int numbers(char c);        // is c a decimal digit?
    int integers(char str[]);   // is str an integer literal?
    int floats(char str[]);     // is str a floating-point literal?
};
// Maps a character to the class code that drives the tokenizer:
//   1 = letter, 2 = decimal digit, 3 = '$' or '_', 4 = backslash,
//   5 = '=', 0 = anything else.
int Base::charkind(char c)
{
    if (characters(c) == 1)
        return 1;
    if (numbers(c) == 1)
        return 2;

    switch (c)
    {
    case '$':
    case '_':
        return 3;
    case '\\':
        return 4;
    case '=':
        return 5;
    default:
        return 0;
    }
}
// Returns 1 when c is a delimiter, i.e. neither an ASCII letter, a decimal
// digit, '_' nor '$'; returns 0 otherwise. A blank adjacent to such a
// character carries no information and may be removed.
int Base::spaces(char c)
{
    const bool isLower = (c >= 'a' && c <= 'z');
    const bool isUpper = (c >= 'A' && c <= 'Z');
    const bool isDigit = (c >= '0' && c <= '9');

    if (isLower || isUpper || isDigit || c == '_' || c == '$')
        return 0;
    return 1;
}
// Returns 1 when c is an ASCII letter (a-z or A-Z), 0 otherwise.
int Base::characters(char c)
{
    const bool isLower = ('a' <= c && c <= 'z');
    const bool isUpper = ('A' <= c && c <= 'Z');
    return (isLower || isUpper) ? 1 : 0;
}
// Returns 1 when str exactly matches an entry of the `keywords` table,
// 0 otherwise.
//
// The table is declared larger than the number of words stored in it, so the
// unused rows are empty strings. They are skipped explicitly; otherwise an
// empty str would compare equal to an unused row and be reported as a keyword.
int Base::keyword(char str[])
{
    // Derive the row count from the table itself instead of repeating its size.
    const int rowCount = static_cast<int>(sizeof(keywords) / sizeof(keywords[0]));

    for (int row = 0; row < rowCount; row++)
    {
        if (keywords[row][0] == '\0')
        {
            continue; // unused slot
        }
        if (strcmp(str, keywords[row]) == 0)
        {
            return 1;
        }
    }
    return 0;
}
// Returns 1 when str is an identifier: it starts with a letter, '$' or '_'
// and contains no delimiter character (see spaces()); returns 0 otherwise.
int Base::signwords(char str[])
{
    const char first = str[0];
    if (first != '$' && first != '_' && characters(first) != 1)
        return 0;

    for (const char *p = str; *p != '\0'; ++p)
    {
        if (spaces(*p) == 1)
            return 0;
    }
    return 1;
}
// Returns 1 when c is a decimal digit ('0'..'9'), 0 otherwise.
int Base::numbers(char c)
{
    return ('0' <= c && c <= '9') ? 1 : 0;
}
// Returns 1 when str is an integer literal, 0 otherwise.
//
// Accepted forms (the whole string must match):
//   [-] digits [L]                 decimal, e.g. "42", "-7", "100L"
//   [-] 0x|0X hexdigits [L]        hexadecimal, e.g. "0x1F", "0Xff", "-0x10L"
//
// Changes compared with the previous implementation:
//   * no longer depends on the non-standard strnicmp();
//   * a leading '-' is really accepted (it used to pass the initial guard and
//     then be rejected by the scanning loop);
//   * lowercase hexadecimal digits a-f are accepted;
//   * at least one digit is required after the "0x" prefix, so "0xL" is no
//     longer classified as an integer.
int Base::integers(char str[])
{
    int pos = 0;

    if (str[pos] == '-')
    {
        pos++;
    }
    // The literal proper must start with a decimal digit (this also covers the
    // '0' of a hexadecimal prefix).
    if (numbers(str[pos]) != 1)
    {
        return 0;
    }

    // str[pos] is a non-NUL character here, so reading str[pos + 1] is safe.
    const bool isHex = (str[pos] == '0' && (str[pos + 1] == 'x' || str[pos + 1] == 'X'));
    int digitCount = 0;

    if (isHex)
    {
        pos += 2;
        while (numbers(str[pos]) == 1
               || (str[pos] >= 'A' && str[pos] <= 'F')
               || (str[pos] >= 'a' && str[pos] <= 'f'))
        {
            pos++;
            digitCount++;
        }
    }
    else
    {
        while (numbers(str[pos]) == 1)
        {
            pos++;
            digitCount++;
        }
    }

    if (digitCount == 0)
    {
        return 0;
    }
    // Optional long suffix; only the uppercase form was accepted before.
    if (str[pos] == 'L')
    {
        pos++;
    }
    return (str[pos] == '\0') ? 1 : 0;
}
// Returns 1 when str is a floating-point literal, 0 otherwise.
//
// Accepted forms (the whole string must match):
//   [-] digits '.' [digits]                      e.g. "3.14", "2.", "-0.5"
//   [-] 0x|0X hexdigits-with-exactly-one-'.'     kept for compatibility with
//                                                the previous behaviour
// Exactly one decimal point is required; exponents and the f/F suffix are not
// recognised.
//
// Changes compared with the previous implementation:
//   * no longer depends on the non-standard strnicmp();
//   * a leading '-' is really accepted (it used to pass the initial guard and
//     then be rejected by the scanning loop);
//   * lowercase hexadecimal digits a-f are accepted after a "0x" prefix;
//   * a "0x" prefix must be followed by at least one digit, so "0x." is
//     rejected.
int Base::floats(char str[])
{
    int pos = 0;

    if (str[pos] == '-')
    {
        pos++;
    }
    if (numbers(str[pos]) != 1)
    {
        return 0;
    }

    // str[pos] is a non-NUL character here, so reading str[pos + 1] is safe.
    const bool isHex = (str[pos] == '0' && (str[pos + 1] == 'x' || str[pos + 1] == 'X'));
    if (isHex)
    {
        pos += 2;
    }

    int digitCount = 0;
    int pointCount = 0;
    for (; str[pos] != '\0'; pos++)
    {
        const char c = str[pos];
        if (c == '.')
        {
            pointCount++;
            if (pointCount > 1)
            {
                return 0; // a second decimal point
            }
            continue;
        }
        const bool isHexLetter = isHex && ((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'));
        if (numbers(c) != 1 && !isHexLetter)
        {
            return 0;
        }
        digitCount++;
    }
    return (pointCount == 1 && digitCount > 0) ? 1 : 0;
}
// Out-of-line definition of the virtual destructor; Base has nothing to release.
Base::~Base()
{
}
// Line-oriented lexical analyser.
// Reads source text from "in.txt" and writes one classification line per
// token (and per comment) to "out.txt".
class LexAn : public Base
{
private:
    FILE *fin;              // input stream; NULL if "in.txt" could not be opened
    FILE *fout;             // output stream; NULL if "out.txt" could not be opened
    char bufferin[2][256];  // two line buffers, selected through Base::buffernum
    char bufferscan[256];   // copy of the cleaned line handed to getwords()

    // The object owns both streams and closes them in its destructor, so a
    // copy would close each stream twice. Copying is therefore disabled
    // (declared private and intentionally left undefined).
    LexAn(const LexAn &);
    LexAn &operator=(const LexAn &);

public:
    LexAn()
        : fin(fopen("in.txt", "r")),
          fout(fopen("out.txt", "w"))
    {
        // Start with empty strings so no buffer is ever read uninitialised.
        bufferin[0][0] = '\0';
        bufferin[1][0] = '\0';
        bufferscan[0] = '\0';
    }
    virtual ~LexAn();

    void scanwords();           // read the input and process it line by line
    void clearnotes();          // strip comments and redundant blanks from the current line
    void getwords(int state);   // split bufferscan into tokens (state machine)
    void wordkind(char str[]);  // classify one token and print the result
};
void LexAn::scanwords() //处理每一行
{
char c;
int i, j, k;
int count = 0;
int chgLine = 0;
while (1)
{
c = fgetc(fin);
if (c == EOF)
{
break;
}
/*换行符*/
if (c == '\n' || count == 256 - 2)
{
if (count == 256 - 2)
{
bufferin[buffernum][count] = c;
for (i = count; i > 0; i--)
{
if (spaces(bufferin[buffernum][i]) == 1)
{
for (j = 0, k = i + 1; k <= count; k++, j++)
{
bufferin[1 - buffernum][j] = bufferin[buffernum][k];
}
bufferin[1 - buffernum][j] = '\0';
bufferin[buffernum][i + 1] = '\0';
}
}
}
else
{
bufferin[buffernum][count] = '\0';
}
if (c == '\n')
{
chgLine = 1;
}
clearnotes();
if (strlen(bufferin[buffernum]) > 0)
{
strcpy(bufferscan, bufferin[buffernum]);
getwords(0);
}
if (count == 256 - 2)
{
buffernum = 1 - buffernum;
}
if (chgLine == 1)
{
chgLine = 0;
}
count = 0;
}
else
{
bufferin[buffernum][count++] = c;
}
}
std::cout<<"处理结果已经保存到out.txt下"<<std::endl;
}
void LexAn::clearnotes() //清除注释和多余的空格
{
int i, j, k;
int noteCount = 0;
int flag = 0;
char note[100];
/*注释*/
for (i = 0; bufferin[buffernum][i] != '\0'; i++)
{
if (bufferin[buffernum][i] == '"')
{
flag = 1 - flag;
continue;
}
if (bufferin[buffernum][i] == '/' && flag == 0)
{
if (bufferin[buffernum][i + 1] == '/')
{
for (j = i; bufferin[buffernum][j] != '\0'; j++)
{
note[noteCount++] = bufferin[buffernum][j];
}
note[noteCount] = '\0';
noteCount = 0;
fprintf(fout, " [ %s ] ---- [ 注释 ]\n", note);
//std::cout<<" [ %s ] ---- [ 注释 ]"<<std::endl;
bufferin[buffernum][i] = '\0';
break;
}
if (bufferin[buffernum][i + 1] == '*')
{
note[noteCount++] = '/';
note[noteCount++] = '*';
for (j = i + 2; bufferin[buffernum][j] != '\0'; j++)
{
note[noteCount++] = bufferin[buffernum][j];
if (bufferin[buffernum][j] == '*' && bufferin[buffernum][j + 1] == '/')
{
j += 2;
note[noteCount++] = bufferin[buffernum][j];
note[noteCount] = '\0';
noteCount = 0;
fprintf(fout, " [ %s ] ---- [ 注释 ]\n", note);
//std::cout<<" [ %s ] ---- [ 注释 ]"<<std::endl;
break;
}
}
for (; bufferin[buffernum][j] != '\0'; j++, i++)
{
bufferin[buffernum][i] = bufferin[buffernum][j];
}
if (bufferin[buffernum][j] == '\0')
{
bufferin[buffernum][i] = '\0';
}
}
}
}
//空格
for (i = 0, flag = 0; bufferin[buffernum][i] != '\0'; i++)
{
if (bufferin[buffernum][i] == '"')
{
flag = 1 - flag;
continue;
}
if (bufferin[buffernum][i] == ' ' && flag == 0)
{
for (j = i + 1; bufferin[buffernum][j] != '\0' && bufferin[buffernum][j] == ' '; j++)
{
}
if (bufferin[buffernum][j] == '\0')
{
bufferin[buffernum][i] = '\0';
break;
}
if (bufferin[buffernum][j] != '\0' && ((spaces(bufferin[buffernum][j]) == 1) || (i > 0 && spaces(bufferin[buffernum][i - 1]) == 1)))
{
for (k = i; bufferin[buffernum][j] != '\0'; j++, k++)
{
bufferin[buffernum][k] = bufferin[buffernum][j];
}
bufferin[buffernum][k] = '\0';
i--;
}
}
}
//制表符
for (i = 0, flag = 0; bufferin[buffernum][i] != '\0'; i++)
{
if (bufferin[buffernum][i] == '\t')
{
for (j = i; bufferin[buffernum][j] != '\0'; j++)
{
bufferin[buffernum][j] = bufferin[buffernum][j + 1];
}
i = -1;
}
}
}
//状态机
void LexAn::getwords(int state) //处理出单词
{
char word[100];
int charCount = 0;
int finish = 0;
int num;
int i, j, k;
for (i = 0; bufferscan[i] != '\0'; i++)
{
switch (state / 10)
{
case 0:
switch (charkind(bufferscan[i]))
{
case 1:
word[charCount++] = bufferscan[i];
state = 10;
break;
case 2:
word[charCount++] = bufferscan[i];
state = 20;
break;
case 3:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 0:
case 5:
word[charCount++] = bufferscan[i];
switch (bufferscan[i])
{
case '"':
state = 41;
break;
case '\'':
state = 42;
break;
case '(': case ')': case '{': case '}': case '[': case ']': case ';': case ',': case '.':
state = 50;
word[charCount] = '\0';
finish = 1;
break;
case '=':
state = 43;
break;
default:
state = 40;
break;
}
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 1:
switch (charkind(bufferscan[i]))
{
case 1:
word[charCount++] = bufferscan[i];
state = 10;
break;
case 2:
word[charCount++] = bufferscan[i];
state = 20;
break;
case 3:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 0:case 5:
word[charCount] = '\0';
num = 0;
while (word[num] != '\0')
num++;
//长度的处理 !!
if (num>7)
word[7] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 2:
switch (charkind(bufferscan[i]))
{
case 1:
word[charCount++] = bufferscan[i];
state = 20;
break;
case 2:
word[charCount++] = bufferscan[i];
state = 20;
break;
case 3:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 0:
if (bufferscan[i] == '.')
{
word[charCount++] = bufferscan[i];
state = 20;
break;
}
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 3:
switch (charkind(bufferscan[i]))
{
case 1:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 2:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 3:
word[charCount++] = bufferscan[i];
state = 30;
break;
case 0:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 4:
switch (state)
{
case 40:
switch (charkind(bufferscan[i]))
{
case 1:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 2:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 3:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 0:
word[charCount++] = bufferscan[i];
state = 40;
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 41:
word[charCount++] = bufferscan[i];
if (bufferscan[i] == '"')
{
if (charkind(bufferscan[i - 1]) == 4)
{
}
else
{
word[charCount] = '\0';
finish = 1;
state = 50;
}
}
break;
case 42:
word[charCount++] = bufferscan[i];
if (bufferscan[i] == '\'')
{
word[charCount] = '\0';
finish = 1;
state = 50;
}
break;
case 43:
if (bufferscan[i] == '=')
{
word[charCount++] = bufferscan[i];
state = 43;
}
else
{
word[charCount] = '\0';
finish = 1;
i--;
state = 50;
}
break;
default: word[charCount++] = bufferscan[i]; break;
}
break;
case 5:
finish = 0;
state = 0;
charCount = 0;
i--;
wordkind(word);
break;
default:break;
}
if (bufferscan[i + 1] == '\0')
{
word[charCount] = '\0';
wordkind(word);
}
}
}
// Writes one result line, " [ text ] ---- [ label ]", to the output file
// (when it is open) and to standard output.
static void reportToken(FILE *out, const char *text, const char *label)
{
    if (out != NULL)
    {
        fprintf(out, " [ %s ] ---- [ %s ]\n", text, label);
    }
    std::cout<<" [ "<<text<<" ] ---- [ "<<label<<" ]"<<std::endl;
}

// Classifies the token str and reports its category.
//
// The checks run in this order: reserved word (with "true"/"false" reported
// as booleans), identifier, integer, floating-point number, character
// literal, string literal, operator or punctuation. Anything else is reported
// as an erroneous word.
//
// Fixes compared with the previous implementation:
//   * an empty token is ignored;
//   * a character or string literal needs both its opening and its closing
//     quote, so a lone ' or " is reported as an erroneous word;
//   * multi-character symbols that match no known operator (for example "->"
//     or "::") are reported as "其他符号" instead of producing no output, and
//     a run of blanks is reported as "空格";
//   * the output file is only written when it was opened successfully.
void LexAn::wordkind(char str[]) //判断单词类型并且输出
{
    if (str[0] == '\0')
    {
        return;
    }

    const size_t len = strlen(str);
    const char *label = NULL;

    if (keyword(str) == 1)
    {
        const bool isBool = (strcmp(str, "true") == 0 || strcmp(str, "false") == 0);
        label = isBool ? "布尔型" : "关键字";
    }
    else if (signwords(str) == 1)
    {
        label = "标识符";
    }
    else if (integers(str) == 1)
    {
        label = "整型";
    }
    else if (floats(str) == 1)
    {
        label = "浮点型";
    }
    else if (len >= 2 && str[0] == '\'' && str[len - 1] == '\'')
    {
        label = "字符型";
    }
    else if (len >= 2 && str[0] == '"' && str[len - 1] == '"')
    {
        label = "字符串";
    }
    else if (spaces(str[0]) == 1 && str[0] != '"' && str[0] != '\'')
    {
        /* Operators and punctuation */
        if (strcmp(str, "<") == 0 || strcmp(str, ">") == 0 || strcmp(str, "<=") == 0 || strcmp(str, ">=") == 0)
        {
            label = "< > <= >=";
        }
        else if (strcmp(str, "<<") == 0 || strcmp(str, ">>") == 0 || strcmp(str, ">>>") == 0 || strcmp(str, "<<<") == 0)
        {
            label = "<< >>";
        }
        else if (strchr(str, '=') != NULL)
        {
            const bool isEquality = (strcmp(str, "==") == 0 || strcmp(str, "!=") == 0);
            label = isEquality ? "== !=" : "运算符";
        }
        else if (strcmp(str, "||") == 0)
        {
            label = "||";
        }
        else if (strcmp(str, "&&") == 0)
        {
            label = "&&";
        }
        else if (strcmp(str, "++") == 0 || strcmp(str, "--") == 0 || strcmp(str, "!") == 0 || strcmp(str, "~") == 0)
        {
            label = "++ -- ! ~";
        }
        else if (len == 1)
        {
            switch (str[0])
            {
            case '?':
            case ':':
                label = "?:";
                break;
            case ' ':
                label = "空格";
                break;
            case '{':
            case '}':
                label = "{}";
                break;
            case '[':
            case ']':
            case '(':
            case ')':
            case '.':
                label = "[] () .";
                break;
            case ',':
                label = ",";
                break;
            case ';':
                label = ";";
                break;
            case '+':
            case '-':
                label = "+";
                break;
            case '*':
            case '/':
            case '%':
                label = "* /";
                break;
            case '|':
                label = "|";
                break;
            case '^':
                label = "^";
                break;
            case '&':
                label = "&";
                break;
            default:
                label = "其他符号";
                break;
            }
        }
        else
        {
            // Multi-character symbol that matched none of the operators above.
            label = (str[0] == ' ') ? "空格" : "其他符号";
        }
    }
    else
    {
        label = "错误的单词";
    }

    reportToken(fout, str, label);
}
// Closes the input and output streams.
// Either pointer is NULL when the corresponding fopen() in the constructor
// failed, and passing NULL to fclose() is undefined behaviour, so each stream
// is checked first.
LexAn::~LexAn()
{
    if (fin != NULL)
    {
        fclose(fin);
    }
    if (fout != NULL)
    {
        fclose(fout);
    }
}
int main()
{
LexAn res;
res.scanwords();
return 0;
}