该程序实现了对C语言中变量、整数、符号及部分保留字的词法分析,下面给出各单词符号与种别码的对应关系。
1:保留字: int,double,char,if,else,for,while,do,return,break,continue,main
2:变量:以下划线或字母开头的数字字母串,中间可混有下划线,例:_abc1_bcd
3:整数:任意的数字串,程序可将其转化为对应的整型量
4:运算符:+ - * / = == > >= < <= !=
5:分隔符:, ; :
6:界符:( ) [ ] { } ' "
代码实现:
main.cpp
#include <iostream>
#include "LA.h"
using namespace std;
// Program entry point: loads "test.c", echoes its contents, then prints the
// token stream produced by the lexical analyzer.
int main() {
    // String literals are const in C++; binding one to a plain `char*` is
    // ill-formed since C++11. LA's constructor takes `const char*`, so the
    // const-correct pointer is accepted unchanged.
    const char *fn = "test.c";
    LA l(fn);
    l.show();   // dump the raw source text
    l.scan();   // tokenize and print <category,lexeme> pairs
    return 0;
}
LA.h
#ifndef LEXICAL_ANALYZER_LA_H
#define LEXICAL_ANALYZER_LA_H
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
using namespace std;
// Reserved-word table (category code 1). It is a read-only lookup table, so it
// is declared const to prevent accidental modification at run time.
static const std::string rsv_words[12] = {"int","double","char","if","else","for","while","do","return","break","continue","main"};
// Lexical analyzer for a small subset of C.
//
// The constructor reads a whole source file into memory; scan() then walks the
// text and prints one entry per token to standard output. Token category
// codes: 1 reserved word, 2 variable, 3 integer, 4 operator, 5 separator,
// 6 delimiter (-1 is used internally for a malformed token).
class LA
{
private:
    // NOTE: members are initialized in declaration order; keep this order in
    // sync with the constructor's initializer list.
    std::ifstream fin;       // input file opened by the constructor
    std::ostringstream sin;  // staging buffer used to read the whole file
    std::string txt;         // the source text being analyzed
    std::string word;        // characters of the token currently being built
    int fg;                  // category code of the current token
    int line;                // current line number
    int num;                 // value of the integer literal being built
    char ch;                 // most recently read character
public:
    // Prints the loaded source text followed by a newline. Only reads state,
    // so it is const-qualified and usable on const objects.
    void show() const { std::cout << txt << std::endl; }
    // Loads the file named by `s` (a NUL-terminated path).
    LA(const char * s);
    // Tokenizes the loaded text and prints the tokens to standard output.
    void scan();
};
#endif //LEXICAL_ANALYZER_LA_H
LA.cpp
#include "LA.h"
#include <climits>
#include <iostream>
using namespace std;
// Opens the file named by `s` and loads its entire contents into `txt`.
//
// The initializer list follows the member declaration order (fin, fg, line,
// num, ch), which is the order in which the members are actually initialized.
// If the file cannot be opened a diagnostic is written to standard error and
// `txt` stays empty, so show()/scan() then operate on an empty text.
LA::LA(const char * s):fin(s), fg(0), line(1), num(0), ch('\0')
{
    if(!fin.is_open()){
        cerr<<"error: cannot open file "<<s<<endl;
        return;
    }
    // Copy the whole stream buffer in one go, then keep it as a string.
    sin<<fin.rdbuf();
    txt = sin.str();
    fin.close();
    fin.clear();
}
// ---------------------------------------------------------------------------
// File-local helpers used only by LA::scan.
// ---------------------------------------------------------------------------

// Returns the character at `index`, or '\0' when `index` is past the end, so
// the scanner can never index outside the text.
static char la_char_at(const std::string &text, std::string::size_type index)
{
    return index < text.size() ? text[index] : '\0';
}

// True for the ASCII letters a-z and A-Z.
static bool la_is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for the ASCII digits 0-9.
static bool la_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

// True for characters that may appear inside an identifier.
static bool la_is_ident_char(char c)
{
    return la_is_letter(c) || la_is_digit(c) || c == '_';
}

// True for in-line whitespace that carries no token and no line break.
static bool la_is_blank(char c)
{
    return c == ' ' || c == '\t' || c == '\r';
}

// True when `candidate` is one of the reserved words in rsv_words.
static bool la_is_reserved(const std::string &candidate)
{
    const unsigned count = sizeof(rsv_words) / sizeof(rsv_words[0]);
    for(unsigned i = 0; i < count; ++i){
        if(candidate == rsv_words[i])
            return true;
    }
    return false;
}

// Appends the run of identifier characters starting at `pos` to `out` and
// returns the index of the first character after that run.
static std::string::size_type la_take_ident(const std::string &text,
                                            std::string::size_type pos,
                                            std::string &out)
{
    while(la_is_ident_char(la_char_at(text, pos))){
        out += text[pos];
        ++pos;
    }
    return pos;
}

// Maps a punctuation character to its category code: 4 operator,
// 5 separator, 6 delimiter, or -1 when the character is not recognized.
static int la_symbol_class(char c)
{
    switch(c){
    case '+': case '-': case '*': case '/':
    case '=': case '<': case '>': case '!':
        return 4;
    case ',': case ';': case ':':
        return 5;
    case '(': case ')': case '[': case ']':
    case '{': case '}': case '"': case '\'':
        return 6;
    default:
        return -1;
    }
}

// Tokenizes `txt` and prints the result to standard output.
//
// Output format:
//   "line N :"          before the first token and after every line break
//   "<code,lexeme>"     for each recognized token (integers print their value)
//   "error: <lexeme>"   for a malformed or unrecognized token
//
// Scanning stops at the end of the text or at the first NUL character.
//
// Differences from the previous implementation (all bug fixes):
//   * every character is consumed, so tabs, carriage returns and unknown
//     symbols no longer make the loop spin forever; unknown symbols are
//     reported as errors, tabs and carriage returns are skipped as blanks;
//   * a '_' that is not followed by a letter or digit is still an error, but
//     the error token now only absorbs identifier characters, so it cannot
//     swallow a delimiter or a line break, nor read past the end of the text;
//   * integer literals that do not fit in `int` are reported as errors
//     instead of overflowing;
//   * the scratch members are reset on entry, so scan() may be called again.
void LA::scan()
{
    typedef std::string::size_type Index;
    Index pos = 0;   // index of the character currently being examined

    line = 1;
    num = 0;
    fg = 0;
    word.clear();

    // Skip leading blank space; line breaks are counted but get no header,
    // so the first header names the first line that has content.
    for(;;){
        ch = la_char_at(txt, pos);
        if(ch == '\n')
            ++line;
        else if(!la_is_blank(ch))
            break;
        ++pos;
    }
    std::cout<<"line "<<line<<" :"<<'\n';

    while((ch = la_char_at(txt, pos)) != '\0'){
        if(la_is_blank(ch)){
            ++pos;
            continue;
        }
        if(ch == '\n'){
            ++line;
            std::cout<<"line "<<line<<" :"<<'\n';
            ++pos;
            continue;
        }

        if(ch == '_'){
            // Variable starting with '_': valid only when the underscore is
            // immediately followed by a letter or a digit.
            const char follower = la_char_at(txt, pos + 1);
            fg = (la_is_letter(follower) || la_is_digit(follower)) ? 2 : -1;
            pos = la_take_ident(txt, pos, word);
            if(fg == 2)
                std::cout<<'<'<<fg<<','<<word<<'>'<<'\n';
            else
                std::cout<<"error: "<<'<'<<word<<'>'<<'\n';
            word.clear();
        }
        else if(la_is_letter(ch)){
            // Reserved word (code 1) or variable starting with a letter (code 2).
            pos = la_take_ident(txt, pos, word);
            fg = la_is_reserved(word) ? 1 : 2;
            std::cout<<'<'<<fg<<','<<word<<'>'<<'\n';
            word.clear();
        }
        else if(la_is_digit(ch)){
            // Integer literal: accumulate the value, watching for overflow.
            fg = 3;
            bool overflow = false;
            char c;
            while(la_is_digit(c = la_char_at(txt, pos))){
                const int digit = c - '0';
                word += c;   // keep the spelling for the error message
                if(!overflow){
                    // 10*num + digit <= INT_MAX  <=>  num <= (INT_MAX - digit)/10
                    if(num > (INT_MAX - digit) / 10)
                        overflow = true;
                    else
                        num = 10 * num + digit;
                }
                ++pos;
            }
            if(overflow)
                std::cout<<"error: "<<'<'<<word<<'>'<<'\n';
            else
                std::cout<<'<'<<fg<<','<<num<<'>'<<'\n';
            num = 0;
            word.clear();
        }
        else{
            fg = la_symbol_class(ch);
            if(fg == -1){
                // Unknown character: report it and move on, otherwise the
                // scanner would never advance.
                std::cout<<"error: "<<'<'<<ch<<'>'<<'\n';
                ++pos;
            }
            else{
                word.assign(1, ch);
                ++pos;
                // '=', '<', '>' and '!' combine with a following '=' into a
                // two-character operator (==, <=, >=, !=).
                const bool pairable = (ch == '=' || ch == '<' || ch == '>' || ch == '!');
                if(pairable && la_char_at(txt, pos) == '='){
                    word += '=';
                    ++pos;
                }
                std::cout<<'<'<<fg<<','<<word<<'>'<<'\n';
                word.clear();
            }
        }
        fg = 0;
    }
    std::cout.flush();
}
测试文件 test.c
int main()
{
int __ab12_asd23;
int _a1 = 001, b2 = 12, abc_def;
for(int i = 0; i <= 10; i++) {
_a1 += b2;
if(_a1 == 37)
break;
else
continue;
}
return 0;
}