输入为字符串(或待进行词法分析的源程序),输出为单词串,即由(单词,类别)二元组所组成的序列。
程序具有一定的错误检查能力,例如能够发现 2a 这类不能作为单词的字符串。
保留字表并不完整,使用时可根据需要自行补充。
C++实现如下:
#include<fstream>
#include<cstring>
#include<string>
#include<fstream>
#include<sstream>
#include<iostream>
#include<map>
#include<bits/stdc++.h>
using namespace std;
// Global lookup table: maps every known lexeme (reserved word, operator,
// delimiter, separator) to the category label that the lexer prints for it.
std::map<std::string,std::string> word;
// Shared iterator for searching `word`; kept at file scope for existing users.
std::map<std::string,std::string>::iterator it;

/// Fills the global table `word` with the lexeme -> category mapping.
///
/// Calling it more than once is harmless: every entry is simply reassigned.
/// The reserved-word list is intentionally incomplete; extend `reserved_words`
/// below when more words are needed.
void map_init(){
    static const char* const reserved_words[]={
        "for","else","const","do","endl","if","cout","void","read","then",
        "cin","while","break","int","double","float","long","main",
        "continue","return","bool","string","getline","true","false"
    };
    // "!" is registered as well: the lexer emits a lone '!' as a token, and
    // without an entry here it would be printed with an empty category.
    static const char* const operators[]={
        "+","-","*","/","%","=","==","!","!=",
        "<","<=","<<",">",">=",">>",
        "|","||","&","&&",".","->"
    };
    static const char* const delimiters[]={
        "(",")",";","[","]","{","}","'"
    };

    for(std::size_t k=0;k<sizeof(reserved_words)/sizeof(reserved_words[0]);++k){
        word[reserved_words[k]]="保留字";
    }
    for(std::size_t k=0;k<sizeof(operators)/sizeof(operators[0]);++k){
        word[operators[k]]="运算符";
    }
    for(std::size_t k=0;k<sizeof(delimiters)/sizeof(delimiters[0]);++k){
        word[delimiters[k]]="界符";
    }
    // The comma is the only lexeme classified as a separator.
    word[","]="分隔符";
}
// Character classifiers. The <cctype> functions have undefined behaviour for
// negative arguments, which plain `char` produces for bytes >= 0x80 (e.g. UTF-8
// text in the analysed file), so every call goes through an unsigned char cast.
static bool lex_is_alpha(char c){
    return std::isalpha(static_cast<unsigned char>(c))!=0;
}
static bool lex_is_digit(char c){
    return std::isdigit(static_cast<unsigned char>(c))!=0;
}
static bool lex_is_space(char c){
    return std::isspace(static_cast<unsigned char>(c))!=0;
}
// True for characters that may continue an identifier (letter or digit).
static bool lex_is_word_char(char c){
    return lex_is_alpha(c)||lex_is_digit(c);
}

// Returns the category stored for `token` in the global table `word`, or
// `fallback` when the token is not registered. Never inserts into the table.
static std::string lookup_category(const std::string& token,const std::string& fallback){
    const std::map<std::string,std::string>::const_iterator pos=word.find(token);
    return pos!=word.end()?pos->second:fallback;
}

// Returns the characters that may follow `lead` to form a two-character
// operator, or a null pointer when `lead` does not start such an operator.
static const char* operator_followers(char lead){
    switch(lead){
        case '<': return "=<";   // <=  <<
        case '>': return "=>";   // >=  >>
        case '-': return ">";    // ->
        case '=': return "=";    // ==
        case '!': return "=";    // !=
        case '|': return "|";    // ||
        case '&': return "&";    // &&
        default:  return 0;
    }
}

// Prints the one- or two-character operator starting at src[pos] and returns
// the index of the first character after it.
static std::size_t emit_operator(const std::string& src,std::size_t pos,const std::string& followers){
    std::string token(1,src[pos]);
    const std::size_t next=pos+1;
    if(next<src.size()&&followers.find(src[next])!=std::string::npos){
        token+=src[next];
    }
    // Every lead character handled here is an operator, so that label is the
    // fallback for tokens missing from the table (e.g. a lone "!").
    std::cout<<token<<","<<lookup_category(token,"运算符")<<'\n';
    return pos+token.size();
}

/// Lexer driver: reads the source file at the hard-coded path and prints one
/// "lexeme,category" line per token to standard output.
///
/// Scanning stops at the first unrecognised character or unterminated string
/// literal, after printing "error!". Returns 1 when the input file cannot be
/// opened and 0 otherwise.
int main(){
    map_init(); // populate the lexeme -> category table

    static const char* const source_path="E:\\编译原理\\ans.txt";
    std::ifstream infile(source_path);
    if(!infile){
        std::cerr<<"error: cannot open "<<source_path<<std::endl;
        return 1;
    }

    // Slurp the whole file into memory; the stream closes itself on scope exit.
    std::ostringstream buf;
    buf<<infile.rdbuf();
    const std::string str=buf.str();
    const std::size_t csize=str.length();

    std::size_t i=0;
    while(i<csize){
        const char c=str[i];

        // Skip blanks, tabs, newlines and carriage returns between tokens.
        if(lex_is_space(c)){
            ++i;
            continue;
        }

        // Identifier or reserved word: a letter followed by letters/digits.
        if(lex_is_alpha(c)){
            std::size_t end=i+1;
            while(end<csize&&lex_is_word_char(str[end])) ++end;
            const std::string token=str.substr(i,end-i);
            std::cout<<token<<","<<lookup_category(token,"标识符")<<'\n';
            i=end;
            continue;
        }

        // Number, or a malformed identifier such as "2a" (digits then letters).
        if(lex_is_digit(c)){
            std::size_t end=i+1;
            while(end<csize&&lex_is_digit(str[end])) ++end;
            const bool malformed=end<csize&&lex_is_alpha(str[end]);
            if(malformed){
                while(end<csize&&lex_is_word_char(str[end])) ++end;
            }
            std::cout<<str.substr(i,end-i)
                     <<(malformed?",非法定义的标识符":",数字")<<'\n';
            i=end;
            continue;
        }

        // Operators that may be one or two characters long.
        if(const char* followers=operator_followers(c)){
            i=emit_operator(str,i,followers);
            continue;
        }

        // String literal: everything up to and including the closing quote.
        if(c=='"'){
            const std::size_t close=str.find('"',i+1);
            if(close==std::string::npos){
                // Unterminated literal: report it instead of reading past the end.
                std::cout<<"error!"<<'\n';
                break;
            }
            std::cout<<str.substr(i,close-i+1)<<",字符串"<<'\n';
            i=close+1;
            continue;
        }

        // Any remaining single-character lexeme must be present in the table.
        const std::string token(1,c);
        const std::map<std::string,std::string>::const_iterator pos=word.find(token);
        if(pos==word.end()){
            std::cout<<"error!"<<'\n';
            break;
        }
        std::cout<<token<<","<<pos->second<<'\n';
        ++i;
    }
    return 0;
}
ans.txt文件为要识别的源程序文件,将要识别的源程序放入其中即可。