#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Reserved words recognised by the lexer.
string KEYWORD[13] ={"const","int","char","void","main","if","else","do","while","for","scanf","printf","return"};
// Token codes of the reserved words; entry i belongs to KEYWORD[i].
string keyword[13] = {"CONSTTK","INTTK","CHARTK","VOIDTK","MAINTK","IFTK","ELSETK","DOTK","WHILETK","FORTK","SCANFTK","PRINTFTK","RETURNTK"};
// Grammar labels printed when a ';' is reached (first entry differs from the other twelve).
string keyword2[13] = {"<常量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>","<变量定义>"};
// Separator characters. The second entry is the comma; it used to be spelled
// as the multi-character literal '/,' which does not fit into a char.
char SEPARATER[8]={';',',','(',')','[',']','{','}'};
// Token codes of the separators; entry i belongs to SEPARATER[i].
string separater[8]={"SEMICN","COMMA","LPARENT","RPARENT","LBRACK","RBRACK","LBRACE","RBRACE"};
// Single-character operators.
char OPERATOR[7]={'+','-','*','/','>','<','='};
// Two-character operators.
string OPERATOR2[4]={"<=",">=","==","!="};
// Token codes of the two-character operators; entry i belongs to OPERATOR2[i].
string operato2[4]={"LEQ","GEQ","EQL","NEQ"};
// Holds the index of a matched keyword.
// NOTE(review): this name may collide with the POSIX index() function on
// toolchains whose headers pull in <strings.h> - confirm on the target compiler.
int index=0;
// Token codes of the single-character operators; entry i belongs to OPERATOR[i].
string operato[7] = {"PLUS","MINU","MULT","DIV","GRE","LSS","ASSIGN"};
// Characters that are skipped between tokens.
char FILTER[4]={' ','\t','\r','\n'};
//const int IDENTIFIER=100; // identifier value (disabled)
const int CONSTANT=101; // constant value
//const int FILTER_VALUE=102; // filter character value (disabled)
/**
 * Looks `word` up in KEYWORD.
 * Returns the 1-based position of the match (so that a hit on the first
 * entry is distinguishable from "not found"), or 0 when there is no match.
 **/
int IsKeyword(string word){
    int pos=0;
    while(pos<13 && KEYWORD[pos]!=word){
        ++pos;
    }
    return pos<13 ? pos+1 : 0;
}
/**
 * Tells whether `ch` is one of the characters listed in SEPARATER.
 * The scan is bounded by the real size of the table; the former fixed
 * bound of 10 read past the end of the 8-element array.
 **/
bool IsSeparater(char ch){
    const int count=static_cast<int>(sizeof(SEPARATER)/sizeof(SEPARATER[0]));
    for(int i=0;i<count;i++){
        if(SEPARATER[i]==ch){
            return true;
        }
    }
    return false;
}
/**
 * Tells whether `ch` is one of the seven characters listed in OPERATOR.
 **/
bool IsOperator(char ch){
    bool found=false;
    for(const char *cur=OPERATOR; cur!=OPERATOR+7 && !found; ++cur){
        found=(*cur==ch);
    }
    return found;
}
/**
 * Tells whether `ch` is one of the four characters listed in FILTER.
 **/
bool IsFilter(char ch){
    int pos=0;
    while(pos<4){
        if(ch==FILTER[pos]) return true;
        ++pos;
    }
    return false;
}
/**
 * Tells whether `ch` lies in the range 'A'..'Z'.
 **/
bool IsUpLetter(char ch){
    return !(ch<'A' || ch>'Z');
}
/**
 * Tells whether `ch` lies in the range 'a'..'z'.
 **/
bool IsLowLetter(char ch){
    return !(ch<'a' || ch>'z');
}
/**
 * Tells whether `ch` lies in the range '0'..'9'.
 **/
bool IsDigit(char ch){
    return !(ch<'0' || ch>'9');
}
/**
 * Linear search for `str` in the first `n` elements of `a`.
 * Works for any element type that supports ==, e.g. char or string tables.
 *
 * The result is an array position, so it is returned as int rather than as T:
 * returning T could not be instantiated for string and lost the -1 marker
 * where char is unsigned.
 *
 * @param a   table to search
 * @param n   number of elements of `a` to examine
 * @param str element to look for
 * @return position of the first match, or -1 when there is none
 **/
template <class T>
int value(T *a,int n,T str){
    for(int i=0;i<n;i++){
        if(a[i]==str) return i;
    }
    return -1;
}
/**词法分析**/
void analyse(FILE * fpin){
ofstream out;
out.open("output.txt");
//out.open("C:\\Users\\10512\\Desktop\\此法分析器实验\\2.txt");
char ch=' ';
string arr="";
string arr2="";
while((ch=fgetc(fpin))!=EOF){//每次读取一个字符
arr="";
if(IsFilter(ch)){ //判断是否为过滤符
}
else if(IsLowLetter(ch) | ch=='_'){ //判断是否为关键字
while(IsLowLetter(ch)||IsDigit(ch)||ch=='_'||IsUpLetter(ch)){ //读取所有的字符
arr += ch;
ch=fgetc(fpin);
}
if(index=IsKeyword(arr)){ //判断是否为13关键字中的一个
cout<<keyword[index-1]+" "<<arr<<endl;
if(arr2==""){
arr2=keyword[index-1];
}
out<<keyword[index-1]+" "<<arr<<endl;
fseek(fpin,-1L,SEEK_CUR);//指针往回退一个
}
else
{
cout<<"IDENFR "<<arr<<endl;
//out<<"IDENFR "<<arr<<endl;
fseek(fpin,-1L,SEEK_CUR);
}
}
else if(IsDigit(ch)){ //判断是否为数字
while(IsDigit(ch)||(ch=='.'&&IsDigit(fgetc(fpin)))){
arr += ch;
ch=fgetc(fpin);
}
fseek(fpin,-1L,SEEK_CUR);
cout<<"INTCON "<<arr<<endl;
cout<<"<无符号整数>\n<整数>"<<endl;//#语法分析器新增代码
out<<"INTCON "<<arr<<endl;
}
else if(IsUpLetter(ch)||IsLowLetter(ch)){ //判断是否为字母
while(IsUpLetter(ch)||IsLowLetter(ch)||IsDigit(ch)){
arr += ch;
ch=fgetc(fpin);
}
fseek(fpin,-1L,SEEK_CUR);
cout<<CONSTANT+" "<<arr<<endl;
out<<"arr"<<endl;
}
else switch(ch){
case '+':
case '-':
case '*':
case '/':
case '>':
case '<':
case '=':
case '!':
{arr += ch;
if((ch=fgetc(fpin))!=EOF&IsOperator(ch)){//匹配到单目运算符后,再次匹配后面的字符是否还有运算符
arr +=ch;
for(int i=0;i<4;i++){
if(OPERATOR2[i]==arr){
cout<<operato2[i]+" "<<arr<<endl;
out<<operato2[i]+" "<<arr<<endl;
break;
}
}
break;
}
else{//匹配到单目运算符后直接输出
fseek(fpin,-1L,SEEK_CUR);
cout<<operato[value(OPERATOR,8,*arr.data())]+" "<<arr<<endl;
out<<operato[value(OPERATOR,8,*arr.data())]+" "<<arr<<endl;
break;
}
}
case ';':{
int i = IsKeyword(arr2);
cout<<arr2<<" "<<i<<" "<<keyword2[i]<<endl;
arr2="";
}
case ',':
case '(':
{//匹配(后面的字母判断是否为标识符或者关键字 else 则只输出输出(
arr += ch;
if((ch=fgetc(fpin))!=EOF&(IsUpLetter(ch)|(IsLowLetter(ch)))){
//首先输出( 及其内码值
cout<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
out<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
arr="";//去掉(
while(IsUpLetter(ch)||IsLowLetter(ch)||IsDigit(ch)){ //读取所有的字符
arr += ch;
ch=fgetc(fpin);
}
if(index=IsKeyword(arr)){ //判断是否为13关键字中的一个
cout<<keyword[index-1]+" "<<arr<<endl;
out<<keyword[index-1]+" "<<arr<<endl;
fseek(fpin,-1L,SEEK_CUR);
break;
}
else
{
cout<<"IDENFR "<<arr<<endl;
out<<"IDENFR "<<arr<<endl;
fseek(fpin,-1L,SEEK_CUR);
break;
}
}else{//匹配不到其他字符则直接输出 ; , (本身及其内码值
fseek(fpin,-1L,SEEK_CUR);
cout<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
out<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
break;
}
}
case ')':
case '[':
case ']':
case '{':
case '}':
{//直接输出以上匹配到的字符
arr += ch;
cout<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
out<<separater[value(SEPARATER,8,*arr.data())]+" "<<arr<<endl;
break;
}
case '_':
//匹配_后面的是否有字母有则输出为标识符 else 则只输出输出 _
{ arr += ch;
if((ch=fgetc(fpin))!=EOF&(IsUpLetter(ch)|(IsLowLetter(ch)))){
while(IsLowLetter(ch)||IsDigit(ch)){ //读取所有的字符
arr += ch;
ch=fgetc(fpin);
}
cout<<"IDENFR "<<arr<<endl;
out<<"IDENFR "<<arr<<endl;
fseek(fpin,-1L,SEEK_CUR);
}else{
cout<<"CHARCON _"<<endl;
out<<"CHARCON _";
}
break;
}
case '\"':
{//用来匹配字符常量以匹配到下一个"为结束符
ch=fgetc(fpin);
while(ch!='\"'){
arr += ch;
ch=fgetc(fpin);
}
cout<<"STRCON "<<arr<<endl;
out<<"STRCON "<<arr<<endl;
break;
}
case '\'':
{ //匹配 ' 后面的是否有字符 有则输出 CHARCON 并跳过下一个 ' else则不进行输出并跳过下一个 '
arr+=ch;
if((ch=fgetc(fpin)) !=EOF &(IsOperator(ch)|IsUpLetter(ch)|IsLowLetter(ch)|IsDigit(ch))|ch=='\_')
{
arr="";
arr=ch;
if(ch=='_'){
cout<<"CHARCON "<<ch<<endl;
fseek(fpin,1L,SEEK_CUR);
break;
}
cout<<"CHARCON "<<arr<<endl;
out<<"CHARCON "<<arr<<endl;
fseek(fpin,1L,SEEK_CUR);
break;
}
fseek(fpin,1L,SEEK_CUR);
break;
}
default :
//cout<<"\""<<ch<<"\":无法识别的字符!"<<endl
;
}
}
out.close();//释放流资源
}
/**
 * Entry point: runs the lexer over "testfile.txt" in the working directory.
 * Returns 0 on success and 1 when the input file cannot be opened.
 **/
int main()
{
    FILE *fpin=fopen("testfile.txt","r"); // open the input for reading
    if(!fpin){
        // Without this check a missing file would hand a null stream to fgetc.
        cerr<<"cannot open testfile.txt"<<endl;
        return 1;
    }
    cout<<"------词法分析如下------"<<endl;
    analyse(fpin);
    fclose(fpin);
    return 0;
}
// Compiler Principles - Lexical Analyzer
// (source page note: latest recommended article published 2024-06-11 17:27:40)