// Author: 朱鑫 (Zhu Xin)
/* Lexical analyzer for C. It recognizes three token classes: keywords, operators and
delimiters. The token stream, the symbol (identifier) table and the constant table are
written to files. A // comment is ignored up to the end of its line.
// -1 error: unrecognized character
// Keywords
1auto 2break 3case 4char 5const 6continue 7default 8do
9double 10else 11enum 12extern 13float 14for 15goto 16if
17int 18long 19register 20return 21short 22signed 23static 24sizeof
25struct 26switch 27typedef 28union 29unsigned 30void 31volatile 32while
// Operators
33= 34==
35+ 36++ 37+=
38- 39-- 40-= 41->
42* 43*=
44/ 45/=
46% 47%=
48(
49)
50[
51]
52,
53> 54>> 55>>= 56>=
57< 58<< 59<<= 60<=
61! 62!=
63^ 64^=
65& 66&& 67&=
68| 69|| 70|=
71~
72? :
73.
// Delimiters
74{
75}
76;
77\
// Comment
78 //
// 79 identifier
// 80 constant
*/
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<string>
using namespace std;
// Keyword table. The index of each entry is its token code (1..32); slot 0 is an
// unused placeholder so that code 0 can mean "not a keyword".
// Note: the entries are not in strict alphabetical order ("static" precedes "sizeof").
string Keyword[]=
{
    "",
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "static",   "sizeof",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while"
};

// Symbol table: distinct identifiers, and the number of slots in use.
string ID[1000];
int indexOfID=0;

// Constant table: distinct numeric literals, and the number of slots in use.
string Const[1000];
int indexOfConst=0;

// True when ch is an ASCII letter or an underscore.
bool IsLetter(char ch);
// True when ch is an ASCII decimal digit.
bool IsDigit(char ch);
// Returns the keyword code of strToken, or 0 when strToken is not a keyword.
int Reserve(string strToken);
// Adds strToken to the symbol table unless it is already there.
void insertID(string strToken);
// Adds strToken to the constant table unless it is already there.
void insertConst(string strToken);
// Token codes that are assigned by the scanner itself rather than looked up in a table.
enum
{
    CODE_ERROR=-1,       // character that starts no known token
    CODE_COMMENT=78,     // "//": the rest of the line is ignored
    CODE_IDENTIFIER=79,
    CODE_CONSTANT=80
};

// One operator/delimiter spelling and its token code.
struct SymbolEntry
{
    string lexeme;
    int code;
};

// Operators and delimiters, ordered longest first so that the first entry that
// matches is always the longest possible lexeme (">>=" before ">>" before ">").
static const SymbolEntry kSymbolTable[]=
{
    {">>=",55}, {"<<=",59},

    {"==",34}, {"++",36}, {"+=",37}, {"--",39}, {"-=",40}, {"->",41},
    {"*=",43}, {"/=",45}, {"//",CODE_COMMENT}, {"%=",47},
    {">>",54}, {">=",56}, {"<<",58}, {"<=",60},
    {"!=",62}, {"^=",64}, {"&&",66}, {"&=",67}, {"||",69}, {"|=",70},

    {"=",33}, {"+",35}, {"-",38}, {"*",42}, {"/",44}, {"%",46},
    {"(",48}, {")",49}, {"[",50}, {"]",51}, {",",52},
    {">",53}, {"<",57}, {"!",61}, {"^",63}, {"&",65}, {"|",68},
    {"~",71}, {"?",72}, {".",73},
    {"{",74}, {"}",75}, {";",76}, {"\\",77}
};

// Finds the longest operator/delimiter starting at line[pos] (pos must be < line.length()).
// Stores its spelling in lexeme and returns its code; when nothing matches, lexeme
// becomes the single character at pos and CODE_ERROR is returned.
static int matchSymbol(const string& line,string::size_type pos,string& lexeme)
{
    const size_t entryCount=sizeof(kSymbolTable)/sizeof(kSymbolTable[0]);
    for(size_t k=0;k<entryCount;++k)
    {
        const SymbolEntry& entry=kSymbolTable[k];
        // compare() clamps the length to the end of the line, so a spelling that
        // would run past the end simply fails to match.
        if(line.compare(pos,entry.lexeme.length(),entry.lexeme)==0)
        {
            lexeme=entry.lexeme;
            return entry.code;
        }
    }
    lexeme.assign(1,line[pos]);
    return CODE_ERROR;
}

// Writes one record of the form ( line,"code","lexeme" ) to the console and to out.
static void emitToken(ostream& out,int lineAt,int code,const string& lexeme)
{
    cout<<"( "<<lineAt<<",\""<<code<<"\",\""<<lexeme<<"\" )"<<endl;
    out<<"( "<<lineAt<<",\""<<code<<"\",\""<<lexeme<<"\" )"<<endl;
}

// Splits one source line into tokens, emits each of them, and records identifiers
// and constants in the global tables.
static void scanLine(const string& line,int lineAt,ostream& out)
{
    const string::size_type length=line.length();
    string::size_type pos=0;
    while(pos<length)
    {
        const char ch=line[pos];
        // '\r' is skipped too, so files with CRLF line endings do not produce an
        // error token at the end of every line.
        if(ch==' '||ch=='\t'||ch=='\n'||ch=='\r')
        {
            ++pos;
            continue;
        }
        if(ch=='\0')
        {
            break;  // a NUL byte ends the scan of this line
        }

        string lexeme;
        int code;
        if(IsLetter(ch))
        {
            // Identifier or keyword: a letter/underscore followed by letters, digits, underscores.
            string::size_type end=pos+1;
            while(end<length&&(IsLetter(line[end])||IsDigit(line[end])))
            {
                ++end;
            }
            lexeme=line.substr(pos,end-pos);
            pos=end;
            code=Reserve(lexeme);
            if(code==0)
            {
                code=CODE_IDENTIFIER;
                insertID(lexeme);
            }
        }
        else if(IsDigit(ch))
        {
            // Constant: a run of decimal digits.
            string::size_type end=pos+1;
            while(end<length&&IsDigit(line[end]))
            {
                ++end;
            }
            lexeme=line.substr(pos,end-pos);
            pos=end;
            code=CODE_CONSTANT;
            insertConst(lexeme);
        }
        else
        {
            // Operator, delimiter or unknown character. The position advances by
            // exactly the lexeme length; the previous code advanced one character
            // too far after ">=" and ">>=", dropping the character that followed.
            code=matchSymbol(line,pos,lexeme);
            pos+=lexeme.length();
        }

        emitToken(out,lineAt,code,lexeme);
        if(code==CODE_COMMENT)
        {
            break;  // everything after "//" is comment text
        }
    }
}

// Reads in.txt line by line, writes the token stream to out.txt (and the console),
// then dumps the symbol table to IDOut.txt and the constant table to ConstOut.txt.
// Returns 1 when any of the files cannot be opened, 0 otherwise.
int main(void)
{
    ifstream in;        // source text to analyze
    ofstream out;       // token stream
    ofstream IDOut;     // symbol table
    ofstream ConstOut;  // constant table

    in.open("in.txt",ios::in);
    if(!in)
    {
        cerr<<"File open or create error!"<<endl;
        return 1;
    }
    out.open("out.txt",ios::out);
    IDOut.open("IDOut.txt",ios::out);
    ConstOut.open("ConstOut.txt",ios::out);
    if(!out||!IDOut||!ConstOut)
    {
        // Without this check a failed create would silently lose all file output.
        cerr<<"File open or create error!"<<endl;
        return 1;
    }

    int lineAt=0;  // 1-based number of the line being scanned (blank lines are counted)
    string line;
    while(getline(in,line))
    {
        ++lineAt;
        scanLine(line,lineAt,out);
    }

    for(int i=0;i<indexOfID;i++)
    {
        IDOut<<i<<"\t"<<ID[i]<<endl;
    }
    for(int i=0;i<indexOfConst;i++)
    {
        ConstOut<<i<<"\t"<<Const[i]<<endl;
    }

    in.close();
    out.close();
    IDOut.close();
    ConstOut.close();
    return 0;
}
// Reports whether ch can appear in an identifier as a non-digit character:
// an ASCII letter of either case, or an underscore.
bool IsLetter(char ch)
{
    const bool lowerCase=('a'<=ch)&&(ch<='z');
    const bool upperCase=('A'<=ch)&&(ch<='Z');
    return ch=='_'||lowerCase||upperCase;
}
// Reports whether ch is one of the ASCII decimal digits '0'..'9'.
bool IsDigit(char ch)
{
    return !(ch<'0'||ch>'9');
}
// Looks strToken up in the Keyword table.
// Returns the keyword's code (its index in Keyword, starting at 1), or 0 when
// strToken is not a keyword.
//
// The lookup is a linear scan on purpose: Keyword is not strictly sorted
// ("static" is stored before "sizeof"), so the former binary search never found
// "static" and reported it as an identifier. Scanning keeps every keyword's
// code unchanged and does not depend on the table's order.
int Reserve(string strToken)
{
    // Slot 0 of Keyword is a placeholder, so valid codes run from 1 to keywordCount.
    const int keywordCount=static_cast<int>(sizeof(Keyword)/sizeof(Keyword[0]))-1;
    for(int code=1;code<=keywordCount;++code)
    {
        if(Keyword[code]==strToken)
        {
            return code;
        }
    }
    return 0;
}
// Records strToken in the symbol table ID unless an equal entry already exists.
// When the table is full the identifier is reported on cerr and dropped instead
// of being written past the end of the array.
void insertID(string strToken)
{
    const int capacity=static_cast<int>(sizeof(ID)/sizeof(ID[0]));
    for(int i=0;i<indexOfID;i++)
    {
        if(strToken==ID[i])
        {
            return;  // already recorded
        }
    }
    if(indexOfID>=capacity)
    {
        cerr<<"Symbol table is full, identifier not recorded: "<<strToken<<endl;
        return;
    }
    ID[indexOfID++]=strToken;
}
// Records strToken in the constant table Const unless an equal entry already exists.
// When the table is full the constant is reported on cerr and dropped instead
// of being written past the end of the array.
void insertConst(string strToken)
{
    const int capacity=static_cast<int>(sizeof(Const)/sizeof(Const[0]));
    for(int i=0;i<indexOfConst;i++)
    {
        if(strToken==Const[i])
        {
            return;  // already recorded
        }
    }
    if(indexOfConst>=capacity)
    {
        cerr<<"Constant table is full, constant not recorded: "<<strToken<<endl;
        return;
    }
    Const[indexOfConst++]=strToken;
}