词法分析器

作者:朱鑫

邮箱:zhuxin@nwsuaf.edu.cn

/*C语言词法分析器,从三个方面处理:关键字、运算符、界符。并将分析结果、符号表和常数表分别输出到文件,可以忽略同一行内以 // 开头的注释
//-1 error,代表错误

//保留字
1auto       2break      3case       4char       5const          6continue       7default        8do
9double     10else      11enum      12extern    13float         14for           15goto          16if
17int       18long      19register  20return    21short         22signed        23static        24sizeof
25struct    26switch    27typedef   28union     29unsigned      30void          31volatile      32while

//运算符
33= 34==
35+ 36++ 37+=
38- 39-- 40-= 41->
42* 43*=
44/ 45/=
46% 47%=
48(
49)
50[
51]
52,
53> 54>> 55>>= 56>=
57< 58<< 59<<= 60<=
61! 62!=
63^ 64^=
65& 66&& 67&=
68| 69|| 70|=
71~
72? :
73.

//界符
74{
75}
76;
77\

//注释
78 //

//79标识符

//80常数

*/
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<string>
using namespace std;
// Reserved-word table. A word's array index is its token code (1..32); slot 0 is an
// empty placeholder so that 0 can be used to mean "not a keyword".
// NOTE: "static" (23) is listed before "sizeof" (24), so the table is NOT in strict
// lexicographic order.
string Keyword[]={"","auto","break","case","char","const","continue","default","do",
                     "double","else","enum","extern","float","for","goto","if",
                     "int","long","register","return","short","signed","static","sizeof",
                     "struct","switch","typedef","union","unsigned","void","volatile","while"
                 };
string ID[1000];                        // symbol table: distinct identifiers seen so far
int indexOfID=0;                        // number of ID slots in use
string Const[1000];                     // constant table: distinct numeric literals seen so far
int indexOfConst=0;                     // number of Const slots in use
bool IsLetter(char ch);                 // true when ch is a-z, A-Z or underscore
bool IsDigit(char ch);                  // true when ch is a decimal digit 0-9
int Reserve(string strToken);           // looks strToken up in Keyword: returns its code, or 0 if it is not a keyword
void insertID(string strToken);         // adds strToken to the symbol table (duplicates are ignored)
void insertConst(string strToken);      // adds strToken to the constant table (duplicates are ignored)
// Entry point of the lexer.
//
// Reads "in.txt" line by line and splits every line into tokens. Each token is
// written to stdout and to "out.txt" as ( line, "code", "lexeme" ). After the
// whole input has been scanned, the distinct identifiers are dumped to
// "IDOut.txt" and the distinct numeric constants to "ConstOut.txt".
//
// Token codes: 1-32 keywords, 33-73 operators, 74-77 delimiters, 78 line
// comment, 79 identifier, 80 constant, -1 unrecognised character.
//
// A "//" token ends the scan of the current line (the comment text is dropped).
// Exits with status 1 when any of the four files cannot be opened.
int main(void)
{
    int code=0,lineAt=0;                // token code and 1-based line number
    string line;                        // current input line
    int i;                              // scan position inside line
    int length=0;                       // length of the current line
    string strToken="";                 // lexeme of the token being assembled
    char ch;                            // first character of the current token
    ifstream in;                        // source text
    ofstream out;                       // token stream
    ofstream IDOut;                     // symbol table dump
    ofstream ConstOut;                  // constant table dump
    in.open("in.txt",ios::in);
    if(!in)
    {
        cerr<<"File open or create error!"<<endl;
        exit(1);
    }
    out.open("out.txt",ios::out);
    IDOut.open("IDOut.txt",ios::out);
    ConstOut.open("ConstOut.txt",ios::out);
    // Fail early instead of silently discarding all results when an output
    // file cannot be created.
    if(!out||!IDOut||!ConstOut)
    {
        cerr<<"File open or create error!"<<endl;
        exit(1);
    }
    while(getline(in,line))
    {
        lineAt++;
        if((length=line.length())==0)
        {
            continue;
        }
        i=0;
        // line[length] is the terminating '\0', which is what stops the scan.
        while(i<=length)
        {
            // Skip blanks; '\r' is included so CRLF input does not produce an
            // error token at the end of every line.
            while(line[i]==' '||line[i]=='\t'||line[i]=='\n'||line[i]=='\r')
            {
                i++;
            }
            ch=line[i];
            if(ch=='\0')
            {
                break;
            }
            strToken+=ch;
            // Every branch below leaves i on the LAST character of the token;
            // the common i++ after the chain then moves past it. Branches that
            // peek with ++i therefore step back with i-- when the peeked
            // character does not belong to the token.

            // identifier or keyword
            if(IsLetter(ch))
            {
                while( IsLetter(line[++i]) || IsDigit(line[i]) )
                {
                    strToken+=line[i];
                }
                i--;
                code=Reserve(strToken);
                if(code==0)
                {
                    code=79;
                    insertID(strToken);
                }
            }
            // unsigned integer constant
            else if(IsDigit(ch))
            {
                while(IsDigit(line[++i]))
                {
                    strToken+=line[i];
                }
                i--;
                code=80;
                insertConst(strToken);
            }
            // = ==
            else if(ch=='=')
            {
                if(line[++i]=='=')
                {
                    code=34;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=33;
                }
            }
            // + ++ +=
            else if(ch=='+')
            {
                if(line[++i]=='+')
                {
                    code=36;
                    strToken+='+';
                }
                else if(line[i]=='=')
                {
                    code=37;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=35;
                }
            }
            // - -- -= ->
            else if(ch=='-')
            {
                if(line[++i]=='-')
                {
                    code=39;
                    strToken+='-';
                }
                else if(line[i]=='=')
                {
                    code=40;
                    strToken+='=';
                }
                else if(line[i]=='>')
                {
                    code=41;
                    strToken+='>';
                }
                else
                {
                    i--;
                    code=38;
                }
            }
            // * *=
            else if(ch=='*')
            {
                if(line[++i]=='=')
                {
                    code=43;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=42;
                }
            }
            // / /= and the line comment //
            else if(ch=='/')
            {
                if(line[++i]=='=')
                {
                    code=45;
                    strToken+='=';
                }
                else if(line[i]=='/')
                {
                    // Report the comment marker, then drop the rest of the line.
                    code=78;
                    strToken+='/';
                    cout<<"(    "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<strToken<<"\""<<"  )"<<endl;
                    out<<"( "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<strToken<<"\""<<"  )"<<endl;
                    strToken="";
                    break;
                }
                else
                {
                    i--;
                    code=44;
                }
            }
            // % %=
            else if(ch=='%')
            {
                if(line[++i]=='=')
                {
                    code=47;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=46;
                }
            }
            // brackets and comma
            else if(ch=='(')
            {
                code=48;
            }
            else if(ch==')')
            {
                code=49;
            }
            else if(ch=='[')
            {
                code=50;
            }
            else if(ch==']')
            {
                code=51;
            }
            else if(ch==',')
            {
                code=52;
            }
            // > >> >>= >=
            // (no extra i++ here: the shared i++ below already steps past the
            // last character, an additional one would swallow the next token)
            else if(ch=='>')
            {
                if(line[++i]=='>')
                {
                    strToken+='>';
                    if(line[++i]=='=')
                    {
                        code=55;
                        strToken+='=';
                    }
                    else
                    {
                        i--;
                        code=54;
                    }
                }
                else if(line[i]=='=')
                {
                    code=56;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=53;
                }
            }
            // < << <<= <=
            else if(ch=='<')
            {
                if(line[++i]=='<')
                {
                    strToken+='<';
                    if(line[++i]=='=')
                    {
                        code=59;
                        strToken+='=';
                    }
                    else
                    {
                        i--;
                        code=58;
                    }
                }
                else if(line[i]=='=')
                {
                    code=60;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=57;
                }
            }
            // ! !=
            else if(ch=='!')
            {
                if(line[++i]=='=')
                {
                    code=62;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=61;
                }
            }
            // ^ ^=
            else if(ch=='^')
            {
                if(line[++i]=='=')
                {
                    code=64;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=63;
                }
            }
            // & && &=
            else if(ch=='&')
            {
                if(line[++i]=='&')
                {
                    code=66;
                    strToken+='&';
                }
                else if(line[i]=='=')
                {
                    code=67;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=65;
                }
            }
            // | || |=
            else if(ch=='|')
            {
                if(line[++i]=='|')
                {
                    code=69;
                    strToken+='|';
                }
                else if(line[i]=='=')
                {
                    code=70;
                    strToken+='=';
                }
                else
                {
                    i--;
                    code=68;
                }
            }
            // single-character operators
            else if(ch=='~')
            {
                code=71;
            }
            // Both halves of the conditional operator share code 72, as listed
            // in the code table ("72? :"); ':' used to be reported as an error.
            else if(ch=='?'||ch==':')
            {
                code=72;
            }
            else if(ch=='.')
            {
                code=73;
            }
            // delimiters
            else if(ch=='{')
            {
                code=74;
            }
            else if(ch=='}')
            {
                code=75;
            }
            else if(ch==';')
            {
                code=76;
            }
            else if(ch=='\\')
            {
                code=77;
            }
            // anything else is an error token
            else
            {
                code=-1;
            }
            i++;
            // The two output layouts differ only in padding and are kept as
            // they were for multi-character and single-character lexemes.
            if(strToken.length()>1)
            {
                cout<<"(    "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<strToken<<"\""<<"  )"<<endl;
                out<<"( "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<strToken<<"\""<<"  )"<<endl;
            }
            else
            {
                cout<<"(    "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<ch<<"\""<<"    )"<<endl;
                out<<"( "<<lineAt<<","<<"\""<<code<<"\""<<","<<"\""<<ch<<"\""<<" )"<<endl;
            }
            strToken="";
        }
    }
    // Dump both tables as "<index>\t<text>" lines.
    for(int k=0;k<indexOfID;k++)
    {
        IDOut<<k<<"\t"<<ID[k]<<endl;
    }
    for(int k=0;k<indexOfConst;k++)
    {
        ConstOut<<k<<"\t"<<Const[k]<<endl;
    }
    in.close();
    out.close();
    IDOut.close();
    ConstOut.close();
    return 0;
}
// Tells whether ch may appear in an identifier as a letter:
// a lower-case or upper-case ASCII letter, or the underscore.
bool IsLetter(char ch)
{
    const bool lowerCase = ('a'<=ch && ch<='z');
    const bool upperCase = ('A'<=ch && ch<='Z');
    return lowerCase || upperCase || ch=='_';
}
// Tells whether ch is one of the decimal digits '0'..'9'.
bool IsDigit(char ch)
{
    return !(ch<'0' || ch>'9');
}
// Looks strToken up in the reserved-word table Keyword.
//
// Returns the keyword's token code (its index in Keyword, 1..32), or 0 when
// strToken is not a keyword.
//
// The table is scanned linearly on purpose: Keyword lists "static" (23)
// before "sizeof" (24), so it is not sorted, and a binary search over it
// fails to find "static" and reports it as an ordinary identifier. With only
// 32 entries a linear scan costs nothing and keeps the documented codes.
int Reserve(string strToken)
{
    const int keywordCount=32;          // entries 1..32; slot 0 is a placeholder
    for(int code=1;code<=keywordCount;code++)
    {
        if(Keyword[code]==strToken)
        {
            return code;
        }
    }
    return 0;
}
// Records an identifier in the symbol table ID.
//
// Does nothing when strToken is already present. When the table is full the
// identifier is not stored and a diagnostic is written to cerr, instead of
// writing past the end of the fixed-size array.
void insertID(string strToken)
{
    for(int i=0;i<indexOfID;i++)
    {
        if(strToken==ID[i])
        {
            return ;
        }
    }
    const int capacity=static_cast<int>(sizeof(ID)/sizeof(ID[0]));
    if(indexOfID>=capacity)
    {
        cerr<<"Symbol table is full, \""<<strToken<<"\" was not recorded"<<endl;
        return ;
    }
    ID[indexOfID++]=strToken;
}
// Records a numeric literal in the constant table Const.
//
// Does nothing when strToken is already present. When the table is full the
// constant is not stored and a diagnostic is written to cerr, instead of
// writing past the end of the fixed-size array.
void insertConst(string strToken)
{
    for(int i=0;i<indexOfConst;i++)
    {
        if(strToken==Const[i])
        {
            return ;
        }
    }
    const int capacity=static_cast<int>(sizeof(Const)/sizeof(Const[0]));
    if(indexOfConst>=capacity)
    {
        cerr<<"Constant table is full, \""<<strToken<<"\" was not recorded"<<endl;
        return ;
    }
    Const[indexOfConst++]=strToken;
}

 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值