简单C语言词法分析器(C++实现)

该程序实现了对C语言中变量、整数、符号及部分保留字的词法分析,下面给出各单词符号与种别码的对应关系。

1:保留字: int,double,char,if,else,for,while,do,return,break,continue,main
2:变量:以下划线或字母开头的数字字母串,中间可混有下划线,例:_abc1_bcd
3:整数:任意的数字串,程序可将其转化为对应的整型量
4:运算符:+ - * / = == > >= < <= !=
5:分隔符:, ; :
6:界符:( ) [ ] { } ' "

代码实现:
main.cpp

#include <iostream>
#include "LA.h"

using namespace std;

/// Entry point: builds a lexer over "test.c", echoes the loaded text,
/// then prints the token stream produced by LA::scan().
int main() {
    // String literals are arrays of const char; binding one to a plain
    // `char *` is ill-formed since C++11, so the pointer must be const.
    const char *fn = "test.c";
    LA l(fn);
    l.show();
    l.scan();
    return 0;
}

LA.h

#ifndef LEXICAL_ANALYZER_LA_H
#define LEXICAL_ANALYZER_LA_H

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>


using namespace std;

// Reserved-word table: identifiers found here are reported as reserved
// words rather than as variables.
static std::string rsv_words[12] = {
    "int", "double", "char", "if", "else", "for",
    "while", "do", "return", "break", "continue", "main"
};

/// Simple lexical analyzer: reads a source file into memory on
/// construction and prints its tokens to standard output from scan().
class LA
{
public:
    /// Opens the file named by `s` and loads its contents into `txt`.
    LA(const char * s);

    /// Prints the loaded source text followed by a newline.
    void show()
    {
        std::cout << txt << std::endl;
    }

    /// Tokenizes the loaded text and prints one line per token.
    void scan();

private:
    // NOTE: data members are initialised in declaration order; keep this
    // order in sync with the constructor's initializer list.
    std::ifstream fin;       // input file stream used to read the source
    std::ostringstream sin;  // staging buffer the file contents are copied into
    std::string txt;         // the loaded source text
    std::string word;        // characters of the token currently being built
    int fg;                  // token category code
    int line;                // current line number
    int num;                 // value of the integer literal being built
    char ch;                 // most recently read character
};

#endif //LEXICAL_ANALYZER_LA_H

LA.cpp

#include <iostream>
#include <limits>
#include <string>

#include "LA.h"

using namespace std;

/// Constructs the analyzer and loads the whole file named by `s` into `txt`.
///
/// @param s  Path of the source file to analyse.
///
/// If the file cannot be opened a diagnostic is written to std::cerr and
/// `txt` is left empty, so a later scan() simply finds no tokens.
// The initializer list follows the members' declaration order
// (fin, ..., fg, line, num, ch): that is the order in which they are
// actually initialised, regardless of how the list is written.
LA::LA(const char * s):fin(s), fg(0), line(1), num(0), ch('\0')
{
    if(!fin.is_open()){
        std::cerr << "error: cannot open file: " << s << std::endl;
    }
    else{
        // Copy the entire file into the string stream in one go.
        sin << fin.rdbuf();
        txt = sin.str();
    }
    fin.close();
    fin.clear();
}

/// Returns true when `c` is an ASCII letter.
static bool isLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/// Returns true when `c` is an ASCII decimal digit.
static bool isDigit(char c)
{
    return c >= '0' && c <= '9';
}

/// Returns true when `c` may appear inside an identifier after its first character.
static bool isWordChar(char c)
{
    return isLetter(c) || isDigit(c) || c == '_';
}

/// Returns true for whitespace that separates tokens without ending a line.
static bool isBlank(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f';
}

/// Returns true when `candidate` is one of the entries of `rsv_words`.
static bool isReservedWord(const std::string& candidate)
{
    for(const std::string& keyword : rsv_words){
        if(keyword == candidate)
            return true;
    }
    return false;
}

/// Prints one token as `<code,lexeme>` on its own line.
template <typename Lexeme>
static void emitToken(int code, const Lexeme& lexeme)
{
    std::cout << '<' << code << ',' << lexeme << '>' << '\n';
}

/// Prints one malformed lexeme as `error: <lexeme>` on its own line.
static void emitError(const std::string& lexeme)
{
    std::cout << "error: " << '<' << lexeme << '>' << '\n';
}

/// Tokenizes `txt` and prints the result to standard output.
///
/// Output format:
///   - `line N :` before the tokens of each line (the first header is
///     printed for the line holding the first non-blank character);
///   - `<code,lexeme>` per token, where code is 1 reserved word,
///     2 variable, 3 integer, 4 operator, 5 separator, 6 delimiter;
///   - `error: <lexeme>` for malformed input.
///
/// Error cases: an identifier starting with `_` whose second character is
/// not a letter or digit, an integer literal that does not fit in `int`,
/// and any character that starts no known token.
void LA::scan()
{
    const std::string::size_type len = txt.size();
    std::string::size_type pos = 0;

    // Start from a clean state so that calling scan() more than once
    // restarts the line numbering instead of continuing the previous run.
    line = 1;
    fg = 0;
    num = 0;
    word.clear();

    // Skip leading whitespace silently; only the line counter advances so
    // the first header names the line of the first real character.
    while(pos < len && (isBlank(txt[pos]) || txt[pos] == '\n')){
        if(txt[pos] == '\n')
            ++line;
        ++pos;
    }
    std::cout << "line " << line << " :" << '\n';

    // Every branch below advances `pos` by at least one character, so the
    // loop always terminates, and every read is guarded by `pos < len`.
    while(pos < len){
        ch = txt[pos];

        // A newline starts a new line header.
        if(ch == '\n'){
            ++line;
            std::cout << "line " << line << " :" << '\n';
            ++pos;
            continue;
        }

        // Other whitespace (including tabs and carriage returns) only separates tokens.
        if(isBlank(ch)){
            ++pos;
            continue;
        }

        // Identifiers and reserved words.
        if(ch == '_' || isLetter(ch)){
            const std::string::size_type start = pos;
            const bool startsWithUnderscore = (ch == '_');
            ++pos;
            // A leading '_' must be followed by a letter or digit to be valid.
            const bool secondIsAlnum =
                pos < len && (isLetter(txt[pos]) || isDigit(txt[pos]));
            // Consume identifier characters only; whatever follows (';',
            // blanks, end of text, ...) is left for the next iteration.
            while(pos < len && isWordChar(txt[pos]))
                ++pos;
            word.assign(txt, start, pos - start);

            if(startsWithUnderscore){
                if(secondIsAlnum){
                    fg = 2;
                    emitToken(fg, word);
                }
                else{
                    fg = -1;
                    emitError(word);
                }
            }
            else{
                fg = isReservedWord(word) ? 1 : 2;
                emitToken(fg, word);
            }
            word.clear();
            fg = 0;
            continue;
        }

        // Integer literals.
        if(isDigit(ch)){
            const std::string::size_type start = pos;
            const int maxInt = std::numeric_limits<int>::max();
            bool overflow = false;
            fg = 3;
            num = 0;
            while(pos < len && isDigit(txt[pos])){
                const int digit = txt[pos] - '0';
                // Check before multiplying: signed overflow is undefined behaviour.
                if(!overflow && num > (maxInt - digit) / 10)
                    overflow = true;
                if(!overflow)
                    num = 10 * num + digit;
                ++pos;
            }
            if(overflow)
                emitError(txt.substr(start, pos - start));
            else
                emitToken(fg, num);
            num = 0;
            fg = 0;
            continue;
        }

        // Operators, separators and delimiters.
        switch(ch){
            // Single-character operators.
            case '+':
            case '-':
            case '*':
            case '/':
                fg = 4;
                emitToken(fg, ch);
                ++pos;
                break;

            // Operators that may be followed by '=' to form a two-character operator.
            case '=':
            case '<':
            case '>':
            case '!':
                fg = 4;
                if(pos + 1 < len && txt[pos + 1] == '='){
                    word.assign(txt, pos, 2);
                    emitToken(fg, word);
                    word.clear();
                    pos += 2;
                }
                else{
                    emitToken(fg, ch);
                    ++pos;
                }
                break;

            // Separators.
            case ',':
            case ';':
            case ':':
                fg = 5;
                emitToken(fg, ch);
                ++pos;
                break;

            // Delimiters.
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
            case '"':
            case '\'':
                fg = 6;
                emitToken(fg, ch);
                ++pos;
                break;

            // Anything else is reported and skipped so scanning always makes progress.
            default:
                fg = -1;
                emitError(std::string(1, ch));
                fg = 0;
                ++pos;
                break;
        }
    }

    // End of text: mirror the end-of-input marker and flush buffered output.
    ch = '\0';
    std::cout.flush();
}

测试文件 test.c

int main()
{
    int __ab12_asd23;
    int _a1 = 001, b2 = 12, abc_def;
    for(int i = 0; i <= 10; i++) {
        _a1 += b2;
        if(_a1 == 37)
            break;
        else
            continue;
    }
    return 0;
}
  • 4
    点赞
  • 43
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值