编译原理词法分析器

算法描述

对于给出的源代码,我们按行将其读入,对于每一行单独进行词法分析。

  1. 过滤行前后空格
  2. 对字符串进行词语的分割
    • 有空格则把空格前的字符归为一个词
    • 比较上一个字符和当前字符是否需要进行分割
  3. 检查词语是否合法
  4. 词语合法则按 [待测代码中的单词符号] [TAB] <[单词符号种别],[单词符号内容]> 的格式进行输出。其中,单词符号种别为 KW(关键字)、OP(运算符)、SE(界符)、IDN(标识符)、INT(整型数);对于 KW、OP、SE,单词符号内容为其编号(见单词表),其余种别的内容为其值。
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

const int WORD_NUM = 26;
const string WORD[WORD_NUM] = {
    "int", "void", "return", "const", "main", "struct", "+",  "-",  "*",  "/",
    "%",   "=",    ">",      "<",     "==",   "<=", ">=", "!=", "&&",
    "||",  "(",    ")",      "{",     "}",    ";",  ",",
};
const string OPERATOR = "+-*/%><=&|";
const string SEPARATER = "(){};,[]";
int kws = 0, kwe = 6, ops = 6, ope = 20, ses = 20, see = 26;

class Analyzer {
  private:
    vector<string> lines;
    vector<string> token;
    string fileName;
    ofstream fout;

    int isWord(string word) {
        for(int i = 0; i < WORD_NUM; i++) {
            if(word == WORD[i])
                return i;
        }
        return -1;
    }
    bool isKeyWord(int idx) {
        return kws <= idx && idx < kwe;
    }
    bool isOperator(int idx) {
        return ops <= idx && idx < ope;
    }
    bool isOperator(char ch) {
        return OPERATOR.find(ch) != OPERATOR.npos;
    }
    bool isSeparater(int idx) {
        return ses <= idx && idx < see;
    }
    bool isSeparater(char ch) {
        return SEPARATER.find(ch) != SEPARATER.npos;
    }
    inline bool isNumber(char ch) {
        return ch >= '0' && ch <='9';
    }
    bool isInt(string word) {
        for(int i = 0; i < word.size(); i++) {
            if(!isNumber(word[i]))
                return false;
        }
        return true;
    }
    inline bool isCharacter(char ch) {
        return ch >= 'a' && ch <= 'z' || ch >='A' && ch <= 'Z';
    }
    bool isPartOfIdentifier(char c) {
        return isCharacter(c) || isNumber(c) || c == '_';
    }
    bool isIdentifier(string word) {
        if(isNumber(word[0])) {
            return false;
        }
        for(int i = 1; i < word.size(); i++) {
            if(!isPartOfIdentifier(word[i]))
                return false;
        }
        return true;
    }
    //输出
    inline void record(string word, string type, string content) {
        char TAB = '\t';
        string msg = word + TAB + "<" + type + "," + content + ">";
        fout << msg << endl;
        token.push_back(msg);
    }
    //int 转 string
    string to_string(int val) {
        stringstream ss;
        ss << val;
        string result;
        ss >> result;
        return result;
    }
    //分析一个单词
    bool anaylyseWord(string word) {
        if(word.empty()) {
            return true;
        }
        int idx = isWord(word);
        if(idx > -1) {
            string type;
            if(isKeyWord(idx)) type = "KW";
            if(isOperator(idx)) type = "OP";
            if (isSeparater(idx)) type = "SE";
            record(word, type, to_string(idx + 1));
            return true;
        } else {
            if(isIdentifier(word)) {
                record(word,"IND", word);
                return true;
            }
            if(isInt(word)) {
                record(word,"INT", word);
                return true;
            }
        }
        fout << "ERROR detected!" << endl;
        cout << "ERROR detected!" << endl;
        return false;
    }
    //去除字符串前后空格
    string trim(string s) { 
        if(s == "") {
            return "";
        }
        int l = 0, r = s.size() - 1;
        while(s[l] == ' ' && l < s.size()) l++;
        while(s[r] == ' ' && r > l) r--;
        return s.substr(l,r + 1);
    }

    //判断两个相邻字符是否需要分割
    bool check(char a, char b) {
        if ((isOperator(a) && !isOperator(b)) ||
            (!isOperator(a) && isOperator(b)) || isSeparater(a) ||
            (!isSeparater(a) && isSeparater(b)))
            return false;
        return true;

    }

  public:
    Analyzer(string fileName) {
        readFile(fileName);
    }
    ~Analyzer() {
        fout.close();
    }
    vector<string> getToken() {
        return token;
    }

    void readFile(string fileName) {
        this->fileName = fileName;
        fstream fin(fileName.c_str());
        if (!fin.is_open()) {
            throw "无法打开文件";
        }
        string line;
        while (getline(fin, line)) {
            line = trim(line);
            if(!line.empty())
                lines.push_back(line);
        }
        fin.close();
        // fout.open("token.txt");
        fout.open(fileName.substr(0,fileName.find_last_of(".")) + ".out");
    }

    void analyse() {
        int l = 0;
        string word = "";
        while(l < lines.size()) {
            string line = lines[l++]; //读入一行
            word.clear();
            for(int i = 0; i < line.size(); i++) {
                if(line[i] == ' ' || line[i] == '\t') { //分割单词
                    if(!anaylyseWord(word)) return; //判断单词是否合法并打印
                    word.clear();
                    continue;
                }
                if(!check(word[word.size() - 1], line[i])) { //分割单词
                    if(!anaylyseWord(word)) return; //判断单词是否合法并打印
                    word.clear();
                }
                word += line[i]; 
            }
            anaylyseWord(word); //到行末结束后,将剩余的拼成一个单词
        }
    }
};

/// Entry point: runs the lexical analyzer on "a.sy" in the working directory.
/// @return EXIT_SUCCESS when the file could be loaded and analysed,
///         EXIT_FAILURE when an exception was reported.
int main() {
    int status = EXIT_SUCCESS;
    try {
        // The analyzer lives only inside this scope, so its output file is
        // closed before the console is paused below.
        Analyzer analyzer("a.sy");
        analyzer.analyse();
    } catch (const char *msg) {
        std::cout << msg << std::endl;
        status = EXIT_FAILURE;
    } catch (const std::exception &e) {
        std::cout << e.what() << std::endl;
        status = EXIT_FAILURE;
    }
#ifdef _WIN32
    // Keep the console window open, also when an error was printed.
    // "pause" is a Windows shell command, so it is skipped elsewhere.
    std::system("pause");
#endif
    return status;
}

算法NFA和DFA及单词表

请添加图片描述
请添加图片描述

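| 单词符号 | 种类 | 种别码 |
| --- | --- | --- |
| int | 关键字 | 1 |
| void | 关键字 | 2 |
| return | 关键字 | 3 |
| const | 关键字 | 4 |
| main | 关键字 | 5 |
| struct | 关键字 | 6 |
| + | 运算符 | 7 |
| - | 运算符 | 8 |
| * | 运算符 | 9 |
| / | 运算符 | 10 |
| % | 运算符 | 11 |
| = | 运算符 | 12 |
| > | 运算符 | 13 |
| < | 运算符 | 14 |
| == | 运算符 | 15 |
| <= | 运算符 | 16 |
| >= | 运算符 | 17 |
| != | 运算符 | 18 |
| && | 运算符 | 19 |
| \|\| | 运算符 | 20 |
| ( | 界符 | 21 |
| ) | 界符 | 22 |
| { | 界符 | 23 |
| } | 界符 | 24 |
| ; | 界符 | 25 |
| , | 界符 | 26 |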
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值