合肥工业大学编译原理实验一：词法分析器

最新推荐文章于 2023-06-03 16:45:46 发布
moonchildink
最新推荐文章于 2023-06-03 16:45:46 发布
阅读量603
点赞数
文章标签： c++ 算法 开发语言
本文链接：https://blog.csdn.net/m0_63254971/article/details/130816042
版权
待分析的 C 源文件的路径写在 main() 函数之中。
运行结果如下：
在这里插入图片描述
code:
//
// Created by 29236 on 2023/5/19.
//
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

/**
 * One token produced by the lexer.
 *
 * The numeric members carry default initialisers so that a token which a
 * scanner only partially fills in (for example an identifier whose `index`
 * is assigned later, at print time) never exposes an indeterminate value
 * when it is copied.
 */
struct word {
    std::string word;  // lexeme text exactly as it appeared in the input
    std::string type;  // category name, e.g. "num", "letter", "jeff", "ari_op", "relation_op"
    int index = 0;     // numeric category code printed as the first element of the (index,lexeme) pair
    int column = 0;    // value of the column counter when the token was completed
    int line = 0;      // line number on which the token was found
};

// Pointer alias kept for compatibility with the original typedef.
using word_ptr = word *;

/*
 * author: moonchild
 * purpose: lexical analyser for a subset of the C language.
 *
 * Lookup tables used by the classifier functions in this file:
 *   key_words     - reserved words; a matching identifier is reported as a keyword
 *   jeff          - delimiter characters: , ; ( ) [ ] { }
 *   relation_op   - relational / assignment operators: < > = >= <=
 *   arithmetic_op - arithmetic operators: + - * / -- ++
 */

// Reserved words.
std::string key_words[] = {
        "do", "while", "for",
        "true", "false",
        "main",
        "if", "else",
        "bool", "int", "void",
        "printf", "scanf",
        "float", "double"
};

// Single-character delimiters.
char jeff[] = {
        ',', ';',
        '(', ')',
        '[', ']',
        '{', '}'
};

// Relational and assignment operators.
std::string relation_op[] = {
        "<", ">", "=",
        ">=", "<="
};

// Arithmetic operators.
std::string arithmetic_op[] = {
        "+", "-", "*", "/",
        "--", "++"
};

/**
 * Reads a whole text file into one string.
 *
 * Every line read with getline is appended followed by '\n', so a non-empty
 * result always ends with a newline even when the file itself does not.
 *
 * @param filePath path of the file to read
 * @return the file contents with '\n' after every line
 *
 * If the file cannot be opened, a message is written to standard error and
 * the process terminates with a failure status (the original reported the
 * error on standard output and exited with status 0, i.e. "success").
 */
std::string read_file(const std::string &filePath) {
    std::ifstream in(filePath, std::ios::in);
    if (!in) {
        std::cerr << "文件读取失败" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    std::string file;
    std::string line;
    while (std::getline(in, line)) {
        file.append(line);
        file.push_back('\n');
    }
    // The stream is closed by its destructor.
    return file;
}

/**
 * Tells whether a character is whitespace.
 *
 * @param str character to classify
 * @return true for space, tab, newline, vertical tab, form feed and
 *         carriage return; false for everything else
 */
bool is_blank(char str) {
    switch (str) {
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}

/**
 * Tells whether a character is an ASCII letter.
 *
 * The original test `'A' <= c <= 'z'` also accepted the six characters that
 * sit between 'Z' and 'a' in ASCII ('[', '\\', ']', '^', '_' and '`'), which
 * made the delimiters '[' and ']' look like identifier characters. The two
 * letter ranges are now checked separately.
 *
 * Note that '_' is not a letter; code that wants to allow it in identifiers
 * has to test for it explicitly.
 *
 * @param str character to classify
 * @return true only for 'a'..'z' and 'A'..'Z'
 */
bool is_letter(char str) {
    const bool lower = str >= 'a' && str <= 'z';
    const bool upper = str >= 'A' && str <= 'Z';
    return lower || upper;
}

/**
 * Tells whether a character is a decimal digit.
 *
 * @param str character to classify
 * @return true for '0'..'9', false otherwise
 */
bool is_digit(char str) {
    return '0' <= str && str <= '9';
}

/**
 * Tells whether a character on its own is one of the entries of the
 * arithmetic_op table.
 *
 * Only the one-character entries can match; two-character entries such as
 * "++" never equal a single character.
 *
 * @param str character to classify
 * @return true if some arithmetic_op entry consists of exactly this character
 */
bool is_arithmetic_op(char str) {
    for (const std::string &op : arithmetic_op) {
        if (op.size() == 1 && op.front() == str) {
            return true;
        }
    }
    return false;
}

/**
 * Tells whether a character on its own is one of the entries of the
 * relation_op table.
 *
 * @param str character to classify
 * @return true if some relation_op entry equals the one-character string
 *         made of `str`
 */
bool is_relation_op(char str) {
    const std::string candidate(1, str);
    const auto last = std::end(relation_op);
    return std::find(std::begin(relation_op), last, candidate) != last;
}

/**
 * Tells whether a character is one of the delimiters listed in the jeff
 * table.
 *
 * @param ch character to classify
 * @return true if `ch` appears in jeff, false otherwise
 */
bool is_jeff(char ch) {
    for (const char delimiter : jeff) {
        if (delimiter == ch) {
            return true;
        }
    }
    return false;
}

/**
 * Scans one arithmetic operator starting at `iter`.
 *
 * Consecutive operator characters are collected. A second character is only
 * accepted when the resulting two-character string is an entry of the
 * arithmetic_op table ("++" or "--" with the current table); any other run
 * of operator characters is reported as an error, as in the original.
 *
 * Fixes relative to the original:
 *  - the loop advanced with `ch = *(iter++)`, which re-read the current
 *    character and then skipped the next one, so a lone "+" was returned as
 *    "++" and the character after it was lost;
 *  - doubled operators that are not in the table (e.g. "**") were accepted;
 *  - control could reach the end of the function without returning a value.
 *
 * @param iter       in/out: position of the first operator character; on
 *                   return it points at the first character after the operator
 * @param line       current line number, stored in the token
 * @param column_ptr in/out: column counter, incremented once per consumed
 *                   character
 * @return a token of type "ari_op" with index 6
 *
 * On an invalid operator sequence a diagnostic is written to standard error
 * and the process exits with a failure status.
 *
 * NOTE(review): like the rest of the file this relies on a NUL character
 * marking the end of the input buffer.
 */
word ari_op_process(std::string::iterator &iter, int line, int *column_ptr) {
    std::string buffer;
    while (is_arithmetic_op(*iter)) {
        if (!buffer.empty()) {
            const std::string candidate = buffer + *iter;
            const auto last = std::end(arithmetic_op);
            if (std::find(std::begin(arithmetic_op), last, candidate) == last) {
                std::cerr << "Unexpected Character:" << *iter << " in line:" << line << ",in column: "
                          << *column_ptr << std::endl;
                std::exit(EXIT_FAILURE);
            }
        }
        buffer.push_back(*iter);
        ++(*column_ptr);
        ++iter;
    }

    word token;
    token.word = buffer;
    token.type = "ari_op";
    token.column = *column_ptr;
    token.line = line;
    token.index = 6;
    return token;
}

/**
 * Scans one identifier-like word (letters, digits and '_') starting at `iter`.
 *
 * Scanning stops at the first character that cannot belong to the word and
 * that character is left for the caller to classify. The original aborted
 * with "Unexpected Character" unless the word was followed by a delimiter or
 * whitespace, which rejected ordinary input such as `a+b` or `i<10`.
 *
 * The token is returned with type "letter"; the caller decides later whether
 * it is a keyword or an identifier. `index` is preset to 2 (the code the
 * caller uses for identifiers) so that the member is never left
 * uninitialised.
 *
 * @param iter       in/out: position of the first character of the word; on
 *                   return it points at the first character after the word
 * @param line       current line number, stored in the token
 * @param column_ptr in/out: column counter, incremented once per consumed
 *                   character
 * @return the scanned token
 */
word letter_process(std::string::iterator &iter, int line, int *column_ptr) {
    std::string buffer;
    while (is_letter(*iter) || is_digit(*iter) || *iter == '_') {
        buffer.push_back(*iter);
        ++(*column_ptr);
        ++iter;
    }

    word token;
    token.type = "letter";
    token.word = buffer;
    token.line = line;
    token.column = *column_ptr;
    token.index = 2;
    return token;
}

/**
 * Scans one numeric literal starting at `iter`.
 *
 * Accepted shape: digits, at most one '.', at most one exponent marker
 * ('e' or 'E') optionally followed by a sign, then exponent digits.
 *
 * Fixes relative to the original:
 *  - the character that terminates the number is no longer consumed (the
 *    original skipped it, silently dropping tokens such as ';' or ')' and
 *    hiding newlines from the line counter);
 *  - a number followed directly by an operator (`1+2`, `i<10`) ends the
 *    literal instead of aborting;
 *  - a sign directly after the exponent marker (`1e-5`) is part of the
 *    literal;
 *  - a '.' after the exponent marker and an exponent without digits are
 *    rejected;
 *  - the function always returns a value.
 *
 * A letter or '_' glued to the number (`12abc`) is still an error.
 *
 * @param iter       in/out: position of the first digit; on return it points
 *                   at the first character after the literal
 * @param line       current line number, stored in the token
 * @param column_ptr in/out: column counter, incremented once per consumed
 *                   character
 * @return a token of type "num" with index 5
 *
 * On a malformed literal a diagnostic naming the offending character is
 * written to standard error and the process exits with a failure status.
 */
word num_process(std::string::iterator &iter, int line, int *column_ptr) {
    const auto fail = [&]() {
        std::cerr << "Unexpected Character:" << *iter << " in line:" << line << ",in column: "
                  << *column_ptr << std::endl;
        std::exit(EXIT_FAILURE);
    };
    const auto is_exp_mark = [](char c) { return c == 'e' || c == 'E'; };

    bool seen_exp = false;
    bool seen_dot = false;
    std::string buffer;

    for (;; ++iter) {
        const char ch = *iter;
        bool accepted = false;
        if (is_digit(ch)) {
            accepted = true;
        } else if (is_exp_mark(ch)) {
            if (seen_exp) {
                fail();
            }
            seen_exp = true;
            accepted = true;
        } else if (ch == '.') {
            // A fractional part may appear once and only before the exponent.
            if (seen_dot || seen_exp) {
                fail();
            }
            seen_dot = true;
            accepted = true;
        } else if ((ch == '+' || ch == '-') && !buffer.empty() && is_exp_mark(buffer.back())) {
            // Sign of the exponent, e.g. the '-' in 1e-5.
            accepted = true;
        }
        if (!accepted) {
            break;
        }
        buffer.push_back(ch);
        ++(*column_ptr);
    }

    // Checked inline rather than via a helper so that the delimiters '[' and
    // ']' are never mistaken for identifier characters here.
    const char next = *iter;
    const bool glued_to_word =
            (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z') || next == '_';
    const bool dangling_exponent =
            !buffer.empty() && (is_exp_mark(buffer.back()) || buffer.back() == '+' || buffer.back() == '-');
    if (glued_to_word || dangling_exponent) {
        fail();
    }

    word token;
    token.word = buffer;
    token.type = "num";
    token.line = line;
    token.index = 5;
    token.column = *column_ptr;
    return token;
}

/**
 * Scans one relational / assignment operator starting at `iter`.
 *
 * Consecutive operator characters are collected. A further character is only
 * accepted when the extended string is an entry of the relation_op table
 * ("<=" and ">=" with the current table); any other run of operator
 * characters (for example "==" or "<<", which the table does not list) is
 * reported as an error, as in the original.
 *
 * Fixes relative to the original:
 *  - a delimiter directly after the operator (e.g. `x=(a)`) no longer aborts;
 *    scanning simply stops and the delimiter is left for the caller;
 *  - the function always returns a value.
 *
 * @param iter       in/out: position of the first operator character; on
 *                   return it points at the first character after the operator
 * @param line       current line number, stored in the token
 * @param column_ptr in/out: column counter, incremented once per consumed
 *                   character
 * @return a token of type "relation_op" with index 4
 *
 * On an invalid operator sequence a diagnostic is written to standard error
 * and the process exits with a failure status.
 */
word relation_op_process(std::string::iterator &iter, int line, int *column_ptr) {
    std::string buffer;
    while (is_relation_op(*iter)) {
        if (!buffer.empty()) {
            const std::string candidate = buffer + *iter;
            const auto last = std::end(relation_op);
            if (std::find(std::begin(relation_op), last, candidate) == last) {
                std::cerr << "Unexpected Character:" << *iter << " in line:" << line << ",in column: "
                          << *column_ptr << std::endl;
                std::exit(EXIT_FAILURE);
            }
        }
        buffer.push_back(*iter);
        ++(*column_ptr);
        ++iter;
    }

    word token;
    token.type = "relation_op";
    token.word = buffer;
    token.line = line;
    token.index = 4;
    token.column = *column_ptr;
    return token;
}

/**
 * Skips a `//` line comment.
 *
 * Advances `iter` up to the next newline and then past it. Scanning also
 * stops at a NUL character, which the rest of this file treats as the end of
 * the input; the original kept walking until it found a '\n', running off
 * the end of the buffer when the comment was not newline-terminated.
 *
 * @param iter in/out: a position inside the comment; on return it points just
 *             after the terminating newline, or at the NUL if there was none
 */
void comments_process(std::string::iterator &iter) {
    while (*iter != '\0' && *iter != '\n') {
        ++iter;
    }
    if (*iter == '\n') {
        ++iter;
    }
}

/**
 * Skips a block comment.
 *
 * `iter` must point at the opening "/" of the comment. The two opening
 * characters are skipped, then everything up to and including the closing
 * star-slash pair.
 *
 * If the comment is never closed, scanning stops at the NUL character that
 * the rest of this file treats as the end of the input. The original
 * advanced two more positions in that case and so moved past the end of the
 * buffer.
 *
 * @param iter in/out: position of the comment opener; on return it points
 *             just after the comment terminator, or at the NUL for an
 *             unterminated comment
 */
void long_comments_process(std::string::iterator &iter) {
    iter += 2;  // step over the opener
    while (*iter != '\0') {
        if (*iter == '*' && *(iter + 1) == '/') {
            iter += 2;  // step over the terminator
            return;
        }
        ++iter;
    }
}


void lexical_analysis(const string &file_path) {
    string file = read_file(file_path);
    auto iter = file.begin();
    int line = 1;
    int column = 1;

    word words[64] = {};
    auto word_iter = std::begin(words);
    while (*iter) {

        if (is_digit(*iter)) {
            *word_iter = num_process(iter, line, &column);
            word_iter++;
        } else if (is_letter(*iter)) {
            *word_iter = letter_process(iter, line, &column);
            word_iter++;
        } else if (is_arithmetic_op(*iter)) {
            if (*iter == '/' && *(iter + 1) == '/') {
                comments_process(iter);
                line++;
            } else if (*iter == '/' && *(iter + 1) == '*')
                long_comments_process(iter);
            else {
                *word_iter = ari_op_process(iter, line, &column);
                word_iter++;
            };
        } else if (is_relation_op(*iter)) {
            *word_iter = relation_op_process(iter, line, &column);
            word_iter++;
        } else if (*iter == '\n') {
            line++;
            iter++;
            column = 0;
        } else if (*iter == ' ') {
            iter++;
            column++;
        } else if (is_jeff(*iter)) {
            word new_word;
            new_word.type = "jeff";
            new_word.word = *iter;
            new_word.column = column;
            new_word.index = 3;
            new_word.line = line;
            *word_iter = new_word;
            word_iter++;
            iter++;
            column++;
        } else {
            cout << "Unexpected Character:" << *iter << "in line:" << line << endl;
            exit(0);
        }
    }


    cout << setw(20) << std::left << "letter             " << setw(20) << std::left << "prologue      " << std::left
         << setw(20) << "type      " << setw(20) << std::left
         << "Position(line,column) " << endl;
    for (auto it = std::begin(words); it != std::end(words) && it->word.length() > 0; it++) {
        string temp = it->word;
        bool is_keyword = std::any_of(std::begin(key_words), std::end(key_words), [&temp](string &item) {
            return temp == item;
        });
        if (is_keyword) {
            it->type = "keyword";
            it->index = 1;
        } else if (it->type == "letter") {
            it->type = "identifier";
            it->index = 2;
        }
        string tuple = "(" + std::to_string(it->index) + "," + it->word + ")";
        string position = "(" + std::to_string(it->line) + "," + std::to_string(it->column) + ")";
        cout << setw(20) << std::left << it->word << setw(20) << std::left << tuple << setw(20) << std::left
             << it->type << setw(20) << std::left << position << endl;
    }
}


/**
 * Program entry point.
 *
 * Analyses the file named by the first command-line argument. When no
 * argument is given, the path that used to be hard-coded here is used, so
 * running the program without arguments behaves as before.
 */
int main(int argc, char *argv[]) {
    std::string filePath = R"(D:\code\Compilers\target.c)";
    if (argc > 1) {
        filePath = argv[1];
    }
    lexical_analysis(filePath);
    return 0;
}