待分析的C源文件路径写在main()函数之中。
运行结果如下:
code:
//
// Created by 29236 on 2023/5/19.
//
# include<iostream>
#include<vector>
#include<fstream>
#include<string>
#include<algorithm>
#include<iomanip>
using namespace std;
/**
 * One token produced by the lexer.
 *
 * The numeric fields carry default initializers so that a default-constructed
 * token never holds indeterminate values (several scanners build a token and
 * fill in only some of the fields before returning it by value).
 */
typedef struct word {
    string word;     // lexeme text exactly as it appeared in the input
    string type;     // category label, e.g. "num", "letter", "jeff", "ari_op", "relation_op"
    int index = 0;   // numeric category code printed in the (index,lexeme) tuple
    int column = 0;  // value of the scanner's column counter when the token was recorded
    int line = 0;    // line number on which the token was found
} word, *word_ptr;
/*
 * author: moonchild
 * Purpose: lexical analyzer for a subset of the C language.
 * Token tables consulted by the scanner functions in this file:
 *   1. key_words     - reserved words; identifier tokens whose text matches an
 *                      entry are reported as keywords
 *   2. jeff          - single-character delimiters: , ; ( ) [ ] { }
 *   3. relation_op   - relational / assignment operators: < > = >= <=
 *   4. arithmetic_op - arithmetic operators: + - * / -- ++
 *
 * NOTE: is_relation_op() and is_arithmetic_op() compare a one-character string
 * against these tables, so the two-character entries (">=", "<=", "--", "++")
 * never match there; multi-character operators are assembled by the
 * *_process scanners instead.
 */
// Reserved words recognised by the lexer.
string key_words[] = {"do", "while", "for", "true", "false", "main", "if", "else", "bool", "int", "void", "printf",
"scanf", "float", "double"};
// Delimiter characters; each one becomes its own token.
char jeff[] = {',', ';', '(', ')', '[', ']', '{', '}'};
// Relational operators (plus plain assignment '=').
string relation_op[] = {"<", ">", "=", ">=", "<="};
// Arithmetic operators.
string arithmetic_op[] = {"+", "-", "*", "/", "--", "++"};
/**
 * Reads the text file at filePath and returns its contents.
 *
 * Every line read is followed by a '\n' in the result, so non-empty content
 * always ends with a newline even when the file itself does not.
 *
 * If the file cannot be opened, an error message is printed and the process
 * terminates with a non-zero exit status (the previous status 0 reported
 * success to the calling shell even though nothing was read).
 *
 * @param filePath path of the file to read
 * @return the file contents with normalised '\n' line endings
 */
string read_file(const string &filePath) {
    ifstream in(filePath, ios::in);
    if (in.fail()) {
        cout << "文件读取失败" << endl;
        exit(1);
    }
    string file;
    string line;
    while (getline(in, line)) {
        file.append(line);
        file.append("\n");
    }
    // The stream is closed by its destructor when the function returns.
    return file;
}
/**
 * Tells whether a character is whitespace: tab, newline, vertical tab,
 * carriage return, form feed or space.
 *
 * @param str character to classify
 * @return true for one of the six whitespace characters, false otherwise
 */
bool is_blank(char str) {
    switch (str) {
        case '\t':
        case '\n':
        case '\v':
        case '\r':
        case '\f':
        case ' ':
            return true;
        default:
            return false;
    }
}
/**
 * Tells whether a character can start an identifier: an ASCII letter
 * (a-z, A-Z) or an underscore.
 *
 * The previous single range test ('A' <= c <= 'z') also accepted the
 * punctuation that sits between 'Z' and 'a' in ASCII: '[', '\', ']', '^' and
 * '`'. That made the bracket delimiters look like letters, so they were
 * absorbed into identifiers instead of being reported as delimiters.
 * The underscore, which lies in the same gap, is kept on purpose: the
 * dispatcher uses this predicate alone to detect the start of an identifier.
 *
 * @param str character to classify
 * @return true for a-z, A-Z and '_', false otherwise
 */
bool is_letter(char str) {
    const bool lower = str >= 'a' && str <= 'z';
    const bool upper = str >= 'A' && str <= 'Z';
    return lower || upper || str == '_';
}
/**
 * Tells whether a character is a decimal digit.
 *
 * @param str character to classify
 * @return true for '0' through '9', false otherwise
 */
bool is_digit(char str) {
    return str >= '0' && str <= '9';
}
/**
 * Tells whether a single character equals one of the entries of the
 * arithmetic_op table. Because the comparison is made against a one-character
 * string, only the one-character entries of the table can match.
 *
 * @param str character to look up
 * @return true when the character, taken as a string, is in arithmetic_op
 */
bool is_arithmetic_op(char str) {
    const string candidate(1, str);
    for (string &entry : arithmetic_op) {
        if (entry == candidate) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a single character equals one of the entries of the
 * relation_op table. Because the comparison is made against a one-character
 * string, only the one-character entries of the table can match.
 *
 * @param str character to look up
 * @return true when the character, taken as a string, is in relation_op
 */
bool is_relation_op(char str) {
    const string candidate(1, str);
    for (string &entry : relation_op) {
        if (entry == candidate) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a character is one of the delimiter characters listed in the
 * jeff table.
 *
 * @param ch character to look up
 * @return true when ch appears in jeff, false otherwise
 */
bool is_jeff(char ch) {
    for (char delimiter : jeff) {
        if (delimiter == ch) {
            return true;
        }
    }
    return false;
}
/**
 * Scans one arithmetic-operator token starting at *iter.
 *
 * A token is a single operator character, optionally followed by the same
 * character once more (e.g. "++", "--"). A different operator character
 * directly after the first one, or a third operator character, is reported as
 * an error and terminates the process with a non-zero status.
 *
 * Fixes over the previous version:
 *  - the loop advanced with `ch = *(iter++)`, which re-read the character that
 *    had just been consumed; a lone "+" therefore came out as "++", a real
 *    "++" was rejected, and the character after the operator could be skipped;
 *  - reaching the string terminator fell off the end of the function without
 *    returning a value.
 *
 * @param iter       position of the first operator character; on return it
 *                   points at the first character after the token
 * @param line       current line number, stored in the token and used in errors
 * @param column_ptr column counter, advanced by one per consumed character
 * @return token with type "ari_op" and index 6
 */
word ari_op_process(string::iterator &iter, int line, int *column_ptr) {
    string buffer;
    // is_arithmetic_op() is false for the terminator, so the loop also stops at end of input.
    while (is_arithmetic_op(*iter)) {
        const char ch = *iter;
        // Only a repetition of the first character may extend the token, and only once.
        const bool too_long = buffer.length() > 1;
        const bool mismatch = !buffer.empty() && buffer.at(0) != ch;
        if (too_long || mismatch) {
            cout << "Unexpected Character:" << *iter << " in line:" << line << ",in column: " << *column_ptr
                 << endl;
            exit(1);
        }
        buffer.push_back(ch);
        (*column_ptr)++;
        ++iter;
    }
    word words{};
    words.word = buffer;
    words.type = "ari_op";
    words.column = *column_ptr;
    words.line = line;
    words.index = 6;
    return words;
}
/**
 * Scans one identifier-like token (letters, digits and underscores) starting
 * at *iter.
 *
 * The token ends, without consuming the terminating character, at a
 * delimiter, at whitespace, at an arithmetic or relational operator, or at the
 * end of the input. Any other character is reported as an error and terminates
 * the process with a non-zero status.
 *
 * Fixes over the previous version:
 *  - an operator directly after an identifier ("a+b", "i<10", "x=1") was
 *    rejected as an unexpected character;
 *  - reaching the string terminator fell off the end of the function without
 *    returning a value;
 *  - the token's index field was left uninitialised.
 *
 * @param iter       position of the first character; on return it points at
 *                   the character that ended the token
 * @param line       current line number, stored in the token and used in errors
 * @param column_ptr column counter, advanced by one per consumed character
 * @return token with type "letter"; index is left at 0 for the caller to assign
 */
word letter_process(string::iterator &iter, int line, int *column_ptr) {
    string buffer;
    while (*iter) {
        const char ch = *iter;
        if (is_letter(ch) || is_digit(ch) || ch == '_') {
            buffer.push_back(ch);
            (*column_ptr)++;
            ++iter;
            continue;
        }
        // These characters legitimately end an identifier; leave them for the caller.
        if (is_jeff(ch) || is_blank(ch) || is_arithmetic_op(ch) || is_relation_op(ch)) {
            break;
        }
        cout << "Unexpected Character:" << *iter << " in line:" << line << ",in column: " << *column_ptr << endl;
        exit(1);
    }
    word words{};
    words.type = "letter";
    words.word = buffer;
    words.line = line;
    words.column = *column_ptr;
    return words;
}
/**
 * Scans one numeric literal starting at *iter.
 *
 * Accepts digits, at most one '.', at most one exponent marker ('e' or 'E')
 * and an optional sign directly after the exponent marker. The literal ends,
 * without consuming the terminating character, at a delimiter, at whitespace,
 * at an arithmetic or relational operator, or at the end of the input. Any
 * other character (or a second '.' / exponent marker) is reported as an error
 * and terminates the process with a non-zero status.
 *
 * Fixes over the previous version:
 *  - the character that ended the literal was consumed before returning, so
 *    the delimiter after a number (e.g. ']' or ';') was never tokenised and a
 *    newline after a number was never counted;
 *  - an operator directly after a number ("1+2", "10<n") was rejected;
 *  - reaching the string terminator fell off the end of the function without
 *    returning a value.
 *
 * @param iter       position of the first digit; on return it points at the
 *                   character that ended the literal
 * @param line       current line number, stored in the token and used in errors
 * @param column_ptr column counter, advanced by one per consumed character
 * @return token with type "num" and index 5
 */
word num_process(string::iterator &iter, int line, int *column_ptr) {
    bool seen_exponent = false;
    bool seen_dot = false;
    string buffer;

    // Reports the character under the cursor and aborts.
    const auto reject = [&]() {
        cout << "Unexpected Character:" << *iter << " in line:" << line << ",in column: " << *column_ptr
             << endl;
        exit(1);
    };

    while (*iter) {
        const char ch = *iter;
        const bool after_exponent = !buffer.empty() && (buffer.back() == 'e' || buffer.back() == 'E');
        if (is_digit(ch)) {
            // plain digit, always accepted
        } else if (ch == 'e' || ch == 'E') {
            if (seen_exponent) {
                reject();
            }
            seen_exponent = true;
        } else if (ch == '.') {
            if (seen_dot) {
                reject();
            }
            seen_dot = true;
        } else if ((ch == '+' || ch == '-') && after_exponent) {
            // sign of the exponent, e.g. "1e-5"; must be tested before the operator check below
        } else if (is_jeff(ch) || is_blank(ch) || is_arithmetic_op(ch) || is_relation_op(ch)) {
            break;  // end of the literal; the terminating character stays for the caller
        } else {
            reject();
        }
        buffer.push_back(ch);
        (*column_ptr)++;
        ++iter;
    }

    word words{};
    words.word = buffer;
    words.type = "num";
    words.line = line;
    words.index = 5;
    words.column = *column_ptr;
    return words;
}
/**
 * Scans one relational-operator token starting at *iter.
 *
 * A token is one relational character, optionally followed by '=' when the
 * first character is not itself '=' (giving "<=" or ">="). Any other second
 * relational character, and a delimiter character directly after the
 * operator, are reported as errors and terminate the process with a non-zero
 * status.
 *
 * Fix over the previous version: reaching the string terminator fell off the
 * end of the function without returning a value; the token scanned so far is
 * now returned.
 *
 * NOTE(review): rejecting a delimiter right after the operator also rejects
 * input such as "x=(a)"; that behaviour is kept as it was - confirm whether it
 * is intended.
 *
 * @param iter       position of the first operator character; on return it
 *                   points at the first character after the token
 * @param line       current line number, stored in the token and used in errors
 * @param column_ptr column counter, advanced by one per consumed character
 * @return token with type "relation_op" and index 4
 */
word relation_op_process(string::iterator &iter, int line, int *column_ptr) {
    string buffer;

    // Reports the character under the cursor and aborts.
    const auto reject = [&]() {
        cout << "Unexpected Character:" << *iter << " in line:" << line << ",in column: " << *column_ptr
             << endl;
        exit(1);
    };

    // is_relation_op() is false for the terminator, so the loop also stops at end of input.
    while (is_relation_op(*iter)) {
        if (!buffer.empty()) {
            // Only '=' may follow, and only after a single '<' or '>'.
            const bool extends = buffer.length() == 1 && buffer.at(0) != *iter && *iter == '=';
            if (!extends) {
                reject();
            }
        }
        buffer.push_back(*iter);
        (*column_ptr)++;
        ++iter;
    }
    if (is_jeff(*iter)) {
        reject();
    }

    word words{};
    words.type = "relation_op";
    words.word = buffer;
    words.line = line;
    words.index = 4;
    words.column = *column_ptr;
    return words;
}
/**
 * Skips a line comment: advances iter past the next '\n'.
 *
 * The scan also stops at a NUL character, so a comment that is not followed by
 * a newline no longer makes the iterator run beyond the end of the buffer.
 *
 * @param iter position inside the comment; on return it points just after the
 *             newline that ended the comment, or at the NUL that stopped the
 *             scan
 */
void comments_process(string::iterator &iter) {
    while (*iter && *iter != '\n') {
        ++iter;
    }
    if (*iter == '\n') {
        ++iter;
    }
}
/**
 * Skips a block comment. iter must point at the '/' of the opening "/*".
 *
 * On return iter points just after the closing "*" "/" pair. If the comment is
 * never closed, the scan stops at the NUL character it runs into; previously
 * the iterator was then advanced two more positions, beyond the end of the
 * buffer.
 *
 * @param iter position of the comment opener; updated as described above
 */
void long_comments_process(string::iterator &iter) {
    iter += 2;  // step over the opening "/*"
    while (*iter) {
        if (*iter == '*' && *(iter + 1) == '/') {
            iter += 2;  // step over the closing "*/"
            return;
        }
        ++iter;
    }
}
void lexical_analysis(const string &file_path) {
string file = read_file(file_path);
auto iter = file.begin();
int line = 1;
int column = 1;
word words[64] = {};
auto word_iter = std::begin(words);
while (*iter) {
if (is_digit(*iter)) {
*word_iter = num_process(iter, line, &column);
word_iter++;
} else if (is_letter(*iter)) {
*word_iter = letter_process(iter, line, &column);
word_iter++;
} else if (is_arithmetic_op(*iter)) {
if (*iter == '/' && *(iter + 1) == '/') {
comments_process(iter);
line++;
} else if (*iter == '/' && *(iter + 1) == '*')
long_comments_process(iter);
else {
*word_iter = ari_op_process(iter, line, &column);
word_iter++;
};
} else if (is_relation_op(*iter)) {
*word_iter = relation_op_process(iter, line, &column);
word_iter++;
} else if (*iter == '\n') {
line++;
iter++;
column = 0;
} else if (*iter == ' ') {
iter++;
column++;
} else if (is_jeff(*iter)) {
word new_word;
new_word.type = "jeff";
new_word.word = *iter;
new_word.column = column;
new_word.index = 3;
new_word.line = line;
*word_iter = new_word;
word_iter++;
iter++;
column++;
} else {
cout << "Unexpected Character:" << *iter << "in line:" << line << endl;
exit(0);
}
}
cout << setw(20) << std::left << "letter " << setw(20) << std::left << "prologue " << std::left
<< setw(20) << "type " << setw(20) << std::left
<< "Position(line,column) " << endl;
for (auto it = std::begin(words); it != std::end(words) && it->word.length() > 0; it++) {
string temp = it->word;
bool is_keyword = std::any_of(std::begin(key_words), std::end(key_words), [&temp](string &item) {
return temp == item;
});
if (is_keyword) {
it->type = "keyword";
it->index = 1;
} else if (it->type == "letter") {
it->type = "identifier";
it->index = 2;
}
string tuple = "(" + std::to_string(it->index) + "," + it->word + ")";
string position = "(" + std::to_string(it->line) + "," + std::to_string(it->column) + ")";
cout << setw(20) << std::left << it->word << setw(20) << std::left << tuple << setw(20) << std::left
<< it->type << setw(20) << std::left << position << endl;
}
}
/**
 * Program entry point: runs the lexical analysis on one C source file.
 *
 * The file to analyse may be given as the first command-line argument; when no
 * argument is supplied the built-in default path is used, as before.
 */
int main(int argc, char *argv[]) {
    string filePath = argc > 1 ? argv[1] : R"(D:\code\Compilers\target.c)";
    lexical_analysis(filePath);
    return 0;
}