// 编译原理实验一(词法分析) — Compiler Principles, Lab 1: lexical analysis
#define _CRT_SECURE_NO_WARNINGS
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// singleSign: characters that work() reports as one-character tokens.
// emptyStr: whitespace characters that work() skips without reporting.
const string singleSign = "+-*/:=;()#.{}", emptyStr = " \t\n";
string text; // the source text under analysis (assigned in main, scanned by work)
// Maps each lexeme of keystrVec to its category code; filled by initID.
map<string, int> key2id;
// Table of keywords and operators; an entry's position is its category code.
// NOTE(review): "do" appears twice in this table. initID assigns codes in
// order, so the later position is the one that ends up in key2id.
vector<string> keystrVec = { "#", "main", "if", "then", "while", "do",
"static", "int", "double", "struct", "break",
"else", "long", "switch", "case", "typedef",
"char", "return", "const", "float", "short",
"continue", "for", "void", "sizeof", "+",
"-", "*", "/" , ":", ":=", "<", "!=", "<=", ">",
">=", "=", "default", "do", ";", "(", ")", ".", "{", "}"
}; // table elements; identifiers, strings and numbers (NUM) are coded separately below
// Category codes for token classes that are not table entries; they are
// numbered just past the table size N so they cannot collide with an index.
const int N = keystrVec.size(), NUM = N + 1, STR = N + 2, IDTF = N + 3, UNKNOW = N + 4, CHAR = N+5;
// Fills the global key2id map: every lexeme in keystrVec is mapped to its
// position in the vector. A lexeme that occurs more than once keeps the
// position of its last occurrence.
void initID(vector<string> keystrVec) {
    int position = 0;
    for (const string& lexeme : keystrVec) {
        key2id[lexeme] = position;
        ++position;
    }
}
// NOTE(review): not referenced anywhere else in the visible source — possibly a leftover; confirm before removing.
map<string, int> key;
// Reads standard input until end of file and returns everything as one
// string. Each line read is followed by a single '\n' in the result, so the
// result ends with '\n' whenever at least one line was read.
string myread() {
    string collected;
    for (string row; getline(cin, row); ) {
        collected.append(row);
        collected.push_back('\n');
    }
    return collected;
}
// Reports a malformed construct found while stripping comments and ends the
// program with a failure status.
// The previous implementation spun in `while (true);`, which has no side
// effects: that is undefined behaviour before C++26 (the compiler may delete
// the loop) and otherwise hangs the process at full CPU.
void ErrorProcess(){
    cout << "Grammer Error" << endl;
    std::exit(EXIT_FAILURE);
}

// Returns a copy of `text` with comments removed.
//   - A "//" comment, together with the newline that ends it, is replaced by
//     a single '\n' (a '\n' is emitted even if the comment runs to the end).
//   - A "/* ... */" comment is replaced by one space so the tokens on either
//     side of it are not glued together; an unterminated block comment
//     swallows the rest of the text.
//   - String literals are copied verbatim; a backslash always escapes the
//     character after it, so "a\\" ends at its second quote.
//   - Character literals must have the form 'x' or '\x'; anything else
//     (including a literal cut off by the end of the text) is reported
//     through ErrorProcess().
//   - Every other character, including a lone '/' (division), is copied.
string deleteAnnotation (string text) {
    const size_t len = text.length();
    string txt;
    txt.reserve(len);
    size_t cur = 0;
    while (cur < len) {
        const char ch = text[cur];
        const bool hasNext = cur + 1 < len;
        if (ch == '/' && hasNext && text[cur + 1] == '/') {
            // Line comment: skip to the terminating newline and keep one '\n'.
            txt += '\n';
            const size_t eol = text.find('\n', cur + 2);
            if (eol == string::npos) break;
            cur = eol + 1;
        }
        else if (ch == '/' && hasNext && text[cur + 1] == '*') {
            // Block comment: searching from cur + 2 means "/*/" does not close itself.
            txt += ' ';
            const size_t close = text.find("*/", cur + 2);
            if (close == string::npos) break;
            cur = close + 2;
        }
        else if (ch == '"') {
            txt += text[cur++]; // opening quote
            while (cur < len) {
                const char c = text[cur++];
                txt += c;
                if (c == '\\') {
                    // Copy the escaped character too so an escaped quote or
                    // backslash cannot be mistaken for the end of the literal.
                    if (cur < len) txt += text[cur++];
                }
                else if (c == '"') {
                    break; // end of the string literal
                }
            }
        }
        else if (ch == '\'') {
            size_t closing = cur + 1;
            if (closing < len && text[closing] == '\\') ++closing; // escape prefix
            ++closing; // step over the character itself
            if (closing >= len || text[closing] != '\'') {
                ErrorProcess();
                return txt; // not reached: ErrorProcess() terminates the program
            }
            txt.append(text, cur, closing - cur + 1);
            cur = closing + 1;
        }
        else {
            txt += text[cur++];
        }
    }
    return txt;
}
// One-time setup: fills key2id from the keystrVec table via initID.
void init() {
initID(keystrVec);
}
// Converts a non-negative decimal integer, given as a string of digits, to
// its binary representation without leading zeros.
//   - Returns "0" for any all-zero input ("0", "000", ...).
//   - Returns "nan" when the input is empty, contains a non-digit, or has
//     more than 18 significant digits (the largest length that is always
//     safe to accumulate in a 64-bit integer). Leading zeros do not count
//     toward that limit.
string dec2bin(string num) {
    if (num.empty()) return "nan";
    for (const char c : num) {
        if (c < '0' || c > '9') return "nan";
    }
    const size_t firstSignificant = num.find_first_not_of('0');
    if (firstSignificant == string::npos) return "0";
    if (num.length() - firstSignificant > 18) return "nan";

    unsigned long long value = 0;
    for (size_t i = firstSignificant; i < num.length(); ++i) {
        value = value * 10 + static_cast<unsigned long long>(num[i] - '0');
    }

    // Collect bits least-significant first, then reverse once; this avoids
    // re-copying the whole string on every prepend.
    string reversedBits;
    for (; value > 0; value >>= 1) {
        reversedBits += (value & 1ULL) ? '1' : '0';
    }
    return string(reversedBits.rbegin(), reversedBits.rend());
}
// Scans the global `text` and prints one row per token:
// lexeme, category code, internal value.
//   - Identifiers/keywords: [A-Za-z_][A-Za-z0-9_]*. Table entries print their
//     key2id code and "-"; anything else prints IDTF and the identifier.
//   - Numbers: a run of decimal digits, printed with NUM and its binary form.
//   - Operators: ":=", "<=", ">=", "!=" are matched first as one token with
//     their own table code; then the single characters in singleSign plus
//     '<' and '>'.
//   - String literals print with STR; a backslash escapes the next character.
//     A literal that is never closed is printed with UNKNOW instead of being
//     dropped silently.
//   - Character literals 'x' / '\x' print with CHAR.
//   - Characters in emptyStr are skipped; everything else prints with UNKNOW.
void work() {
    cout << "单词符号\t\t种别码\t\t\t内码值" << endl;
    cout << "---------------\t\t---------------\t\t---------------" << endl;

    // Prints one result row in the fixed three-column layout.
    auto emit = [](const string& lexeme, int code, const string& value) {
        cout << lexeme << "\t\t\t" << code << "\t\t\t" << value << endl;
    };
    // Table lookup that never inserts: key2id[...] would silently add missing
    // lexemes with code 0, which is the code of "#".
    auto codeOf = [](const string& lexeme, int fallback) {
        const auto hit = key2id.find(lexeme);
        return hit == key2id.end() ? fallback : hit->second;
    };
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative char (any non-ASCII byte) is undefined behaviour.
    auto isWordChar = [](char c) {
        return isalnum(static_cast<unsigned char>(c)) || c == '_';
    };
    auto isDigitChar = [](char c) {
        return isdigit(static_cast<unsigned char>(c)) != 0;
    };

    const size_t len = text.length();
    size_t cur = 0;
    while (cur < len) {
        const char ch = text[cur];
        if (isalpha(static_cast<unsigned char>(ch)) || ch == '_') {
            // Find the end first and print afterwards, so an identifier that
            // is the last thing in the text is still reported.
            size_t end = cur + 1;
            while (end < len && isWordChar(text[end])) ++end;
            const string word = text.substr(cur, end - cur);
            const auto hit = key2id.find(word);
            if (hit != key2id.end()) emit(word, hit->second, "-"); // keyword
            else emit(word, IDTF, word);                           // identifier
            cur = end;
        }
        else if (isDigitChar(ch)) {
            size_t end = cur + 1;
            while (end < len && isDigitChar(text[end])) ++end;
            const string num = text.substr(cur, end - cur);
            emit(num, NUM, dec2bin(num));
            cur = end;
        }
        else if (cur + 1 < len && text[cur + 1] == '=' &&
                 (ch == ':' || ch == '<' || ch == '>' || ch == '!')) {
            // Two-character operator: use its own code and consume both
            // characters so the '=' is not reported a second time.
            const string op = text.substr(cur, 2);
            emit(op, codeOf(op, UNKNOW), "-");
            cur += 2;
        }
        else if (singleSign.find(ch) != string::npos || ch == '<' || ch == '>') {
            const string op(1, ch);
            emit(op, codeOf(op, UNKNOW), "-");
            ++cur;
        }
        else if (ch == '"') {
            size_t end = cur + 1;
            bool closed = false;
            while (end < len) {
                const char c = text[end++];
                if (c == '\\') {
                    if (end < len) ++end; // skip the escaped character
                }
                else if (c == '"') {
                    closed = true;
                    break;
                }
            }
            emit(text.substr(cur, end - cur), closed ? STR : UNKNOW, "-");
            cur = end;
        }
        else if (ch == '\'') {
            size_t closing = cur + 1;
            if (closing < len && text[closing] == '\\') ++closing; // escape prefix
            ++closing; // step over the character itself
            if (closing < len && text[closing] == '\'') {
                emit(text.substr(cur, closing - cur + 1), CHAR, "-");
                cur = closing + 1;
            }
            else {
                // Malformed or truncated literal: report the quote itself
                // instead of reading past the end of the text.
                emit(string(1, ch), UNKNOW, "-");
                ++cur;
            }
        }
        else {
            if (emptyStr.find(ch) == string::npos)
                emit(string(1, ch), UNKNOW, "-");
            ++cur;
        }
    }
}
// Program entry point: reads the source text from data.in, strips comments
// and prints the token table.
// Returns 0 on success and 1 when data.in cannot be opened.
int main()
{
    // Input is redirected from data.in; remove this block to type the input
    // on the console instead. A failed freopen leaves stdin unusable, so the
    // failure is reported instead of analysing an empty text.
    if (freopen("data.in", "r", stdin) == nullptr) {
        cerr << "Cannot open data.in" << endl;
        return 1;
    }
    //freopen("data.out", "w", stdout);
    ios::sync_with_stdio(false);
    init();
    cout << keystrVec.size() << endl; // number of table entries to recognise
    text = myread();
    //cout << text << endl; // show the text as read
    text = deleteAnnotation(text);
    //cout << text << endl; // show the text after comment removal
    work();
    // The old trailing `while (true);` (used to keep the console open) was a
    // side-effect-free infinite loop: undefined behaviour before C++26 and a
    // full-CPU hang otherwise. Run from a terminal, or use the IDE's
    // keep-console-open option, to read the output.
    cout << "===处理完毕===" << endl;
    return 0;
}