1.5H写出来的实验,没有用自动机,因为觉得很难画,不如直接模拟。等有时间会画一个自动机出来,并且根据自动机写出一个更像样的词法分析器。
假设单词种别按如下形式编码:
保留字:单词种别码均为1。输出样式示例:(1, if)。
标识符:单词种别码均为2。输出样式示例:(2, x)。
无符号整数:单词种别码均为3。输出样式示例:(3, 10)。
运算符:单词种别码均为4。输出样式示例:(4, +)。
分隔符:单词种别码均为5。输出样式示例:(5, ;)。
这个词法分析器可以过滤“//”和“/* */”两种形式的注释。
非常简易,如果各位发现了问题,请及时告知我。非常感谢!
#include <bits/stdc++.h>
using namespace std;

// Path of the source file that the lexer reads.
const char* SAVEPATH = "./in";

// Reserved words (token category 1).
const char* KEYWORD[] = {"asm","do","if","return","typedef","auto","double","inline","short","typeid","bool","dynamic_cast","int","signed","typename","break","else","long","sizeof","union","case","enum","mutable","static","unsigned","catch","explicit","namespace","static_cast","using","char","export","new","struct","virtual","class","extern","operator","switch","void","const","false","private","template","volatile","const_cast","float","protected","this","wchar_t","continue","for","public","throw","while","default","friend","register","true","delete","goto","reinterpret_cast","try"};
// Operators (token category 4).
const char* OPERATOR[] = {"~", "->", "!", "++", "--", "'", "+", "-", "*", "/", "=", "<", "<=", ">", ">=", "!=", ">>", "<<", "==", "%", "&", "^", "|", "?", "&&", "||"};
// Separators (token category 5), as characters and as strings.
const char SPLIT[] = {'(', ')', '{', '}', ';', ',', ':'};
const char* SSPLIT[] = {"(", ")", "{", "}", ";", ",", ":"};

// Table sizes are derived from the tables themselves so they cannot drift
// out of sync with the initializer lists.
const int KEYWORDSIZE = sizeof(KEYWORD) / sizeof(KEYWORD[0]);
const int OPERATORSIZE = sizeof(OPERATOR) / sizeof(OPERATOR[0]);
const int SPLITSIZE = sizeof(SPLIT) / sizeof(SPLIT[0]);


// Returns true for ASCII letters.
inline bool isAlpha(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}


// Returns true for ASCII decimal digits.
inline bool isDigit(char ch) {
    return ch >= '0' && ch <= '9';
}


// Returns true for characters that may appear inside a word token.
// '_' is included because several KEYWORD entries (e.g. "static_cast")
// contain it and could otherwise never be matched.
inline bool isIdentChar(char ch) {
    return isAlpha(ch) || isDigit(ch) || ch == '_';
}


// Returns true if ch is one of the separator characters in SPLIT.
inline bool isSplit(char ch) {
    for (int i = 0; i < SPLITSIZE; i++) {
        if (ch == SPLIT[i]) return true;
    }
    return false;
}


// Returns true if ch can start an operator listed in OPERATOR.
inline bool isSign(char ch) {
    return ch == '+' || ch == '-' ||
           ch == '*' || ch == '/' ||
           ch == '=' || ch == '<' ||
           ch == '>' || ch == '!' ||
           ch == '%' || ch == '&' ||
           ch == '^' || ch == '|' ||
           ch == '?' || ch == '~';
}


// Returns true if token is exactly one of the reserved words.
inline bool isKeyword(const string& token) {
    for (int i = 0; i < KEYWORDSIZE; i++) {
        if (token == KEYWORD[i]) return true;
    }
    return false;
}


// Returns true if token is exactly one of the operators.
inline bool isOperator(const string& token) {
    for (int i = 0; i < OPERATORSIZE; i++) {
        if (token == OPERATOR[i]) return true;
    }
    return false;
}


// Returns true if token is exactly one of the separators.
inline bool isSeparator(const string& token) {
    for (int i = 0; i < SPLITSIZE; i++) {
        if (token == SSPLIT[i]) return true;
    }
    return false;
}


// Reads the file at `path` line by line into `saveVec` (cleared first).
// If the file cannot be opened, `saveVec` is left empty and a message is
// written to stderr.
void GetSourceFile(const char* path, vector<string>& saveVec) {
    saveVec.clear();
    ifstream is(path);
    if (!is) {
        cerr << "cannot open " << path << '\n';
        return;
    }
    string buffer;
    // Testing the stream after getline() stops on EOF *and* on errors;
    // looping on !eof() never terminates when the stream is in a failed state.
    while (getline(is, buffer)) {
        saveVec.push_back(buffer);
    }
}


// Splits one comment-free line into raw tokens and appends them to `result`.
//   - a run of letters, digits and '_' becomes one word token;
//   - an operator is matched longest-first (two characters, then one);
//   - each separator character becomes its own token;
//   - whitespace and any other character is skipped.
// NOTE(review): the name starts with '_' + uppercase letter, which is a
// reserved identifier form in C++; kept unchanged for existing callers.
void _PreProcess(const string& line, vector<string>& result) {
    const size_t len = line.length();
    size_t pos = 0;
    while (pos < len) {
        const char ch = line[pos];
        if (ch == ' ' || ch == '\t') {
            ++pos;
        }
        else if (isIdentChar(ch)) {
            size_t end = pos;
            while (end < len && isIdentChar(line[end])) {
                ++end;
            }
            result.push_back(line.substr(pos, end - pos));
            pos = end;
        }
        else if (isSign(ch)) {
            // Prefer a two-character operator such as "<=" or "->"; fall back
            // to the single character so that e.g. "=-" yields "=" and "-".
            size_t width = 1;
            if (pos + 1 < len && isOperator(line.substr(pos, 2))) {
                width = 2;
            }
            result.push_back(line.substr(pos, width));
            pos += width;
        }
        else if (isSplit(ch)) {
            result.push_back(string(1, ch));
            ++pos;
        }
        else {
            ++pos;
        }
    }
}


// Strips "//" and "/* ... */" comments from `line` in place.
// `isAnt` carries the "inside a block comment" state from one line to the
// next: it is read on entry and updated on exit. Any number of comments on
// a single line is handled, and text following a closing "*/" is kept.
void RemoveAnnotation(string& line, bool& isAnt) {
    string kept;
    kept.reserve(line.length());
    size_t pos = 0;
    while (pos < line.length()) {
        const bool hasNext = pos + 1 < line.length();
        if (isAnt) {
            if (hasNext && line[pos] == '*' && line[pos + 1] == '/') {
                isAnt = false;
                pos += 2;
                // Keep the tokens on either side of the comment apart.
                kept += ' ';
            }
            else {
                ++pos;
            }
        }
        else if (hasNext && line[pos] == '/' && line[pos + 1] == '/') {
            break;  // the rest of the line is a line comment
        }
        else if (hasNext && line[pos] == '/' && line[pos + 1] == '*') {
            isAnt = true;
            pos += 2;
        }
        else {
            kept += line[pos];
            ++pos;
        }
    }
    line = kept;
}


// Removes comments from every line of `saveVec` (modifying it in place) and
// returns the raw tokens of all lines in order.
vector<string> PreProcess(vector<string>& saveVec) {
    vector<string> result;
    bool isAnt = false;
    for (auto& line : saveVec) {
        RemoveAnnotation(line, isAnt);
    }
    for (const auto& line : saveVec) {
        _PreProcess(line, result);
    }
    return result;
}


// Classifies one raw token and returns (category, token):
//   1 = reserved word, 4 = operator, 5 = separator,
//   3 = unsigned integer (digits only), 2 = identifier (everything else).
pair<int, string> analyse(const string& line) {
    using PIS = pair<int, string>;
    // Reserved words
    if (isKeyword(line)) return PIS(1, line);
    // Operators
    if (isOperator(line)) return PIS(4, line);
    // Separators
    if (isSeparator(line)) return PIS(5, line);
    // Unsigned integers
    if (!line.empty() && all_of(line.begin(), line.end(), isDigit)) return PIS(3, line);
    return PIS(2, line);
}


// Classifies every token of `saveVec`, preserving order.
vector<pair<int, string>> Analyse(const vector<string>& saveVec) {
    vector<pair<int, string>> result;
    result.reserve(saveVec.size());
    for (const auto& token : saveVec) {
        result.push_back(analyse(token));
    }
    return result;
}


// Reads SAVEPATH, tokenizes it and prints one "<category> <token>" per line.
int main() {
    vector<string> saveVec;
    GetSourceFile(SAVEPATH, saveVec);

    const vector<string> processResult = PreProcess(saveVec);
    const vector<pair<int, string>> analyseResult = Analyse(processResult);

    for (const auto& item : analyseResult) {
        cout << item.first << " " << item.second << '\n';
    }
    return 0;
}
可以试一下输入下面的内容:
1 /*bool checkIfSatisfied(vector<pair<int, string>>& analyseResult) { 2 // 存在不合法则返回0 3 for(int i = 0; i < analyseResult.size(); i++) { 4 if(analyseResult[i].first == -1) return false; 5 } 6 return true; 7 }*/ sss11; 8 9 10 int main() { 11 vector<string> saveVec; 12 vector<string> processResult; 13 vector<pair<int, string>> analyseResult; 14 15 GetSourceFile(SAVEPATH, saveVec); 16 17 processResult = PreProcess(saveVec); 18 19 analyseResult = Analyse(processResult); 20 21 // if(checkIfSatisfied(analyseResult)) { 22 for(int i = 0; i < analyseResult.size(); i++) { 23 cout << analyseResult[i].first << " " << analyseResult[i].second << endl; 24 } 25 // } 26 // else { 27 // cout << "Syntex Error !" << endl; 28 // } 29 }
输出的是:
2 sss11 5 ; 1 int 2 main 5 ( 5 ) 5 { 2 vector 4 < 2 string 4 > 2 saveVec 5 ; 2 vector 4 < 2 string 4 > 2 processResult 5 ; 2 vector 4 < 2 pair 4 < 1 int 5 , 2 string 4 >> 2 analyseResult 5 ; 2 GetSourceFile 5 ( 2 SAVEPATH 5 , 2 saveVec 5 ) 5 ; 2 processResult 4 = 2 PreProcess 5 ( 2 saveVec 5 ) 5 ; 2 analyseResult 4 = 2 Analyse 5 ( 2 processResult 5 ) 5 ; 2 for 5 ( 1 int 2 i 4 = 3 0 5 ; 2 i 4 < 2 analyseResult 2 size 5 ( 5 ) 5 ; 2 i 4 ++ 5 ) 5 { 2 cout 4 << 2 analyseResult 2 i 2 first 4 << 4 << 2 analyseResult 2 i 2 second 4 << 2 endl 5 ; 5 } 5 }