[C++] 简易词法分析器

  1.5H写出来的实验,没有用自动机,因为觉得很难画,不如直接模拟。等有时间会画一个自动机出来并且根据自动机写出一个更像样的词法分析器。

  定义假设单词种别按如下形式编码:

  保留字:单词种别码均为1。输出样式示例:(1if)。

  标识符:单词种别码均为2。输出样式示例:(2x)。

  无符号整数:单词种别码均为3。输出样式示例:(310)。

  运算符:单词种别码均为4。输出样式示例:(4+)。

  分隔符:单词种别码均为5。输出样式示例:(5, ;)。

 

  这个词法分析器可以过滤“//”和"/**/"的注释。

  非常简易,如果各位发现了问题,请及时告知我。非常感谢!

 

  1 #include <bits/stdc++.h>
  2 using namespace std;
  3 
  // Path of the source file that main() hands to GetSourceFile().
  const char* SAVEPATH = "./in";

  // Reserved words; a token equal to one of these is reported with kind 1.
  const char* KEYWORD[] = {"asm","do","if","return","typedef","auto","double","inline","short","typeid","bool","dynamic_cast","int","signed","typename","break","else","long","sizeof","union","case","enum","mutable","static","unsigned","catch","explicit","namespace","static_cast","using","char","export","new","struct","virtual","class","extern","operator","switch","void","const","false","private","template","volatile","const_cast","float","protected","this","wchar_t","continue","for","public","throw","while","default","friend","register","true","delete","goto","reinterpret_cast","try"};

  // Operators; a token equal to one of these is reported with kind 4.
  const char* OPERATOR[] = {"~", "->", "!", "++", "--", "'", "+", "-", "*", "/", "=", "<", "<=", ">", ">=", "!=", ">>", "<<", "==", "%", "&", "^", "|", "?", "&&", "||"};

  // Separator characters (consulted while splitting a line into tokens) ...
  const char SPLIT[] = {'(', ')', '{', '}', ';', ',', ':'};
  // ... and the same separators as strings, compared against whole tokens (kind 5).
  const char* SSPLIT[] = {"(", ")", "{", "}", ";", ",", ":"};

  // The two separator tables must describe the same set, entry for entry.
  static_assert(sizeof(SPLIT) / sizeof(SPLIT[0]) == sizeof(SSPLIT) / sizeof(SSPLIT[0]),
                "SPLIT and SSPLIT must have the same number of entries");

  // Entry counts are derived from the tables themselves. The previous
  // hand-written KEYWORDSIZE (36) covered only the first 36 of the 63 keywords,
  // so words such as "for", "while" and "void" were never reported as keywords.
  const int KEYWORDSIZE = static_cast<int>(sizeof(KEYWORD) / sizeof(KEYWORD[0]));
  const int OPERATORSIZE = static_cast<int>(sizeof(OPERATOR) / sizeof(OPERATOR[0]));
  const int SPLITSIZE = static_cast<int>(sizeof(SPLIT) / sizeof(SPLIT[0]));
 13 
 14 
 // Reports whether ch is an ASCII letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
 // Every other character, including '_' and digits, yields false.
 inline bool isAlpha(char ch) {
     const bool lowerCase = 'a' <= ch && ch <= 'z';
     const bool upperCase = 'A' <= ch && ch <= 'Z';
     return lowerCase || upperCase;
 }
 18 
 19 
 // Reports whether ch is one of the decimal digits '0'..'9'.
 inline bool isDigit(char ch) {
     if (ch < '0') {
         return false;
     }
     return ch <= '9';
 }
 23 
 24 
 25 inline bool isSplit(char ch) {
 26     for(int i = 0; i < 6; i++) {
 27         if(ch == SPLIT[i]) return true;
 28     }
 29     return false;
 30 }
 31 
 32 
 // Reports whether ch can be part of an operator token.
 //
 // Besides the arithmetic / comparison characters handled before, this now also
 // accepts '%', '&', '|', '^', '~' and '?'. Those characters appear in the
 // OPERATOR table ("%", "&", "&&", "|", "||", "^", "~", "?") but were not
 // recognised here, so the tokenizer skipped them and the corresponding
 // operators could never be reported.
 inline bool isSign(char ch) {
     switch (ch) {
         case '+': case '-': case '*': case '/':
         case '=': case '<': case '>': case '!':
         case '%': case '&': case '|': case '^':
         case '~': case '?':
             return true;
         default:
             return false;
     }
 }
 39 
 40 
 // Reads the text file at `path` into `saveVec`, one element per line.
 //
 // path    - file to read.
 // saveVec - cleared first, then filled with the lines in file order (without
 //           their line terminators). Left empty when the file cannot be opened.
 //
 // The loop is driven by the result of getline() instead of eof(): a stream that
 // failed to open never reports eof, so the old `while(!is.eof())` loop spun
 // forever appending empty strings, and on a readable file it appended one
 // phantom empty line after the final newline.
 void GetSourceFile(const char* path, std::vector<std::string>& saveVec) {
     saveVec.clear();
     std::ifstream is(path);
     std::string buffer;
     while (std::getline(is, buffer)) {
         saveVec.push_back(buffer);
     }
 }
 50 
 51 
 52 void _PreProcess(string line, vector<string>& result) {
 53     int lidx = 0;
 54     while(lidx < line.length()) {
 55         if(line[lidx] == ' ' || line[lidx] == '    ') {
 56             lidx++;
 57             continue;
 58         }
 59         if(isDigit(line[lidx]) || isAlpha(line[lidx])) {
 60             int ridx = lidx;
 61             while((isDigit(line[ridx]) || isAlpha(line[ridx])) &&
 62                         !isSplit(line[ridx]) && ridx < line.length() && line[ridx] != ' ' && line[ridx] != '    ') {
 63                 ridx++;
 64             }
 65             result.push_back(line.substr(lidx, ridx-lidx));
 66             lidx = ridx;
 67         }
 68         else if(isSign(line[lidx])) {
 69             int ridx = lidx;
 70             while(isSign(line[ridx]) && line[ridx] != ' ' && line[ridx] != '    ' && \
 71                         !isSplit(line[ridx]) && ridx < line.length()) {
 72                 ridx++;
 73             }
 74             result.push_back(line.substr(lidx, ridx-lidx));
 75             lidx = ridx;
 76         }
 77         else if(isSplit(line[lidx])) {
 78             result.push_back(line.substr(lidx, 1));
 79             lidx++;
 80         }
 81         else lidx++;
 82     }
 83 }
 84 
 85 
 // Strips `//` and `/* ... */` comments from one source line, in place.
 //
 // line  - the line to clean; on return it holds only the text outside comments.
 // isAnt - in/out flag: true when the line starts inside a block comment opened
 //         on an earlier line; on return, true when a block comment is still
 //         open at the end of this line.
 //
 // The whole line is scanned, so any number of comments on it is handled.
 // Fixes over the previous version:
 //   * a block comment opened and closed on the same line no longer leaves
 //     isAnt set (which swallowed the following lines up to the next "*/");
 //   * lines shorter than two characters inside a block comment are now
 //     cleared instead of being passed through;
 //   * text after a closing "*/" is itself checked for further comments.
 // When a block comment sits between two pieces of code on one line it is
 // replaced by a single space, so "a/**/b" yields "a b" rather than "ab".
 // Comment markers inside string literals are not treated specially.
 void RemoveAnnotation(std::string& line, bool& isAnt) {
     const std::size_t length = line.length();
     std::string kept;
     std::size_t pos = 0;
     while (pos < length) {
         if (isAnt) {
             const std::size_t closing = line.find("*/", pos);
             if (closing == std::string::npos) {
                 break;  // comment continues on the next line
             }
             isAnt = false;
             if (!kept.empty()) {
                 kept += ' ';  // keep the code on both sides of the comment apart
             }
             pos = closing + 2;
             continue;
         }
         if (line[pos] == '/' && pos + 1 < length) {
             if (line[pos + 1] == '/') {
                 break;  // line comment: drop the rest of the line
             }
             if (line[pos + 1] == '*') {
                 isAnt = true;
                 pos += 2;  // skip "/*" so that "/*/" is not read as a closed comment
                 continue;
             }
         }
         kept += line[pos];
         ++pos;
     }
     line = kept;
 }
112 
113 
114 vector<string> PreProcess(vector<string>& saveVec) {
115     vector<string> result;
116     bool isAnt = false;
117     for(auto& line : saveVec) {
118         RemoveAnnotation(line, isAnt);
119     }
120     for(auto line : saveVec) _PreProcess(line, result);
121     return result;
122 }
123 
124 
125 pair<int, string> analyse(string line) {
126     typedef pair<int, string> PIS;
127     // 保留字
128     for(int i = 0; i < KEYWORDSIZE; i++) if(strcmp(line.c_str(), KEYWORD[i]) == 0) return PIS(1, line);
129     // 运算符
130     for(int i = 0; i < OPERATORSIZE; i++) if(strcmp(line.c_str(), OPERATOR[i]) == 0) return PIS(4, line);
131     // 分隔符
132     for(int i = 0; i < SPLITSIZE; i++) if(strcmp(line.c_str(), SSPLIT[i]) == 0) return PIS(5, line);
133     bool digitFlag = false, alphaFlag = false;
134     for(auto ch : line) {
135         if(isDigit(ch)) digitFlag = true;
136         if(isAlpha(ch)) alphaFlag = true;
137     }
138     // 无符号整数
139     if(digitFlag && !alphaFlag) return PIS(3, line);
140     return PIS(2, line);
141 }
142 
143 
144 vector<pair<int, string>> Analyse(vector<string> saveVec) {
145     int lidx = 0;
146     vector<pair<int, string>> result;
147     for(auto line : saveVec) {
148         result.push_back(analyse(line));
149     }
150     return result;
151 }
152 
153 
154 int main() {
155     vector<string> saveVec;
156     vector<string> processResult;
157     vector<pair<int, string>> analyseResult;
158 
159     GetSourceFile(SAVEPATH, saveVec);
160 
161     processResult = PreProcess(saveVec);
162 
163     analyseResult = Analyse(processResult);
164 
165     for(int i = 0; i < analyseResult.size(); i++) {
166         cout << analyseResult[i].first << " " << analyseResult[i].second << endl;
167     }
168 }

 

可以试一下输入下面的内容:

 1  /*bool checkIfSatisfied(vector<pair<int, string>>& analyseResult) {
 2  // 存在不合法则返回0
 3  for(int i = 0; i < analyseResult.size(); i++) {
 4   if(analyseResult[i].first == -1) return false;
 5  }
 6  return true;
 7 }*/ sss11;
 8 
 9 
10 int main() {
11  vector<string> saveVec;
12  vector<string> processResult;
13  vector<pair<int, string>> analyseResult;
14 
15  GetSourceFile(SAVEPATH, saveVec);
16 
17  processResult = PreProcess(saveVec);
18 
19  analyseResult = Analyse(processResult);
20 
21  // if(checkIfSatisfied(analyseResult)) { 
22   for(int i = 0; i < analyseResult.size(); i++) {
23    cout << analyseResult[i].first << " " << analyseResult[i].second << endl;
24   }
25  // }
26  // else {
27   // cout << "Syntex Error !" << endl;
28  // }
29 }

 

输出的是:

2 sss11
5 ;
1 int
2 main
5 (
5 )
5 {
2 vector
4 <
2 string
4 >
2 saveVec
5 ;
2 vector
4 <
2 string
4 >
2 processResult
5 ;
2 vector
4 <
2 pair
4 <
1 int
5 ,
2 string
4 >>
2 analyseResult
5 ;
2 GetSourceFile
5 (
2 SAVEPATH
5 ,
2 saveVec
5 )
5 ;
2 processResult
4 =
2 PreProcess
5 (
2 saveVec
5 )
5 ;
2 analyseResult
4 =
2 Analyse
5 (
2 processResult
5 )
5 ;
2 for
5 (
1 int
2 i
4 =
3 0
5 ;
2 i
4 <
2 analyseResult
2 size
5 (
5 )
5 ;
2 i
4 ++
5 )
5 {
2 cout
4 <<
2 analyseResult
2 i
2 first
4 <<
4 <<
2 analyseResult
2 i
2 second
4 <<
2 endl
5 ;
5 }
5 }

 

转载于:https://www.cnblogs.com/kirai/p/6762922.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值