(1) 关键字:
for if then else while do until int input output
所有关键字都是小写。
(2)运算符和分隔符:
: := = + - * / < > <= <> >= ; ( ) #
(3) 其他标识符(ID)和整型常数(NUM),通过以下模式定义:
ID=letter(letter | digit)*
NUM=digit digit*
(4)空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、分隔符和关键字,词法分析阶段通常被忽略。
各种词法单元对应的词法记号如下:
词法单元 | 词法记号 | 词法单元 | 词法记号 |
---|---|---|---|
for | 1 | : | 17 |
if | 2 | := | 18 |
then | 3 | < | 20 |
else | 4 | <> | 21 |
while | 5 | <= | 22 |
do | 6 | > | 23 |
letter(letter+digit)* | 10 | >= | 24 |
digit digit* | 11 | = | 25 |
+ | 13 | ; | 26 |
- | 14 | ( | 27 |
* | 15 | ) | 28 |
/ | 16 | # | 0 |
until | 29 | int | 30 |
input | 31 | output | 32 |
词法分析程序的功能
输入:源程序
输出:二元组(词法记号,属性值/其在符号表中的位置)构成的序列。
代码:
#include<iostream>
#include<map>
#include<string>
#include<vector>
namespace {

/// A lexical token: (token code, lexeme text).
using Token = std::pair<int, std::string>;

/// Token code for identifiers: letter (letter | digit)*.
constexpr int kIdentifierCode = 10;
/// Token code for integer constants: digit digit*.
constexpr int kNumberCode = 11;

/// Returns the fixed table mapping keywords, operators and delimiters to
/// their token codes. Note that '#' legitimately maps to code 0, so a code
/// of 0 must never be used to mean "not found"; callers use find() instead.
const std::map<std::string, int>& tokenTable() {
    static const std::map<std::string, int> table{
        {"for", 1},   {"if", 2},    {"then", 3},   {"else", 4},
        {"while", 5}, {"do", 6},    {"+", 13},     {"-", 14},
        {"*", 15},    {"/", 16},    {"until", 29}, {"input", 31},
        {":", 17},    {":=", 18},   {"<", 20},     {"<>", 21},
        {"<=", 22},   {">", 23},    {">=", 24},    {"=", 25},
        {";", 26},    {"(", 27},    {")", 28},     {"#", 0},
        {"int", 30},  {"output", 32}};
    return table;
}

// ASCII-only classification, matching the character ranges the language
// definition uses (and independent of the current locale).
bool isDigit(char ch) { return '0' <= ch && ch <= '9'; }

bool isLetter(char ch) {
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
}

// Whitespace per the language definition: blanks, tabs and line breaks.
// '\r' is included so that CRLF-terminated input does not leak into tokens.
bool isBlank(char ch) {
    return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
}

/// Splits `src` into tokens.
///
/// Rules, applied at each position after skipping whitespace:
///  - a run of digits becomes a NUM token (code 11);
///  - a letter followed by letters/digits becomes a keyword token if the
///    word is in the table, otherwise an ID token (code 10);
///  - otherwise the longest operator/delimiter (two characters, then one)
///    found in the table is emitted with its code;
///  - any other character is reported on stderr and skipped.
std::vector<Token> tokenize(const std::string& src) {
    const std::map<std::string, int>& table = tokenTable();
    std::vector<Token> tokens;

    std::string::size_type pos = 0;
    while (pos < src.size()) {
        const char ch = src[pos];

        if (isBlank(ch)) {
            ++pos;
            continue;
        }

        if (isDigit(ch)) {
            std::string::size_type end = pos;
            while (end < src.size() && isDigit(src[end])) {
                ++end;
            }
            tokens.emplace_back(kNumberCode, src.substr(pos, end - pos));
            pos = end;
            continue;
        }

        if (isLetter(ch)) {
            std::string::size_type end = pos;
            while (end < src.size() &&
                   (isLetter(src[end]) || isDigit(src[end]))) {
                ++end;
            }
            const std::string word = src.substr(pos, end - pos);
            // Only keywords can match here: every operator key starts with
            // a non-letter character.
            const auto hit = table.find(word);
            tokens.emplace_back(hit != table.end() ? hit->second
                                                   : kIdentifierCode,
                                word);
            pos = end;
            continue;
        }

        // Operators and delimiters: prefer the two-character form
        // (":=", "<>", "<=", ">=") over its one-character prefix.
        if (pos + 1 < src.size()) {
            const std::string pairText = src.substr(pos, 2);
            const auto hit = table.find(pairText);
            if (hit != table.end()) {
                tokens.emplace_back(hit->second, pairText);
                pos += 2;
                continue;
            }
        }
        const std::string single(1, ch);
        const auto hit = table.find(single);
        if (hit != table.end()) {
            tokens.emplace_back(hit->second, single);
            ++pos;
            continue;
        }

        // Not part of the language: report it instead of silently turning
        // it into (or gluing it onto) an identifier.
        std::cerr << "lexical error: unexpected character '" << ch
                  << "' (code " << static_cast<int>(static_cast<unsigned char>(ch))
                  << ") at column " << (pos + 1) << ", skipped\n";
        ++pos;
    }
    return tokens;
}

}  // namespace

/// Reads one line of source text from standard input, tokenizes it, and
/// prints each token as "(code,lexeme)" on its own line.
int main(){
    std::string line;
    std::getline(std::cin, line);

    for (const auto& token : tokenize(line)) {
        std::cout << "(" << token.first << "," << token.second << ")" << '\n';
    }
    return 0;
}