编译原理实验之词法分析

内容有更新哦~~~

能识别小数,科学记数法表示的数,负数亦可。

不多解释,代码先上:


#include <cstdio>
#include <iostream>
#include <string>
#include <cstring>
#include <algorithm>
#include <cctype>
#include <fstream>
#include <map>
#include <cstdlib>
#include <cmath>
using namespace std;
const int maxn = 1002;
const int n_key = 40;
const int n_oper = 21;

char c;

string key_word[n_key] = {"auto", "enum", "restrict", "unsigned", "break",
                    "extern", "return", "void", "case", "float",
                    "short", "volatile", "char", "for", "signed",
                    "while", "const", "goto", "sizeof", "_Bool",
                    "continue", "if", "static", "_Complex", "default",
                    "inline", "struct", "_Imaginary", "do", "int",
                    "switch", "double", "long", "typedef", "else",
                    "register", "union", "main", "scanf", "printf"
};

string oper[] = {"+", "-", "*", "/", "^",
                "<", ">", "++", "--", "==",
                "*=", "/=", ">=", "<=", "<<",
                ">>", ">>=", "<<=", "%", "&",
                "^"
};

char bound[] = {',', ';', '(', ')', '[', ']', '{', '}'};

struct Word{                //词结构体
    int id;
    string value;
};

struct Num{
    int id;
    int vi;
    double vd;
};
Num n[maxn];        //数字
Word w[maxn];       //词
map<string, int> m;    //标识符
int f = 0, ff = 0;


bool is_oper(string s){
    for(int i=0; i<n_oper; i++) if(s == oper[i]) return true;
    return false;
}

bool is_bound(char c){
    for(int i=0; i<sizeof(bound); i++) if(c == bound[i]) return true;
    return false;
}

bool is_key(string s){
    for(int i=0; i<n_key; i++) if(s == key_word[i]) return true;
    return false;
}


int stoi(string s){                         //get int
    int ans = 0;
    for(int i=0; i<s.size(); i++) ans = ans * 10 + s[i] - '0';
    return ans;
}

double stof(string s){                      //get double
    long long ans = 0;
    int fd = -1, fe = -1;
    for(int i=0; i<s.size(); i++){
        if(s[i] == '.'){
            fd = i;
            continue;
        }
        if(s[i] == 'e'){
            fe = i;
            continue;
        }
        ans = ans * 10 + s[i] - '0';
    }
    if(fd != -1 && fe == -1) return double(ans)/(pow(10, s.size() - fd - 1));
    else if(fd == -1 && fe != -1){
        long long temp = ans % (long long)pow(10, s.size() - fe - 1);     //得到e后的数字
        ans /= pow(10, s.size() - fe - 1);                                //得到e前的数字
        return double(ans*pow(10, temp));
    }
    else{
        long long temp = ans % (long long)pow(10, s.size() - fe - 1);     //得到e后的数字
        ans /= pow(10, s.size() - fe - 1);                                //得到e前的数字
        long long tt = (s.size() - fd - 1) - (s.size() - fe - 1) - 1;     //得到.后的数字
        return (double)ans/pow(10, tt) * (pow(10, temp));
    }
}

void getword(char filename[]){
    freopen(filename, "r", stdin);              //重定向

    string str = "";
    int flag, is_end;
    is_end = scanf("%c", &c);
    while(is_end != EOF){
        flag = 0;
        if(isspace(c)){
            str = "";
            while(isspace(c) && is_end != EOF){   //滤空格
                is_end = scanf("%c", &c);
            }
        }
        if(isalpha(c)){                         //以字母开头
            str = "";
            while(isalnum(c) || c == '_'){
                str += c;
                is_end = scanf("%c", &c);
            }
            w[++f].value = str;
            if(is_key(str)) w[f].id = 1;        //保留字
            else{
                w[f].id = 2;                     //标识符
                m[str] ++;
            }
            flag = 1;
        }
        if(isdigit(c)){                          //数字
            int fd = 0, fe = 0, fflag = 0, is_neg = 0;
            if(str == "-") is_neg = 1;
            str = "";
            while(isdigit(c) || c == '.' || c == 'e'){
                if(c == '.') fd ++;
                if(c == 'e') fe ++;
                if(c == '.' && fe) fflag = 1;
                str += c;
                is_end = scanf("%c", &c);
            }
            if(str[str.size()-1] == '.' || str[str.size()-1] == 'e') fflag = 1;
            if(fflag){
                cout<<"错误-->不合法数字:"<<(is_neg ? "-" + str : str)<<endl;    //忽略不合法输入
            }
            else{
                if(!fd && !fe){
                    n[++ff].vi = (is_neg ? -stoi(str) : stoi(str));
                    n[ff].id = 1;
                }
                else{
                    n[++ff].vd = (is_neg ? -stof(str) : stof(str));
                    n[ff].id = 2;
                }
                w[++f].id = 3;  w[f].value = (is_neg ? "-" + str : str);
            }
            flag = 1;
        }
        if(is_bound(c)){            //界符
            str = "";
            str += c;
            w[++f].value = str;
            w[f].id = 4;

            is_end = scanf("%c", &c);
            flag = 1;
        }
        string ss = "";
        ss += c;
        if(is_oper(ss)){
            bool is_lastdigt = isdigit(str[str.size()-1]);
            str = "";
            while(is_oper(ss)){
                str += c;
                is_end = scanf("%c", &c);
                ss += c;
            }
            if(str == "-" && !is_lastdigt && isdigit(c)) continue;
            if(is_oper(str)){
                w[++f].value = str;
                w[f].id = 5;
            }
            flag = 1;
        }
        if(!flag && is_end != EOF){
            str = "";
            str += c;
            w[++f].value = str;
            w[f].id = 6;

            is_end = scanf("%c", &c);
        }
    }
    freopen("CON", "r", stdin);           //关闭重定向,恢复标准
}


void to_file1(){                        //主表
    char filename[20];
    puts("请输入词法分析主表要保存到的地址:");
    scanf("%s", filename);
    fstream out;
    out.open(filename, ios::out);
    out<<"1.保留字   2.标识符   3.数字   4.界符   5.操作符   6.其他"<<endl<<endl;
    for(int i=1; i<=f; i++) out<<w[i].id<<"  "<<w[i].value<<endl;
    out.close();
}

void to_file2(){                        //标识符表
    ofstream out;
    char filename[20];
    puts("请输入词法分析标识符表要保存到的地址:");
    scanf("%s", filename);
    out.open(filename, ios::out);
    map<string, int>::iterator it;
    for(it=m.begin(); it!=m.end(); it++) out<<it->first<<endl;
    out.close();
}

void to_file3(){                        //数字表
    ofstream out;
    char filename[20];
    puts("请输入词法分析数字表要保存到的地址:");
    scanf("%s", filename);
    out.open(filename, ios::out);
    out<<"1.int   2.double"<<endl<<endl;
    for(int i=1; i<=ff; i++) out<<n[i].id<<"  "<<(n[i].id == 1 ? n[i].vi : n[i].vd)<<endl;
    out.close();
}


int main(){
    char filename[20];
    puts("请输入您要打开的文件地址:");
    scanf("%s", filename);

    getword(filename);

    to_file1();
    to_file2();
    to_file3();

    return 0;
}



编译原理之词法分析实验终于写完了,收工啦~~~

PS:本人还是比较满意的o(^▽^)o



  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
设计思想 (1)程序主体结构部分: 说明部分 %% 规则部分 %% 辅助程序部分 (2)主体结构的说明 在这里说明部分告诉我们使用的LETTER,DIGIT, IDENT(标识符,通常定义为字母开头的字母数字串)和STR(字符串常量,通常定义为双引号括起来的一串字符)是什么意思.这部分也可以包含一些初始化代码.例如用#include来使用标准的头文件和前向说明(forward ,references).这些代码应该再标记"%{"和"%}"之间;规则部分>可以包括任何你想用来分析的代码;我们这里包括了忽略所有注释中字符的功能,传送ID名称和字符串常量内容到主调函数和main函数的功能. (3)实现原理 程序中先判断这个句语句中每个单元为关键字、常数、运算符、界符,对与不同的单词符号给出不同编码形式的编码,用以区分之。 PL/0语言的EBNF表示 <常量定义>::=<标识符>=<无符号整数>; <标识符>::=<字母>={<字母>|<数字>}; <加法运算符>::=+|- <乘法运算符>::=*|/ <关系运算符>::==|#|<|<=|>|>= <字母>::=a|b|…|X|Y|Z <数字>::=0|1|2|…|8|9 三:设计过程 1. 关键字:void,main,if,then,break,int,Char,float,include,for,while,printfscanf 并为小写。 2."+”;”-”;”*”;”/”;”:=“;”:”;”<“;”<=“;”>“;”>=“;”<>“;”=“;”(“;”)”;”;”;”#”为运算符。 3. 其他标记 如字符串,表示以字母开头的标识符。 4. 空格符跳过。 5. 各符号对应种别码 关键字分别对应1-13 运算符分别对应401-418,501-513。 字符串对应100 常量对应200 结束符# 四:举例说明 目标:实现对常量的判别 代码: digit [0-9] letter [A-Za-z] other_char [!-@\[-~] id ({letter}|[_])({letter}|{digit}|[_])* string {({letter}|{digit}|{other_char})+} int_num {digit}+ %% [ |\t|\n]+ "auto"|"double"|"int"|"struct"|"break"|"else"|"long"|"switch"|"case"|"enum"|"register"|"typedef"|"char"|"extern"|"return"|"union"|"const"|"float"|"short"|"unsigned"|"continue"|"for"|"signed"|"void"|"default"|"goto"|"sizeof"|"do"|"if"|"static"|"while"|"main" {Upper(yytext,yyleng);printf("%s,NULL\n",yytext);} \"([!-~])*\" {printf("CONST_string,%s\n",yytext);} -?{int_num}[.]{int_num}?([E][+|-]?{int_num})? {printf("CONST_real,%s\n",yytext);} "0x"?{int_num} {printf("CONST_int,%s\n",yytext);} ","|";"|"("|")"|"{"|"}"|"["|"]"|"->"|"."|"!"|"~"|"++"|"--"|"*"|"&"|"sizeof"|"/"|"%"|"+"|"-"|">"|"<"|">="|"<="|"=="|"!="|"&"|"^"|"|"|"&"|"||"|"+="|"-="|"*="|"/="|"%="|">>="|"<<="|"&="|"^="|"|="|"=" {printf("%s,NULL\n",yytext);} {id} {printf("ID,%s\n",yytext);} {digit}({letter})+ {printf("error1:%s\n",yytext);} %% #include <ctype.h> Upper(char *s,int l) { int i; for(i=0;i<l;i++) { s[i]=toupper(s[i]); } } yywrap() { return 1; } 五:DFA 六:数据测试 七:心得体会 其实匹配并不困难,主要是C++知识要求相对较高,只要把握住指针就好了。 附源程序: #include<iostream.h> #include<stdio.h> #include<stdlib.h> #include<string.h> int i,j,k,flag,number,status; /*status which is use to judge the string is keywords or not!*/ char ch; char words[10] = {" "}; char program[500]; int Scan(char program[]) { char *keywords[13] = {"void","main","if","then","break","int", "char","float","include","for","while","printf", "scanf"}; number = 0; status = 0; j = 0; ch = program[i++]; /* To handle the lettle space ands tab*/ /*handle letters*/ if ((ch >= 'a') && (ch <= 'z' )) { while ((ch >= 'a') && (ch <= 'z' )) { words[j++]=ch; ch=program[i++]; } i--; words[j++] = '\0'; for (k = 0; k < 13; k++) if (strcmp (words,keywords[k]) == 0) switch(k) { case 0:{ flag = 1; status = 1; break; } case 1:{ flag = 2; status = 1; break; } case 2:{ flag = 3; status = 1; break; } case 3:{ flag = 4; status = 1; break; } case 4:{ flag = 5; status = 1; break; } case 5:{ flag = 6; status = 1; break; } case 6:{ flag = 7; status = 1; break; } case 7:{ flag = 8; status = 1; break; } case 8:{ flag = 9; status = 1; break; } case 9:{ flag = 10; status = 1; break; } case 10:{ flag = 11; status = 1; break; } case 11:{ flag = 12; status = 1; break; } case 12:{ flag = 13; status = 1; break; } } if (status == 0) { flag = 100; } } /*handle digits*/ else if ((ch >= '0') && (ch <= '9')) { number = 0; while ((ch >= '0' ) && (ch <= '9' )) { number = number*10+(ch-'0'); ch = program[i++]; } flag = 200; i--; } /*opereation and edge handle*/ else switch (ch) { case '=':{ if (ch == '=') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 401; } else { i--; flag = 402; } break; } case'>':{ if (ch == '>') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 403; } else { i--; flag = 404; } break; } case'<':{ if (ch == '<') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 405; } else { i--; flag = 406; } break; } case'!':{ if (ch == '!') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 407; } else { i--; flag = 408; } break; } case'+':{ if (ch == '+') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 409; } else if (ch == '+') { words[j++] = ch; words[j] = '\0'; flag = 410; } else { i--; flag = 411; } break; } case'-':{ if (ch == '-') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 412; } else if( ch == '-') { words[j++] = ch; words[j] = '\0'; flag = 413; } else { i--; flag = 414; } break; } case'*':{ if (ch == '*') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 415; } else { i--; flag = 416; } break; } case'/':{ if (ch == '/') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 417; } else { i--; flag = 418; } break; } case';':{ words[j] = ch; words[j+1] = '\0'; flag = 501; break; } case'(':{ words[j] = ch; words[j+1] = '\0'; flag = 502; break; } case')':{ words[j] = ch; words[j+1] = '\0'; flag = 503; break; } case'[':{ words[j] = ch; words[j+1] = '\0'; flag = 504; break; } case']':{ words[j] = ch; words[j+1] = '\0'; flag = 505; break; } case'{':{ words[j] = ch; words[j+1] = '\0'; flag = 506; break; } case'}':{ words[j] = ch; words[j+1] = '\0'; flag = 507; break; } case':':{ words[j] = ch; words[j+1] = '\0'; flag = 508; break; } case'"':{ words[j] = ch; words[j+1] = '\0'; flag = 509; break; } case'%':{ if (ch == '%') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 510; } else { i--; flag = 511; } break; } case',':{ words[j] = ch; words[j+1] = '\0'; flag = 512; break; } case'#':{ words[j] = ch; words[j+1] = '\0'; flag = 513; break; } case'@':{ words[j] = '#'; flag = 0; break; } default:{ flag = -1; break; } } return flag; } main() { i=0; printf("please input a program end with @"); do { ch = getchar(); program[i++] = ch; }while(ch != '@'); i = 0; do{ flag = Scan(program); if (flag == 200) { printf("(%2d,%4d)",flag,number); } else if (flag == -1) { printf("(%d,error)",flag); } else { printf("(%2d,%4s)",flag,words); } }while (flag != 0); system("pause"); }

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值