词法分析器

要求和提示

  1. 单词的分类。可将所有标识符归为一类;将常数归为另一类;保留字和分隔符则采取一词一类。

  2. 符号表的建立。可事先建立一保留字表,以备在识别保留字时进行查询。变量名表及常数表则在词法分析过程中建立。

  3. 单词串的输出形式。所输出的每一单词,均按形如(CLASS,VALUE)的二元式编码。对于变量标识符和常数,CLASS字段为相应的类别码,VALUE字段则是该标识符、常数在其符号表中登记项的序号(要求在变量名表登记项中存放该标识符的字符串,其最大长度为四个字符;常数表登记项中则存放该整数的二进制形式)。对于保留字和分隔符,由于采用一词一类的编码方式,所以仅需在二元式的CLASS字段上放置相应单词的类别码,VALUE字段则为“空”。不过,为便于查看由词法分析程序所输出的单词串,也可以在CLASS字段上直接放置单词符号串本身。

分析

这里写图片描述

数据说明

这里写图片描述

source.txt 存放待分析的代码
list.txt 存放标准匹配表
ans.txt 存放生成的二元组

//Path of the source file to analyse; main() passes it to func().
#define filePath "/Users/sequin_yf/Desktop/source.txt"
//Path of the match list (one keyword or symbol per line) opened by doList().
#define listPath "/Users/sequin_yf/Desktop/list.txt"
//Code given to the first line of the match list (doList() assigns KEY + index).
//analyse() also uses this value as the "current run is letters" flag.
#define KEY 1
//Margin subtracted from buf.max_size() when getSource() checks the file size.
#define SAFE 10240
//judge() reports list codes below this value as KEY and the rest as SYMBOL.
//analyse() also uses this value as the "current run is symbols" flag.
#define SYMBOL 32
//analyse() uses this value as the "current run is digits" flag.
#define NUMBER 2


//vfile:  raw lines of the source file, filled by getSource().
//source: space-separated words with comments removed, filled by pretreatment().
vector <string> source, vfile;

//Tokens produced by analyse() and consumed by judge().
queue <string> ret;

//Keyword/symbol text -> numeric code, filled by doList().
map <string, long> list;

//Shared line buffer used by getSource() and pretreatment().
string buf;
//const char* cStr = buf.c_str();


//Loads the match list from listPath into `list`.
bool doList();
//Reads the file at `path` line by line into `vfile`.
void getSource(string path);
//Splits the lines in `vfile` into words stored in `source`.
void pretreatment();
//Splits every word in `source` into letter/digit/symbol runs pushed to `ret`.
void analyse();
//Classifies and prints every token in `ret`.
void judge();
//Returns true when `str` parses completely as a number.
bool isNum(string str);
//Prints the operators contained in a run of symbol characters.
void symbol_judge(string str);
//NOTE(review): duplicate of the doList() declaration above.
bool doList();
//Runs the whole pipeline with stdout redirected to the answer file.
void func(string path);

代码

#include "./lex.h"
vector<string>::iterator iter;
/**
 * Reads the file at `path` line by line and appends every line to the
 * global `vfile`.
 *
 * Before reading, the file size is compared with the capacity of the global
 * line buffer `buf` (minus the SAFE margin). On any failure the error name is
 * printed to stdout and the process exits with status 1.
 *
 * @param path  Path of the source file to load.
 */
void getSource(string path) {

    ifstream file;
    try {
        //open; a failed open must be reported here, otherwise the size
        //check below sees tellg() == -1 and reports a bogus EOVERFLOW
        file.open(path);
        if(!file.is_open()) {
            throw EACCES;
        }

        //size
        file.seekg(0, ios_base::end);
        const long long filesize = file.tellg();
        if(filesize < 0) {
            throw EIO;
        }
        //compare as unsigned so a large size cannot be misread
        if(static_cast<unsigned long long>(filesize) > buf.max_size() - SAFE) {
            throw EOVERFLOW;
        }
        file.seekg(0, ios_base::beg);

        //copy
        while(getline(file, buf)) {
            vfile.push_back(buf);
        }
    }catch(const int err) {
        //error
        switch(err) {
            case EOVERFLOW:
                cout << "EOVERFLOW" << endl;    break;
            case EACCES:
                cout << "EACCES" << endl;   break;
            case EIO:
                cout << "EIO" << endl;  break;
            default:break;
        }
        exit(1);
    }

    //close code file
    file.close();
}

void pretreatment() {

    //for every single line
    string str;
    for(iter = vfile.begin(); iter != vfile.end(); iter++) {

        char temp[40];  //temp array for store;
        string word;
        buf = *iter;    //one line of source code file string;
        int pos = 0, i = 0; //pretreatment source index and source code index;

        //whitespace
        while (buf[i] == ' ') i++;  //eliminate the fronts of whitespaces
        while (i < buf.length()) {

            //eliminate notes
            if (buf[i] == '/' && buf[i + 1] == '*') {
                i += 2;
                while (buf[i] != '*' && buf[i + 1] != '/') {
                    i++;
                }
                i++;
            }

            //push the pretreatment word
            if (buf[i] == ' ') {
                temp[pos] = '\0';
                word = temp;
                source.push_back(word);
                pos = 0;
            }

            //whitespace in case
            while (buf[i] == ' ') i++;
            temp[pos] = buf[i++];
            pos++;

        }
        //linebreak in case
        if(pos != 0) {
            temp[pos] = '\0';
            word = temp;
            source.push_back(word);
        }
    }
}


bool doList() {

    // import match criteria
    fstream lfile;
    lfile.open(listPath);
    string str;
    int index = 0;

    try{

        //number the key words and symbol
        while(getline(lfile, str)) {
            if(index < SYMBOL) {
                list[str] = KEY + index;
            }
            else{
                list[str] = KEY + index;
            }
            index++;
        }
    }catch (int err){
        return false;
    }

    lfile.close();
    return true;
}

//Returns true when the whole of `str` can be read as one floating-point
//value, i.e. a number is extracted and no further non-blank character follows.
bool isNum(string str) {
    istringstream reader(str);
    double value;
    if(reader >> value) {
        //anything still readable after the number disqualifies the string
        char leftover;
        return !(reader >> leftover);
    }
    return false;
}


void analyse() {

    //for each string
    for(iter = source.begin(); iter != source.end(); iter++) {
        string temp = *iter;
        //split result string
        string ans;
        int index = 0;
        //define every char
        int flag = 0;
        //for string into char[]
        char ans_c[temp.length()];

        //for each char
        for(int i = 0; i < temp.length(); i++) {

            //if a number
             if(isdigit(temp[i])){
                 //last char is not a number
                if(flag != NUMBER && flag != 0) {
                    //make the result string,
                    ans_c[index] = '\0';
                    ans = ans_c;
                    ret.push(ans);
                    //change the flag type
                    flag = NUMBER;
                    //init index
                    index = 0;
                    //new result string
                    ans_c[index++] = temp[i];
                }
                    //last char still a number
                else{
                    //append into result string
                    flag = NUMBER;
                    ans_c[index++] = temp[i];
                }
            }

            //if a letter
            if(isalpha(temp[i])){
                //last char is not a letter
                if(flag != KEY && flag != 0) {
                    //make the result string
                    ans_c[index] = '\0';
                    ans = ans_c;
                    ret.push(ans);
                    //change flag type
                    flag = KEY;
                    //init result string
                    index = 0;
                    ans_c[index++] = temp[i];
                }
                else{
                    //append
                    flag = KEY;
                    ans_c[index++] = temp[i];
                }
            }
            //if a symbol
            if(!isalpha(temp[i]) && !isdigit(temp[i])){
                //last char is not symbol
                if(flag != SYMBOL && flag != 0) {
                    //make the result string
                    ans_c[index] = '\0';
                    ans = ans_c;
                    ret.push(ans);
                    //change flag type
                    flag = SYMBOL;
                    //init result string
                    index = 0;
                    ans_c[index++] = temp[i];
                }
                else{
                    //append
                    flag = SYMBOL;
                   ans_c[index++] = temp[i];
                }
            }

        }
        //the last result string
        ans_c[index] = '\0';
        ans = ans_c;
        ret.push(ans);
    }
}


void symbol_judge(string str) {
    //for each symbol string
    for(int i = 0; i < str.length(); i++) {
        //case of += -= *= /= != ==
        if(str[i] == '+' || str[i] == '-' || str[i] == '*' \
 || str[i] == '/' || str[i] == '!' || str[i] == '=') {
            if(str[i+1] == '=' ) {
                cout << "SYMBOL: " << str << " -> " << str[i] << \
                            str[i+1] << endl;
                i += 2;
            }
        }
        //case of ++
        if(str[i] == '+' && str[i+1] == '+'){
            char t[str.length()+1];
            t[0] = str[i];
            t[1] = str[i+1];
            t[2] = '\0';
            cout << "SYMBOL: " << str[i] << str[i+1]<< " -> " << list[t] << endl;
            i += 2;
        }
        //case of --
        if(str[i] == '-' && str[i+1] == '-'){
            char t[str.length()+1];
            t[0] = str[i];
            t[1] = str[i+1];
            t[2] = '\0';
            cout << "SYMBOL: " << str[i] << str[i+1] << " -> " << list[t] << endl;
            i += 2;
        }
        //case of <<
        if(str[i] == '<' && str[i+1] == '<'){
            char t[str.length()+1];
            t[0] = str[i];
            t[1] = str[i+1];
            t[2] = '\0';
            cout << "SYMBOL: " << str[i] << str[i+1] << " -> " << list[t] << endl;
            i += 2;
        }
        //case of >>
        if(str[i] == '>' && str[i+1] == '>'){
            char t[str.length()+1];
            t[0] = str[i];
            t[1] = str[i+1];
            t[2] = '\0';
            cout << "SYMBOL: " << str[i] << str[i+1] << " -> " << list[t] << endl;
            i += 2;
        }
        if(str[i] == ':' && str[i+1] == ':'){
            char t[str.length()+1];
            t[0] = str[i];
            t[1] = str[i+1];
            t[2] = '\0';
            cout << "SYMBOL: " << str[i] << str[i+1] << " -> " << list[t] << endl;
            i += 2;
        }
        // case of single symbol
        char t[str.length()+1];
        t[0] = str[i];
        t[1] = '\0';
        if(list[t]){
            cout << "SYMBOL: " << t << " -> " << list[t] << endl;
        }
        else{
            cout << "WORD: " << str[i] << endl;
        }

    }
}

int other = 0;
string path;
/**
 * Handles the token that follows an "include" keyword.
 *
 * Prints the token at the front of the global queue `ret`. If it starts
 * with '<' nothing else happens. Otherwise that token is removed, the next
 * token is taken as a header name, the global `path` is set to the matching
 * ".h" file in the project directory, the path is printed and the global
 * counter `other` is incremented.
 *
 * Does nothing when the tokens it needs are not in the queue.
 * NOTE(review): the only call site (in judge()) is commented out.
 */
void define_func() {
    if(ret.empty()) {
        return;
    }
    string str = ret.front();
    cout << str << endl;
    if(str[0] == '<'){
        return;
    }
    ret.pop();
    if(ret.empty()) {
        return;
    }
    str = ret.front();
    //assign the global `path`; a local of the same name used to shadow it,
    //so the computed header path was lost when this function returned
    path = "/Users/sequin_yf/CLionProjects/lex/" + str + ".h";
    cout << path << endl;
    other++;
}

void judge() {

    //for each result string
    while(!ret.empty()) {
        string str;
        //cout << str << endl;
        str = ret.front();
        ret.pop();

        //is number string
        if(isNum(str)) {
            cout << "NUMBER: " << str << endl;
        }
            //can match ,is key word or symbol
        else if(list[str]){
            if(list[str] < SYMBOL) {
                //include
//                if(str == "include"){
//                    define_func();
//                    if(other) continue;
//                }
                cout << "KEY: "<< str << " -> " <<  list[str] << endl;
            }
            else{
                cout << "SYMBOL: " << str << " -> " << list[str] << endl;
            }
        }
            //cant match, is symbol or word
        else {
            //word
            if(isalpha(str[0]))
                cout << "WORD: " << str << endl;
                //symbol
            else{
               symbol_judge(str);
            }
        }
    }

}

/**
 * Runs the whole lexical analysis for one source file.
 *
 * stdout is redirected to the answer file, the match list is loaded, and the
 * file at `path` is read, split into words, split into tokens and
 * classified. If the answer file or the match list cannot be opened, a
 * message is written to stderr and the function returns without analysing.
 *
 * @param path  Path of the source file to analyse.
 */
void func(string path){
    if(freopen("/Users/sequin_yf/Desktop/ans.txt", "w", stdout) == nullptr) {
        cerr << "cannot open answer file" << endl;
        return;
    }
    //key words and symbol match
    if(!doList()) {
        cerr << "cannot load match list: " << listPath << endl;
        return;
    }
    getSource(path); //open the requested file (used to ignore `path`)
    pretreatment(); //eliminate white space and notes ,pick up vaild strings
    analyse(); //split each strings by num, symbol and alpha
    judge(); //get results

}

//Program entry: analyses the file named by filePath, then prints "OVER".
int main() {
    func(filePath);

    //Re-running func() on the headers counted in `other` is not enabled.
    cout << "OVER" << endl;

    return 0;
}
  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值