编译原理语法分析

最新推荐文章于 2024-08-05 10:57:04 发布
程序猿的探索之路
最新推荐文章于 2024-08-05 10:57:04 发布
阅读量550
点赞数
分类专栏：编译原理 c++
本文链接：https://blog.csdn.net/nyist_yangguang/article/details/114585052
版权
c++ 同时被 2 个专栏收录
61 篇文章 1 订阅
订阅专栏
编译原理
11 篇文章 2 订阅
订阅专栏
上接词法分析

递归下降的方法实现算术表达式的语法分析器：

表达式、项、因子三个分析函数在词法分析结果的基础上相互递归调用，指针沿着词法分析得到的二元组序列不断后移，

中间加入一些判断条件，不符合条件就报错。

测试程序：

// PL/0 lexer plus a recursive-descent syntax checker for arithmetic expressions
#include<bits/stdc++.h>
using namespace std;

// Token stream shared by the parser: cifafenxi() writes c.txt, advance() reads it back.
ifstream infile("c.txt");
string str;   // most recent raw line read from infile by advance()
string sym;   // category code of the current token, kept as text
string sym1;  // category code of the token that preceded sym
int temp=0;   // 0 until advance() has stored its first token, 1 afterwards

void expressionAnalysis(); // parses an expression
void termAnalysis();       // parses a term (the declaration used to be misspelled "termAnaysis")
void factorAnalysis();     // parses a factor
int advance();             // loads the next token into sym

int conterr=0; // number of errors reported so far
int lpnum=0;   // number of '(' that are currently open
int found;     // index of the first ',' in str, as computed by advance()
int flag=0;    // result of the latest advance() call
string s;      // source text echoed when the expression is accepted
// One lexical token as returned by scanner().
struct _2tup
{
    string token; // token text; scanner() stores placeholder text here for newline and error tokens
    int id;       // category code; scanner() uses -1 for an error and -2 for a newline
};

int advance() //SYM的移动
{
    if(!getline(infile,str)) //从文件中提取字符
    {
        return 0;
    }
    found=str.find(',',0);

    if(found==-1) //当为error的时候，没有‘，’
    {
        conterr++;
        cout<<"语法错误 识别字符错误"<<endl;
        return -1;
    }
    if(temp==1)
        sym1=sym;
    sym=str.substr(1,found-1);

//    cout<<str.substr(found+1,4)<<endl;

    if(temp==0)
    {
        sym1=sym;
        temp=1;
    }
//    cout<<sym<<"  "<<sym1<<endl;
   if(sym=="23" && sym1=="24")
   {
       conterr++;
       cout<<"')'和'('中间缺少运算符\n";
       return -1;
   }
   if(sym=="24" && sym1=="23")
   {
       conterr++;
       cout<<"'('和')'中间缺少项\n";
       return -1;
   }
   if(sym=="24"&&(sym1!="28"&&sym1!="29"))
   {
       conterr++;
       cout<<"')'前面缺少项\n";
       return -1;
   }
   if(sym1=="24"&&(sym=="28"||sym=="29"))
   {
       conterr++;
       cout<<"')'后缺少运算符\n";
       return -1;

   }
   if(sym1=="23"&&(sym=="12"||sym=="13"||sym=="14"||sym=="15"))
   {
       conterr++;
       cout<<"'('后缺少项\n";
       return -1;

   }



//    cout<<sym<<endl;
    return 1;
}

// Parses one factor starting at the current token sym:
//   - code 28 or 29 (identifier / unsigned integer), or
//   - code 23 '(' followed by an expression and code 24 ')'.
// On any problem a message is printed and conterr is incremented; flag always
// holds the result of the last advance() call made here.
void factorAnalysis()
{
    const bool operand_first=(sym=="28" || sym=="29");
    const bool lparen_first=(sym=="23");

    if(!operand_first && !lparen_first)
    {
        conterr++;
        cout<<"语法错误 因子首部不为<标识符>|<无符号整数>|'('"<<endl;
        return;
    }

    if(operand_first)
    {
        // Step over the operand, then look at what follows it.
        flag=advance();
        if(conterr)
        {
            return;
        }
        if(sym=="23")
        {
            cout<<"'('前缺少运算符\n";
            conterr++;
        }
        else if(lpnum==0 && sym=="24")
        {
            cout<<"语法错误 ')'不匹配"<<endl;
            conterr++;
        }
        return;
    }

    // From here on the factor is a parenthesised expression.
    if(sym1=="28" || sym1=="29")
    {
        conterr++;
        cout<<"(前面缺少运算符"<<endl;
        return;
    }

    ++lpnum;
    flag=advance();
    if(conterr)
    {
        return;
    }
    if(flag==0)
    {
        // Input ended right after the '('.
        cout<<"语法错误 '('后缺少表达式"<<endl;
        conterr++;
        return;
    }

    expressionAnalysis();
    if(conterr)
    {
        return;
    }

    if(flag!=0 && sym=="24")
    {
        // Matching ')' found: close the parenthesis and step past it.
        --lpnum;
        flag=advance();
        return;
    }

    cout<<"语法错误 表达式后面缺少')'"<<endl;
    conterr++;
}

// Parses a term: one factor followed by any number of
// "<code 14 or 15> factor" pairs (the multiplicative operators).
// Stops as soon as conterr becomes non-zero.
void termAnalysis()
{
    factorAnalysis();

    while(!conterr && (sym=="14" || sym=="15"))
    {
        // Step over the operator; a factor has to follow it.
        flag=advance();
        if(conterr)
        {
            return;
        }
        if(flag==0)
        {
            cout<<"语法错误 <乘法运算符>后缺因子"<<endl;
            conterr++;
            return;
        }
        factorAnalysis();
    }
}

// Parses an expression:  [ '+' | '-' ] term { ( '+' | '-' ) term }
// where codes 12 and 13 are the additive operators.
//
// Does nothing when an error has already been recorded. On a new error a
// message is printed and conterr is incremented. flag holds the result of the
// last advance() call.
//
// Fixes relative to the earlier version:
//   - the optional leading sign was tested against codes 14/15 (the
//     multiplicative operators) instead of 12/13;
//   - after stepping over that sign the function returned when there was NO
//     error, so the rest of the expression was never analysed;
//   - a copy of the same block after termAnalysis() only ran once an error
//     had already been reported and printed a second, misleading message.
void expressionAnalysis()
{
    if(conterr)
    {
        return;
    }

    // Optional leading sign.
    if(sym=="12" || sym=="13")
    {
        flag=advance();
        if(conterr)
        {
            return;
        }
        if(flag==0)
        {
            cout<<"语法错误 <加法运算符>后缺项"<<endl;
            conterr++;
            return;
        }
    }

    termAnalysis();
    if(conterr)
    {
        return;
    }

    // Further terms, each introduced by an additive operator.
    while(sym=="12" || sym=="13")
    {
        flag=advance();
        if(conterr)
        {
            return;
        }
        if(flag==0)
        {
            cout<<"语法错误 <加法运算符>后缺项"<<endl;
            conterr++;
            return;
        }
        termAnalysis();
        if(conterr)
        {
            return;
        }
    }
}

// Returns true when ch is a space or a horizontal tab.
// '\n' is deliberately not a blank: scanner() turns it into a token of its own.
// The earlier version compared against the multi-character literal '    '
// (four spaces), which can never equal a char, so tabs were not recognised.
bool is_blank(char ch)
{
    return ch == ' ' || ch == '\t';
}
// Advances pos by one and, when the new position is still inside prog,
// stores the character found there in ch.
// Returns true if ch was updated, false if pos moved past the end (ch is
// then left unchanged).
bool gofor(char& ch, string::size_type& pos, const string& prog)
{
    pos += 1;
    const bool inside = pos < prog.size();
    if (inside)
    {
        ch = prog[pos];
    }
    return inside;
}

// True for a character that may start an identifier or keyword.
static bool scanner_is_word_start(char ch)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}

// True for an ASCII decimal digit.
static bool scanner_is_digit(char ch)
{
    return ch >= '0' && ch <= '9';
}

// Category code registered for lexeme in keys, or 0 when it is not listed.
static int scanner_code_of(const map<string, int>& keys, const string& lexeme)
{
    const map<string, int>::const_iterator hit = keys.find(lexeme);
    return hit != keys.end() ? hit->second : 0;
}

// Extracts one token from prog, starting at pos, and moves pos past it.
//
// prog : text being scanned
// pos  : in/out position inside prog
// keys : lexeme -> category code table
// row  : not used here; the caller counts lines from the newline tokens
//
// The returned id is
//   - for a word: its code in keys, or keys.size() if it is not a keyword;
//   - for a run of digits: keys.size() - 1;
//   - for an operator or delimiter: its code in keys, or 0 if not listed;
//   - -2 for '\n' (token text "换行");
//   - -1 for any other character (token text "错误").
//
// Fixes relative to the earlier version:
//   - the special case for "odd" read through an uninitialised iterator and
//     cut longer identifiers short; keywords are now only resolved by the
//     table lookup after the whole word has been read;
//   - no character is read from beyond the end of prog;
//   - the decimal-point count on numbers was dead code (the token holds
//     digits only) and has been dropped.
_2tup scanner(const string& prog, string::size_type& pos, const map<string, int>& keys, int& row)
{
    (void)row;

    string token;
    int id = 0;

    while (pos < prog.size() && is_blank(prog[pos]))
    {
        ++pos;
    }
    // '\0' stands for "end of input" and ends up in the error branch below.
    char ch = pos < prog.size() ? prog[pos] : '\0';

    if (scanner_is_word_start(ch))
    {
        // Identifier or keyword: letters, digits and underscores.
        while (scanner_is_word_start(ch) || scanner_is_digit(ch))
        {
            token += ch;
            if (!gofor(ch, pos, prog))
            {
                break;
            }
        }
        // Assume a plain identifier unless the table lists the word.
        id = static_cast<int>(keys.size());
        const map<string, int>::const_iterator cit = keys.find(token);
        if (cit != keys.end())
        {
            id = cit->second;
        }
    }
    else if (scanner_is_digit(ch))
    {
        // Unsigned integer constant.
        while (scanner_is_digit(ch))
        {
            token += ch;
            if (!gofor(ch, pos, prog))
            {
                break;
            }
        }
        id = static_cast<int>(keys.size()) - 1;
    }
    else
    {
        switch (ch)
        {
        case ':': // ":" or ":="
            token += ch;
            if (gofor(ch, pos, prog) && ch == '=')
            {
                token += ch;
                gofor(ch, pos, prog); // step past '=' so it is not scanned again
            }
            id = scanner_code_of(keys, token);
            break;

        case '<': // "<", "<>" or "<="
            token += ch;
            if (gofor(ch, pos, prog) && (ch == '>' || ch == '='))
            {
                token += ch;
                gofor(ch, pos, prog);
            }
            id = scanner_code_of(keys, token);
            break;

        case '>': // ">" or ">="
            token += ch;
            if (gofor(ch, pos, prog) && ch == '=')
            {
                token += ch;
                gofor(ch, pos, prog);
            }
            id = scanner_code_of(keys, token);
            break;

        case '+':
        case '-':
        case '*':
        case '/':
        case '=':
        case '(':
        case ')':
        case ',':
        case '.':
        case ';':
            // Single-character operators and delimiters.
            token += ch;
            gofor(ch, pos, prog);
            id = scanner_code_of(keys, token);
            break;

        case '\n':
            token += "换行";
            ++pos;
            id = -2;
            break;

        default:
            token += "错误";
            ++pos;
            id = -1;
            break;
        }
    }

    _2tup ret;
    ret.token = token;
    ret.id    = id;
    return ret;
}

// Loads the lexeme -> category code table from a text file.
//
// file : path of the table; every useful line holds "<lexeme> <integer code>"
// keys : cleared first, then filled with one entry per well-formed line
//
// If the file cannot be opened a message goes to cerr and keys stays empty.
// Lines that do not contain both a lexeme and an integer (blank lines, for
// instance) are skipped; the earlier version stored whatever was left in the
// variables from the previous line.
void init_keys(const string& file, map<string, int>& keys)
{
    keys.clear();

    ifstream fin(file.c_str());
    if (!fin)
    {
        cerr << file << " doesn't exist!" << endl;
        return;
    }

    string line;
    while (getline(fin, line))
    {
        istringstream fields(line);
        string key;
        int id;
        if (fields >> key >> id)
        {
            keys[key] = id;
        }
    }
}

// Replaces prog with the content of file, read line by line; every line,
// including the last one, is terminated with '\n' in prog.
// When the file cannot be opened a message is written to cerr and prog ends
// up empty.
void read_prog(const string& file, string& prog)
{
    ifstream source(file.c_str());
    if (!source)
    {
        cerr << file << " error!" << endl;
        //  exit(2);
    }

    prog.clear();
    for (string text; getline(source, text); )
    {
        prog += text;
        prog += '\n';
    }
}

void cifafenxi()
{
    map<string, int> keys;
    init_keys("a.txt", keys);

    string prog;
    read_prog("b.txt", prog);

    vector< _2tup > tups;
    string token, id;

    string::size_type pos = 0;//size_type属于string标准库，作用可看做相当于unsigned·int
    int row  = 1;

    _2tup tu;
    int no = 0;
//    freopen("CON", "w", stdout);//结果在控制台上输出
    freopen("c.txt","w",stdout);

    do
    {
        tu = scanner(prog, pos, keys, row);

        switch (tu.id)
        {
        case -1://返回的是错误
            ++no;
            cout << no << ": ";
            cout << "Error in row" << row << "!" << '<' << tu.token<< "," << tu.id << '>' << endl;
            tups.push_back(tu);
            break;
        case -2:
            ++row;
            // cout << '<' << tu.token<< "," << tu.id << '>' << endl;
            break;
        default:

            s=prog;
            cout << '(' << tu.id<< "," << tu.token << ')' << endl;

            tups.push_back(tu);
            break;
        }
    }
    while (pos < prog.size());

}

void yufafenxi()//语法分析
{
    freopen("CON", "w", stdout);//结果在控制台上输出
    flag=advance();
    if(flag)
    {
        expressionAnalysis();
    }
    if(flag!=-1 && !conterr)
    {
        cout<<"正确："<<s<<endl;
    }

}
// Program entry point: runs the lexical pass, then the syntax pass.
int main()
{
    cifafenxi();   // tokenises b.txt into c.txt
    yufafenxi();   // checks the tokens read back from c.txt
}