LR(0)文法分析程序

刷题菌

已于 2023-11-26 19:12:20 修改

阅读量77

点赞数

文章标签：算法数据结构

于 2023-11-25 14:57:33 首次发布

本文链接：https://blog.csdn.net/m0_63645510/article/details/134613835

版权

//
// Created by WXuanY on 2023/11/24.
//
//LR(0)语法分析程序
#include <cstdio>
#include <cctype>
#include <cstring>
#include<iostream>
#include<algorithm>
#include<stack>
// ---- Global state shared by the item-set construction and the parser driver ----
// Array capacities are sized for the limits main() works with
// (up to 100 productions and up to 100 item sets / table rows).
int index1 = 0;           // index of the item set currently being closed
int index11 = 1;          // number of item sets allocated so far (also the slot of the next new set)
int index2 = 0;           // number of items in the item set currently being built
int goNum = 0;            // number of entries stored in the GO table
int index2s[100];         // index2s[i] = number of items in item set i
char LR0[100][100][100];  // textual parse table: LR0[state][column] holds "Sn", "rn", "n", "acc" or "null"
char L[200];              // column symbols of the parse table, used to map a symbol to its column
int  del[100];            // del[k] = length of the right side of production k
char head[100];           // head[k] = left-side symbol of production k
std::stack<int>con;       // state stack
std::stack<char>cmp;      // symbol stack
char cod[300]="0";        // printable form of the state stack (starts with state 0)
int cindex = 0;           // index of the last character used in cod
char sti[300]="#";        // printable form of the symbol stack (starts with '#')
int sindex = 0;           // index of the last character used in sti
// One cell of the analysis table built by creatOPG.
struct Trans {
    char act;      // action letter looked up in the GO table ('S', 'r', or '\0' when there is none)
    int state;     // number looked up in the GO table (target state or production number; 0 when none)
    int nowState;  // state number of the row this cell belongs to
    char pan;      // grammar symbol of the column this cell belongs to (the header row)
};
// A grammar production; also used for LR(0) items, where `right` contains a '.' marker.
struct Production {
    char left[10];   // left side of the production
    char right[10];  // right side of the production
};
// One entry of the GO table: a transition (or reduction) recorded for a state and a symbol.
struct Got {
    int fore;  // state (item set) number the entry starts from
    char Pan;  // grammar symbol the entry applies to
    int next;  // target state number; for entries written by creatGo1 it is the production number
    char act;  // action letter ('S' or 'r'); stays unset for non-terminal symbols in creatGo
};
// Split the symbols of the grammar into non-terminals and terminals.
//
//   G   - grammar lines of the form "X:rhs"
//   num - number of lines in G
//   VN  - out: every uppercase letter except 'W', in order of first appearance
//   VT  - out: every other character except ':' and 'W', in order of first
//         appearance, followed by the end marker '#'
//
// Both output buffers are terminated after every insertion, so they do not
// have to be zero-filled by the caller.
void extractSymbols(char G[][100], int num, char* VN, char* VT) {
    int vnCount = 0, vtCount = 0;
    VN[0] = '\0';
    VT[0] = '\0';
    for (int i = 0; i < num; i++) {
        for (const char *p = G[i]; *p != '\0'; ++p) {
            const char symbol = *p;
            // ':' is the separator and 'W' the augmented start symbol: neither is collected.
            if (symbol == ':' || symbol == 'W') {
                continue;
            }
            // isupper requires a value representable as unsigned char.
            const bool isNonTerminal = isupper(static_cast<unsigned char>(symbol)) != 0;
            char *set = isNonTerminal ? VN : VT;
            int &count = isNonTerminal ? vnCount : vtCount;
            if (strchr(set, symbol) == NULL) {
                set[count++] = symbol;
                set[count] = '\0';  // keep the set a valid string for the next strchr
            }
        }
    }
    // The end marker is always the last terminal.
    VT[vtCount] = '#';
    VT[vtCount + 1] = '\0';
}
// Fill prod[0..n-1] from the grammar lines G[0..n-1].
//
// Each line is split at its first ':' into a left and a right side. A line
// without ':' is reported and its record is left untouched. Sides that do not
// fit into the fixed-size fields of Production are reported and truncated
// instead of overflowing the fields.
void initProduction(Production *prod, char G[][100], int n){
    for (int i = 0; i < n; i++) {
        const char *sep = strchr(G[i], ':');  // position of the separator
        if (sep == NULL) {
            printf("未找到分隔符\n");
            continue;
        }
        const size_t leftCap = sizeof(prod[i].left) - 1;    // room left for the terminator
        const size_t rightCap = sizeof(prod[i].right) - 1;
        size_t leftLen = static_cast<size_t>(sep - G[i]);
        size_t rightLen = strlen(sep + 1);
        if (leftLen > leftCap || rightLen > rightCap) {
            printf("产生式过长，已截断: %s\n", G[i]);
            if (leftLen > leftCap) {
                leftLen = leftCap;
            }
            if (rightLen > rightCap) {
                rightLen = rightCap;
            }
        }
        memcpy(prod[i].left, G[i], leftLen);
        prod[i].left[leftLen] = '\0';
        memcpy(prod[i].right, sep + 1, rightLen);
        prod[i].right[rightLen] = '\0';
    }
}
// Returns 1 when `item` equals (same left and right side) one of the first
// index2 items of itemSet, otherwise 0.
int isInItemSet(Production itemSet[200], Production item) {
    int found = 0;
    for (int k = 0; k < index2 && !found; ++k) {
        const bool sameLeft = strcmp(itemSet[k].left, item.left) == 0;
        if (sameLeft && strcmp(itemSet[k].right, item.right) == 0) {
            found = 1;
        }
    }
    return found;
}
// 判断符号是否已经添加到go组中
int isGo(int I, char x, Got *go){
    for(int i=0; i<goNum; i++){
        if(I == go[i].fore && x==go[i].Pan){
            return 1;  //有
        }
    }
    return 0;
}
// Prints every closed item set: a "[In]:" header followed by one "left:right" line per item.
void printItemSet(Production itemSet[][200]) {
    int setNo = 0;
    while (setNo < index1) {
        printf("[I%d]:\n", setNo);
        const Production *items = itemSet[setNo];
        const int count = index2s[setNo];
        for (int k = 0; k < count; ++k) {
            printf("%s:%s\n", items[k].left, items[k].right);
        }
        ++setNo;
    }
}
// Appends a transition to the GO table: from state I on symbol x to state `next`.
// The action letter y is stored only when x is not a non-terminal (not in VN).
// When `next` is the slot of a brand-new item set, the item-set counter grows by one.
void creatGo(Got *go, int I, char x,char y,int next, char *VN){
    Got &entry = go[goNum];
    entry.fore = I;
    entry.Pan = x;
    entry.next = next;

    const bool isNonTerminal = strchr(VN, x) != NULL;
    if (!isNonTerminal) {
        entry.act = y;
    }

    if (index11 == next) {
        ++index11;
    }
    ++goNum;
}
// Appends an entry to the GO table whose target is taken as-is (`gui`) and whose
// action letter y is always stored; the item-set counter is not touched.
void creatGo1(Got *go, int I, char x, char y,int gui){
    Got *slot = go + goNum;
    slot->act = y;
    slot->next = gui;
    slot->Pan = x;
    slot->fore = I;
    goNum += 1;
}
//寻找next
int findNext(int I, char x, Got* go){
    for(int i=0; i<goNum; i++){
        if((go[i].fore == I) && (go[i].Pan == x)){
            return go[i].next;
        }
    }
    return 0;
}
//寻找action
char findAct(int I, char x, Got* go){
    for(int i=0; i<goNum; i++){
        if((go[i].fore == I) && (go[i].Pan == x)){
            return go[i].act;
        }
    }
    return 0;
}
// Looks for `item` in the item sets that are already closed (sets 0 .. index1-1).
// Returns the 1-based number of the first set containing it, or 0 when no set does.
int isAppear(Production itemSet[][200],Production item){
    for (int setNo = 0; setNo < index1; ++setNo) {
        const Production *row = itemSet[setNo];
        const int count = index2s[setNo];
        for (int k = 0; k < count; ++k) {
            if (strcmp(row[k].left, item.left) != 0) {
                continue;
            }
            if (strcmp(row[k].right, item.right) != 0) {
                continue;
            }
            return setNo + 1;
        }
    }
    return 0;
}
//计算项目闭包(包括初始项目集)
void creatClosure(Production itemSet2[200], Production prods[100], int nProds, char *VN, Got *go, Production itemSet[][200]){
    for (int i = 0; i < index2; i++) {
        char *dotPos = strchr(itemSet2[i].right, '.');  // 查找项目中的点位置
        //有点,不是最后一个,且下一个是非终结符,则取下一个
        if (dotPos != NULL && dotPos[1] != '\0' && strchr(VN, dotPos[1])) {
            char nextSymbol = dotPos[1];  // 下一个符号
            //是否在go集里
            if(!isGo(index1, nextSymbol, go)){
                int s=0;
                int m=0;
                int t=0;
                //在通过这个式子构造下一个产生式之前,先检验一下之前的闭包中有没有出现过这个式子,如果出现过就直接创建go集
                int a = isAppear(itemSet,itemSet2[i]);
                if(!a) {
                    //这里是构造下一个集里的第一个产生式
                    sprintf(itemSet[index11][0].left, "%s", itemSet2[i].left);
                    char take1[20] = {'\0'};
                    for (s = 0; s < strlen(itemSet2[i].right); s++) {
                        if (itemSet2[i].right[s] != '.') {
                            take1[t++] = itemSet2[i].right[s];
                            if (itemSet2[i].right[s] == nextSymbol) {
                                s++;
                                break;
                            }
                        }
                    }
                    char take2[20] = {'\0'};
                    for (int n = s; n < strlen(itemSet2[i].right); n++) {
                        take2[m++] = itemSet2[i].right[n];
                    }
                    sprintf(itemSet[index11][0].right, "%s.%s", take1, take2);
                    creatGo(go, index1, nextSymbol, 'S',index11,VN);
                }else{
                    int b = findNext((a-1),nextSymbol,go);
                    creatGo(go, index1, nextSymbol, 'S', b,VN);
                }
            }
            // 对于每个产生式，如果产生式的左部是下一个符号，则将该产生式加入闭包
            for (int j = 0; j < nProds; j++) {
                if (prods[j].left[0] == nextSymbol) {
                    Production newItem;
                    sprintf(newItem.left, "%s", prods[j].left);
                    sprintf(newItem.right,".%s",prods[j].right);
                    // 如果项目集中没有该项目，则加入闭包
                    if (!isInItemSet(itemSet2, newItem)) {
                        strcpy(itemSet2[index2].left, newItem.left);
                        strcpy(itemSet2[index2++].right, newItem.right);
//                            updated = 1;
                    }
                }
            }
        }else if(dotPos != NULL && dotPos[1] != '\0' && (strchr(VN, dotPos[1])==NULL)){
            char nextSymbol = dotPos[1];  // 下一个符号
            if(!isGo(index1, nextSymbol, go)){
                int s=0;
                int m=0;
                int t=0;
                //在通过这个式子构造下一个产生式之前,先检验一下之前的闭包中有没有出现过这个式子,如果出现过就直接创建go集
                int a = isAppear(itemSet,itemSet2[i]);
                if(!a) {
                    sprintf(itemSet[index11][0].left, "%s", itemSet2[i].left);
                    char take1[20] = {'\0'};
                    for (s = 0; s < strlen(itemSet2[i].right); s++) {
                        if (itemSet2[i].right[s] != '.') {
                            take1[t++] = itemSet2[i].right[s];
                            if (itemSet2[i].right[s] == nextSymbol) {
                                s++;
                                break;
                            }
                        }
                    }
                    char take2[20] = {'\0'};
                    for (int n = s; n < strlen(itemSet2[i].right); n++) {
                        take2[m++] = itemSet2[i].right[n];
                    }
                    sprintf(itemSet[index11][0].right, "%s.%s", take1, take2);
                    creatGo(go, index1, nextSymbol, 'S', index11,VN);
                }else{
                    int b = findNext((a-1),nextSymbol,go);
                    creatGo(go, index1, nextSymbol, 'S', b,VN);
                }
            }
        }else if(dotPos != NULL && dotPos[1] == '\0'){
            for(int k=0;k<nProds;k++){
                char right[20]={'\0'};
                for(int l=0;l< (strlen(itemSet2[i].right)-1);l++){
                    right[l] = itemSet2[i].right[l];
                }
                if(strcmp(prods[k].left,itemSet2[i].left)==0 && strcmp(prods[k].right,right)==0){
                    creatGo1(go,index1,'#','r',k);
                    break;
                }
            }
        }
    }
    index2s[index1] = index2;
    index1++;
    index2=1;
}

// Builds the initial item set and then closes item sets until none is left.
// The augmented production must be the first production (prods[0]).
void InitialItemSet(Production itemSet[][200], Production *prods, int nProds, char *VN, Got *go) {
    // First item: the augmented production with the dot in front.
    Production &startItem = itemSet[index1][index2];
    strcpy(startItem.left, prods[0].left);
    sprintf(startItem.right, ".%s", prods[0].right);
    ++index2;

    // Add ".rhs" for every production whose left side equals the right side of prods[0].
    const char *startSymbol = prods[0].right;
    for (int p = 0; p < nProds; ++p) {
        if (strcmp(prods[p].left, startSymbol) != 0) {
            continue;
        }
        Production &slot = itemSet[index1][index2];
        strcpy(slot.left, prods[p].left);
        sprintf(slot.right, ".%s", prods[p].right);
        ++index2;
    }

    // Close the current set, then keep closing while allocated sets remain unprocessed.
    do {
        creatClosure(itemSet[index1], prods, nProds, VN, go, itemSet);
    } while (index1 < index11);
}
// Builds the analysis table OPG from the GO table.
// Row 0 is the header row and column 0 the state column; columns 1..|VT| carry
// the terminals, the following columns the non-terminals.
void creatOPG(Got go[100],Trans OPG[][100], char* VN, char* VT){
    const int nVT = static_cast<int>(strlen(VT));
    const int cols = nVT + static_cast<int>(strlen(VN)) + 1;
    const int rows = index11 + 1;

    // Pass 1: label each cell with its column symbol and its row's state number.
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            if (r == 0 && c == 0) {
                continue;
            }
            Trans &cell = OPG[r][c];
            if (c > 0) {
                cell.pan = (c <= nVT) ? VT[c - 1] : VN[c - nVT - 1];
            }
            if (r > 0) {
                cell.nowState = r - 1;
            }
        }
    }

    // Pass 2: look up action and target for every cell.
    for (int r = 1; r < rows; ++r) {
        int filled = 0;  // next column of this row to receive a copied reduce entry
        for (int c = 1; c < cols; ++c) {
            Trans &cell = OPG[r][c];
            cell.act = findAct(cell.nowState, cell.pan, go);
            cell.state = findNext(cell.nowState, cell.pan, go);
            if (cell.act != 'r' || cell.state == 0) {
                continue;
            }
            // A reduce entry with a non-zero number is copied into columns 0..|VT| of the row.
            const char act = cell.act;
            const int state = cell.state;
            for (; filled <= nVT; ++filled) {
                OPG[r][filled].act = act;
                OPG[r][filled].state = state;
            }
        }
    }
}
// Prints the analysis table: a header row with the column symbols, then one row
// per state with the state number followed by "<action><number>" cells.
void printOPG(Trans OPG[][100],char *VN, char *VT){
    const int cols = static_cast<int>(strlen(VN) + strlen(VT)) + 1;
    const int rows = index11 + 1;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            const Trans &cell = OPG[r][c];
            if (r == 0) {
                if (c == 0) {
                    printf(" 0\t");
                } else {
                    printf(" %c\t", cell.pan);
                }
            } else if (c == 0) {
                printf(" %d\t", cell.nowState);
            } else {
                printf("%c%d\t", cell.act, cell.state);
            }
        }
        printf("\n");
    }
}
/// Returns the table column of symbol b, i.e. its index in the column list L,
/// or -1 when b is not a column symbol.
/// The whole list is searched (up to its terminating '\0'), so the lookup is not
/// limited to grammars with eight symbols; '\0' itself is never a column.
int findL(char b)
{
    if (b == '\0')
    {
        return -1;
    }
    const int capacity = static_cast<int>(sizeof(L));
    for (int i = 0; i < capacity && L[i] != '\0'; i++)
    {
        if (b == L[i])
        {
            return i;
        }
    }
    return -1;
}
/// Reports an empty table cell: prints row number x and the symbol of column y.
void error(int x, int y)
{
    const char columnSymbol = L[y];
    printf("第%d行%c列为空!", x, columnSymbol);
}

/// Reads the decimal number stored in s[1] .. s[l-1] (s[0] is skipped) and returns it.
/// Returns 0 when that range is empty.
int calculate(int l, char s[])
{
    int value = 0;
    int pos = 1;
    while (pos < l)
    {
        const int digit = s[pos] - '0';
        value = 10 * value + digit;
        ++pos;
    }
    return value;
}
//对输入串进行LR0分析过程
void creatTable(char str[], int len){
    int cnt = 1;
    printf("步骤      状态栈    符号栈    输入串    ACTION    GOTO\n");
    int LR = 0;
    while(LR<=len)
    {
        printf("(%d)       %-10s%-10s",cnt,cod,sti);//步骤，状态栈，符号栈输出
        cnt++;
        for(int i = LR; i < len; i++)//输入串输出
        {
            printf("%c",str[i]);
        }
        for(int i = len-LR; i<10;i++)printf(" ");

        int x = con.top();//状态栈栈顶
        int y = findL(str[LR]);//待判断串串首

        if(strcmp(LR0[x][y],"null")!=0)
        {
            int l = strlen(LR0[x][y]);//当前Ri或Si的长度

            if(LR0[x][y][0]=='a')///acc
            {
                printf("acc        \n");//ACTION与GOTO
                return ;
            }
            else if(LR0[x][y][0]=='S')///Si
            {
                printf("%-10s \n",LR0[x][y]);//ACTION与GOTO
                int t = calculate(l,LR0[x][y]);//整数
                con.push(t);
                sindex++;
                sti[sindex] = str[LR];
                cmp.push(str[LR]);
                if(t<10)
                {
                    cindex++;
                    cod[cindex] = LR0[x][y][1];
                }
                else
                {
                    int k = 1;
                    cindex++;
                    cod[cindex] = '(';
                    while(k<l)
                    {
                        cindex++;
                        cod[cindex] = LR0[x][y][k];
                        k++;
                    }
                    cindex++;
                    cod[cindex] = ')';
                }
                LR++;
            }
            else if(LR0[x][y][0]=='r')///ri,退栈，ACTION和GOTO
            {
                printf("%-10s",LR0[x][y]);
                int t = calculate(l,LR0[x][y]);
                int g = del[t];
                while(g--)
                {
                    con.pop();
                    cmp.pop();
                    sti[sindex] = '\0';
                    sindex--;
                }
                g = del[t];
                while(g>0)
                {
                    if(cod[cindex]==')')
                    {
                        cod[cindex]='\0';
                        cindex--;
                        for(;;)
                        {
                            if(cod[cindex]=='(')
                            {
                                cod[cindex]='\0';
                                cindex--;
                                break;
                            }else
                            {
                                cod[cindex]='\0';
                                cindex--;
                            }
                        }
                        g--;
                    }else
                    {
                        cod[cindex] = '\0';
                        cindex--;
                        g--;
                    }
                }//
                cmp.push(head[t]);
                sindex++;
                sti[sindex] = head[t];
                x = con.top();
                y = findL(cmp.top());
                if(LR0[x][y][1]!='\0'){
                    t = (LR0[x][y][0]-'0')*10+(LR0[x][y][1]-'0');
                }else{
                    t = LR0[x][y][0]-'0';
                }
                con.push(t);
                if(t<10)
                {
                    cindex++;
                    cod[cindex] = LR0[x][y][0];
                }
                else
                {
                    int k = 0;
                    cindex++;
                    cod[cindex] = '(';
                    while(k<l)
                    {
                        cindex++;
                        cod[cindex] = LR0[x][y][k];
                        k++;
                    }
                    cindex++;
                    cod[cindex] = ')';
                }
//                cod[cindex] = LR0[x][y][0];
                printf("%-10d\n",t);
            }else
            {
                int t = LR0[x][y][0]-'0';
                char ch = ' ';
                printf("%-10c%-10d\n",ch,t);
                con.push(t);
                cindex++;
                cod[cindex] = LR0[x][y][0];
                sindex++;
                sti[sindex] = 'E';
                LR++;
            }
        }else
        {
            error(x,y);
            return ;
            ///报错
        }
    }
}
// Reads an augmented grammar (start symbol 'W', one "X:rhs" production per line),
// builds and prints the LR(0) item sets and the analysis table, then reads an
// input string and prints its analysis.
// Returns 0 after the analysis ran and 1 when the input cannot be used.
int main(){
    int num = 0;
    // non-terminals
    char VN[200]={'\0'};
    // terminals
    char VT[200]={'\0'};
    // grammar lines
    char G[100][100]={'\0'};
    // grammar as Production records
    Production prod[100]={'\0'};
    // item sets
    Production itemSet[100][200]={'\0'};
    // GO(I, x) table
    Got go[100]={'\0'};
    // analysis table
    Trans OPG[100][100]={'\0'};
    char str[100]={'\0'};

    // The production count must fit every per-production array.
    size_t capacity = sizeof(G) / sizeof(G[0]);
    if (sizeof(del) / sizeof(del[0]) < capacity) {
        capacity = sizeof(del) / sizeof(del[0]);
    }
    if (sizeof(head) / sizeof(head[0]) < capacity) {
        capacity = sizeof(head) / sizeof(head[0]);
    }

    // read the grammar
    printf("请输入文法产生式的数量:\n");
    if (scanf("%d", &num) != 1 || num < 1 || static_cast<size_t>(num) > capacity) {
        printf("产生式数量无效(应为1~%d)\n", static_cast<int>(capacity));
        return 1;
    }
    // drop the rest of the line that held the number
    for (int ch = getchar(); ch != '\n' && ch != EOF; ch = getchar()) {
    }
    // S' is written as W
    printf("请按行输入文法:\n");
    for (int i = 0; i < num; i++) {
        // fgets is bounded by the buffer size (gets is not, and no longer exists in C++14)
        if (fgets(G[i], sizeof(G[i]), stdin) == NULL) {
            printf("文法输入不完整\n");
            return 1;
        }
        G[i][strcspn(G[i], "\r\n")] = '\0';  // strip the line ending kept by fgets
    }
    // fill the Production records
    initProduction(prod,G,num);
    // split the symbols into VT and VN
    extractSymbols(G, num, VN, VT);
    // build the item sets, starting from the augmented production
    InitialItemSet(itemSet,prod,num, VN, go);
    printf("Item Set:\n");
    printItemSet(itemSet);
    // build the analysis table
    creatOPG(go,OPG,VN,VT);
    // print the analysis table
    printf("----------------------------分析表--------------------------------\n");
    printOPG(OPG,VN,VT);
    printf("请输入字符串\n");
    if (fgets(str, sizeof(str), stdin) != NULL) {
        str[strcspn(str, "\r\n")] = '\0';
    }
    con.push(0);
    cmp.push('#');
    const int len = static_cast<int>(strlen(str));

    const int cols = static_cast<int>(strlen(VN) + strlen(VT));
    // column symbols, in table order
    for (int h = 1; h <= cols; h++) {
        L[h-1] = OPG[0][h].pan;
    }
    // right-side length and left symbol of every production
    del[0] = 0;
    head[0] = 'W';
    for (int h = 1; h < num; h++) {
        del[h] = static_cast<int>(strlen(prod[h].right));
        head[h] = prod[h].left[0];
    }
    // Convert the analysis table into the textual form used by the driver.
    const size_t cellSize = sizeof(LR0[0][0]);
    for (int i = 1; i <= index11; i++) {
        for (int j = 1; j <= cols; j++) {
            const char act = OPG[i][j].act;
            const int state = OPG[i][j].state;
            char *cell = LR0[i-1][j-1];
            if (act != '\0' && state != 0) {
                snprintf(cell, cellSize, "%c%d", act, state);  // "Sn" / "rn"
            } else if (act == '\0' && state != 0) {
                snprintf(cell, cellSize, "%d", state);         // GOTO target
            } else if (act == 'r') {
                strcpy(cell, "acc");                           // reduce by production 0
            } else if (act == '\0') {
                strcpy(cell, "null");                          // empty cell
            }
            // any other action with number 0 leaves the cell empty, as before
        }
    }
    printf("------------------------------分析过程-----------------------------\n");
    creatTable(str,len);
    return 0;
}

请注意：输入文法时请输入拓广后的文法，且拓广文法的起始符号是W

运行结果

请输入文法产生式的数量:
7
请按行输入文法:
W:E
E:aA
E:bB
A:cA
A:d
B:cB
B:d
Item Set:
[I0]:
W:.E
E:.aA
E:.bB
[I1]:
W:E.
[I2]:
E:a.A
A:.cA
A:.d
[I3]:
E:b.B
B:.cB
B:.d
[I4]:
E:aA.
[I5]:
A:c.A
A:.cA
A:.d
[I6]:
A:d.
[I7]:
E:bB.
[I8]:
B:c.B
B:.cB
B:.d
[I9]:
B:d.
[I10]:
A:cA.
[I11]:
B:cB.
----------------------------分析表--------------------------------
 0       a       b       c       d       #       E       A       B
 0      S2      S3       0       0       0       1       0       0
 1       0       0       0       0      r0       0       0       0
 2       0       0      S5      S6       0       0       4       0
 3       0       0      S8      S9       0       0       0       7
 4      r1      r1      r1      r1      r1       0       0       0
 5       0       0      S5      S6       0       0       10      0
 6      r4      r4      r4      r4      r4       0       0       0
 7      r2      r2      r2      r2      r2       0       0       0
 8       0       0      S8      S9       0       0       0       11
 9      r6      r6      r6      r6      r6       0       0       0
 10     r3      r3      r3      r3      r3       0       0       0
 11     r5      r5      r5      r5      r5       0       0       0
请输入字符串
bccd#
------------------------------分析过程-----------------------------
步骤      状态栈    符号栈    输入串    ACTION    GOTO
(1)       0         #         bccd#     S3
(2)       03        #b        ccd#      S8
(3)       038       #bc       cd#       S8
(4)       0388      #bcc      d#        S9
(5)       03889     #bccd     #         r6        11
(6)       0388(11)  #bccB     #         r5        11
(7)       038(11)   #bcB      #         r5        7
(8)       037       #bB       #         r2        1
(9)       01        #E        #         acc

Process finished with exit code 0