pl/0词法分析器实验代码

东来梁蕴秀

已于 2022-04-26 19:07:04 修改

阅读量865

点赞数 3

分类专栏： c/c++相关文章标签： c++ c语言

于 2022-04-22 10:59:58 首次发布

本文链接：https://blog.csdn.net/m0_61973596/article/details/124340500

版权

c/c++相关专栏收录该内容

20 篇文章 1 订阅

订阅专栏

在基本要求的基础上，增加了三个自定义基本字："姓名"、"学号"和"董楚梁"。

#include<stdbool.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAXSIZE 999
#define norw 13 //关键字个数
#define txmax 999 //名字表容量
#define al 16 //符号最大长度
#define NAME "姓名"
#define STUNUM "学号"
#define REALNAME "董楚梁"

//Token categories. The enumerators are numbered consecutively from 0 in the
//order listed; nul marks text the lexer cannot recognise.
enum symbol
{
    nul,        //unrecognised
    ident,      //identifier
    number,     //numeric literal

    //operators
    plus, minus, times, slash,
    eql, neq, lss, leq, gtr, geq,
    becomes,

    //delimiters
    lparen, rparen, comma, semicolon, period,

    //the 13 reserved words
    beginsym, endsym, ifsym, thensym, whilesym, writesym, readsym,
    dosym, callsym, oddsym, constsym, varsym, procsym,

    //one extra keyword category for the custom words NAME, STUNUM and REALNAME
    yessym
};

enum symbol wsym[norw+1]; //symbol for each reserved word; the one extra slot holds the custom keyword symbol
enum symbol ssym[256];    //symbol for each single-character token, indexed by character code

//名字表结构
struct tablestruct {
    char name[al + 1]; //符号
    symbol sym; //符号种别
}table[txmax];

char str[MAXSIZE];  //holds the contents of the input file
FILE* fin;
int t = 0;    //counter used as the next index when filling table
//reserved words
char word[norw][al] = { "begin","call","const","do","end","if","odd",
                    "procedure","read","then","var","while","write" };//kept in sorted order so they can be binary-searched

//Report whether ch is a byte with the high bit set, i.e. a non-ASCII byte.
//The lexer treats such bytes as part of a Chinese (multi-byte) word.
//
//The byte is inspected as unsigned char so the result does not depend on
//whether plain char is signed or on how a negative value is right-shifted.
bool IsChinese(char ch)
{
    return ((unsigned char)ch & 0x80) != 0;
}
//bool IsChinese(char ch)
//{
//	if(ch>=0&&ch<=127)
//		return true;
//	return false;
//}



//读入并保存文件内容，保存到str[MAXSIZE]中
void read() {
    char ch;
    strcpy(str, "");
    int i = 0;
    if (!fin)
    {
        printf("打开文件失败!\n");
        exit(-1);
    }
    while ((ch = fgetc(fin)) != EOF)
    {
        if (ch != '\n' && ch != '\t')
            str[i++] = ch;
        else //换行和Tab均保存为空格
            str[i++] = ' ';
    }//读入文件中内容
    str[i] = '\0';
    //printf("%s", str);
    fclose(fin);
}

//初始化
void init() {
    read();
    //设置单字符符号
    for (int i = 0; i < 256; i++)
    {
        ssym[i] = nul;
    }
    ssym['+'] = plus;
    ssym['-'] = minus;
    ssym['*'] = times;
    ssym['/'] = slash;
    ssym['('] = lparen;
    ssym[')'] = rparen;
    ssym['='] = eql;
    ssym[','] = comma;
    ssym['.'] = period;
    ssym['#'] = neq;
    ssym[';'] = semicolon;

    //保留字对应符号
    wsym[0] = beginsym;
    wsym[1] = callsym;
    wsym[2] = constsym;
    wsym[3] = dosym;
    wsym[4] = endsym;
    wsym[5] = ifsym;
    wsym[6] = oddsym;
    wsym[7] = procsym;
    wsym[8] = readsym;
    wsym[9] = thensym;
    wsym[10] = varsym;
    wsym[11] = whilesym;
    wsym[12] = writesym;
    wsym[13] = yessym;
}

//获取符号种别,并且存储到结构体数组中
void getsym() {
    int len = strlen(str);
    //printf("符号长度%d\n", len);
    int j = 0, k=0, pos = 0;
    char s[al + 1];//临时符号
    while (pos < len && j < len)
    { 
        strcpy(s, "");//s初始化
        j = k = 0;
        //空格直接跳过
        while (str[pos] == ' ')
        {
            pos++;
        }
        //以字母开头，可能是标识符或保留字，英文或者字符
       if (!IsChinese(str[pos])&&pos<len)//英文打头
        {
            if (str[pos] >= 'a' && str[pos] <= 'z')
            {
                j = pos;
                int flag=0;
                do
                { 
                   if (flag< al)
                    {
                        s[k++] = str[j++];
                        flag++;
                    }
                   else
                   {
                       j++;
                   }
                } while ((str[j] >= 'a' && str[j] <= 'z') || (str[j] >= '0' && str[j] <= '9'));
                s[k] = '\0';
                int m = 0, n = norw - 1;
                //折半查找，看字符串是否是保留字
                do {
                    k = (m + n) / 2;
                    if (strcmp(s, word[k]) <= 0)
                    {
                        n = k - 1;
                    }
                    if (strcmp(s, word[k]) >= 0)
                    {
                        m = k + 1;
                    }
                } while (m <= n);
                strcpy(table[t].name, s);
                if (m - 1 > n) //基本字
                {
                    table[t].sym = wsym[k];
                }
                else //标识符
                {
                    table[t].sym = ident;
                }
                t++;
                pos = j;
            }
            else
            {
                if (str[pos] >= '0' && str[pos] <= '9') //数字
                {
                    j = pos;
                    table[t].sym = number;
                    while (str[j] >= '0' && str[j] <= '9')
                        s[k++] = str[j++];
                    s[k] = '\0';
                    strcpy(table[t].name, s);
                    pos = j;
                    t++;
                }
                else
                {
                    if (str[pos] == ':') {
                        if (str[pos + 1] == '=') {//:=运算符
                            table[t].sym = becomes;
                            strcpy(table[t].name, ":=");
                            pos += 2;
                        }
                        else {//非法字符，设置为nul
                            table[t].sym = nul;
                            s[0] = ':'; s[1] = '\0';
                            strcpy(table[t].name, s);
                            pos++;
                        }
                        t++;
                    }
                    else
                    {
                        if (str[pos] == '<') {
                            if (str[pos + 1] == '=') {// <=
                                table[t].sym = leq;
                                strcpy(table[t].name, "<=");
                                pos += 2;
                            }
                            else {// <
                                table[t].sym = lss;
                                strcpy(table[t].name, "<");
                                pos++;
                            }
                            t++;
                        }
                        else
                        {
                            if (str[pos] == '>') {
                                if (str[pos + 1] == '=') {// >=
                                    table[t].sym = geq;
                                    strcpy(table[t].name, ">=");
                                    pos += 2;
                                }
                                else {// >
                                    table[t].sym = gtr;
                                    strcpy(table[t].name, ">");
                                    pos++;
                                }
                                t++;
                            }
                            else
                            {//其它单个字符
                                table[t].sym = ssym[str[pos]];
                                s[0] = str[pos]; s[1] = '\0';
                                strcpy(table[t].name, s);
                                t++;
                                pos++;
                            }
                        }
                    }
                }
            }
        }
       else//中文
       {
       j = pos;
       int flag = 0;
       do
       {
           if (flag < al)
           {
               s[k++] = str[j++];
               flag++;
           }
           else
           {
               j++;
           }
       } while ((str[j] >> 8) == -1|| (str[j] >= '0' && str[j] <= '9'));
       s[k] = '\0';
       strcpy(table[t].name, s);
       int ret1 = strcmp(s, STUNUM);
       int ret2 = strcmp(s, NAME);
       int ret3 = strcmp(s, REALNAME);
	   if (!ret1 || !ret2 || !ret3)
       {
           table[t].sym = wsym[13];
       }
       t++;
       pos = j;
       }           
    }
}

//输出表内容
void print() {
    int i;
    for (i = 0; i < t; i++)
    {
        printf("%-15s ", table[i].name);
        if (table[i].sym == 0)
        {
            printf("非法");
        }
        else if (table[i].sym == 1)
        {
            printf("标识符");
        }
        else if (table[i].sym == 2)
        {
            printf("数字");
        }
        else if (table[i].sym >= 3 && table[i].sym <= 13)
        {
            printf("运算符");
        }
        else if (table[i].sym >= 14 && table[i].sym <= 18)
        {
            printf("界符");
        }
        else
        {
            printf("基本字");
        }
        printf("\n");
    }
}

//Entry point: open the input file, run the lexer over it and print the
//resulting token table. A failed open is reported by read(), which init()
//calls and which checks fin before using it.
int main()
{
    const char* input_path = "a.txt";

    fin = fopen(input_path, "r");
    init();
    getsym();
    print();
    system("pause");
    return 0;
}

const a=10;
var b,c;
procedure p;
begin
c:=b+a;
end;
begin
姓名:董楚梁1
end.
学号:202002156023

实验结果：

东来梁蕴秀

关注

3
点赞
踩
11

收藏

觉得还不错? 一键收藏
0
评论
pl/0词法分析器实验代码

pl0实验
复制链接

扫一扫

专栏目录