PL/0语言词法分析

最新推荐文章于 2023-01-02 19:17:23 发布

谢谢๑‾᷅^‾᷅๑

最新推荐文章于 2023-01-02 19:17:23 发布

阅读量4.3k

点赞数 7

文章标签：编译器词法

本文链接：https://blog.csdn.net/weixin_44728197/article/details/105720264

版权

一、简介

PL0 语言功能简单、结构清晰、可读性强，而又具备了一般高级程序设计语言的必需部分，因而 PL0 语言的编译程序能充分体现一个高级语言编译程序实现的基本方法和技术。

二、设计思想

1、正规式r

基本字：begin、call、const、do、end、if、odd、procedure、read、then、var、while、write
标识符：(a|…|z|A|…|Z)(a|…|z|A|…|Z|0|…|9)*
常数：(1|…|9)(0|…|9)*
运算符：+、-、*、/、=、<>、<、<=、>、>=、:=
界符：(、)、,、;、.

2、NFA

NFA的设计就是将多个正规式（对应不同单词符号）通过引入新的起点X和终点Y串接起来。
其中标识符和基本字是一起识别的，统称字符串，在代码里面再对字符串做判断，以识别它是不是基本字；识别常数和识别字符串的NFA的终态都要用*标记，表示最后读入的那个字符不属于当前单词，需要回退。
在这里插入图片描述

3、DFA

使用子集法

4、最小化的 DFA

使用集合划分法

三、算法流程

算法的流程图就是对各种单词符号进行判断。
在这里插入图片描述

四、源程序

语言：c++

#include<stdio.h>
#include<string.h>
#include<iostream>
#include<map>
#include<string>
using namespace std;
/*const a=10; var b,c; begin read(b); c:=a+b; write(c) end.*/
// Reserved word -> token-class name; filled in by initialization_map().
std::map<std::string,std::string> Keywords;

// Registers the thirteen PL/0 reserved words in Keywords.
// Every token-class name is the reserved word itself followed by "sym"
// (e.g. "begin" -> "beginsym"), so the table only lists the words.
void initialization_map()
{
    static const char *const reservedWords[]={
        "begin","call","const","do","end","if","odd",
        "procedure","read","then","var","while","write"
    };
    const unsigned wordCount=sizeof(reservedWords)/sizeof(reservedWords[0]);
    for(unsigned k=0;k<wordCount;++k)
    {
        const std::string word(reservedWords[k]);
        Keywords[word]=word+"sym";
    }
}

bool isNumber(char ch);// is ch a decimal digit?
bool isCase(char ch);// is ch a letter?
bool isCaculationSymbol(char ch);// is ch a character that can start or continue an operator?
bool isBandSymbol(char ch);// is ch a delimiter character?
void getInputStreamFromFile(const char *fileName, char *str);// read the source text from a file
void getcode(char *str);// read the source text from standard input
void calulationString(char *str);// print the tokens of a run of operator characters
void bandString(char *str);// print the tokens of a run of delimiter characters
void analysis(const char *InputFileName, char *str);// lexical-analysis driver

// Returns true when ch is one of the decimal digits '0'..'9'.
bool isNumber(char ch) {
    return ch>='0' && ch<='9';
}
// Returns true when ch is an ASCII letter, lower or upper case.
bool isCase(char ch) {
    const bool isLower=(ch>='a'&&ch<='z');
    const bool isUpper=(ch>='A'&&ch<='Z');
    return isLower||isUpper;
}
// Returns true when ch is one of the characters operators are built from:
// + - * / > < = # :
bool isCaculationSymbol(char ch)
{
    switch(ch)
    {
        case '+':
        case '-':
        case '*':
        case '/':
        case '>':
        case '<':
        case '=':
        case '#':
        case ':':
            return true;
        default:
            return false;
    }
}
// Returns true when ch is one of the delimiter characters ( ) , ; .
bool isBandSymbol(char ch)
{
    static const char delimiters[]={'(',')',',',';','.'};
    for(unsigned k=0;k<sizeof(delimiters);++k)
    {
        if(ch==delimiters[k]) return true;
    }
    return false;
}
// Splits a run of operator characters into tokens and prints one
// "(class,lexeme)" line per token to standard output.
//
// str: NUL-terminated run of operator characters (not modified).
//
// Two-character operators are matched greedily: ":=", ">=", "<=" and "<>".
// A ':' that is not followed by '=' and any other character without a token
// class (including '#') is reported as an illegal character.
void calulationString(char *str)
{
    const int a=static_cast<int>(strlen(str));
    for(int i=0;i<a;i++)
    {
        const char current=str[i];
        // Look-ahead character; '\0' when current is the last one.
        const char next=((i+1)<a)?str[i+1]:'\0';
        switch(current)
        {
            case '+': printf("(plus,+)\n"); break;
            case '-': printf("(minus,-)\n"); break;
            case '*': printf("(times,*)\n"); break;
            case '/': printf("(slash,/)\n"); break;
            case '=': printf("(eql,=)\n"); break;
            case ':':
                if(next=='=')
                {
                    printf("(becomes,:=)\n");
                    i++;// consume the '='
                }
                else printf("(%c,非法字符)\n",current);
                break;
            case '>':
                if(next=='=')
                {
                    printf("(geq,>=)\n");
                    i++;
                }
                else printf("(gtr,>)\n");
                break;
            case '<':
                if(next=='=')
                {
                    printf("(leq,<=)\n");
                    i++;
                }
                else if(next=='>')
                {
                    // "<>" is the not-equal operator; without this branch it
                    // would be split into "<" followed by ">".
                    printf("(neq,<>)\n");
                    i++;
                }
                else printf("(lss,<)\n");
                break;
            default:
                printf("(%c,非法字符)\n",current);
                break;
        }
    }
}
// Walks a run of delimiter characters and prints one "(class,lexeme)" line
// per delimiter to standard output. Characters that are not one of
// ( ) , ; . are skipped silently.
void bandString(char *str)
{
    for(const char *cursor=str;*cursor!='\0';++cursor)
    {
        const char symbol=*cursor;
        const char *label;
        if(symbol=='(') label="lparen";
        else if(symbol==')') label="rparen";
        else if(symbol==',') label="comma";
        else if(symbol==';') label="semicolon";
        else if(symbol=='.') label="period";
        else continue;
        printf("(%s,%c)\n",label,symbol);
    }
}
// Reads the whole file fileName into str as a NUL-terminated string,
// replacing every newline and tab with a single space so that adjacent
// words stay separated.
//
// fileName: path of the file to read.
// str:      caller-provided output buffer. No size is passed in, so the
//           caller must make it large enough for the file plus the
//           terminating NUL.
//
// If the file cannot be opened, str is set to the empty string and a
// message is written to stderr.
void getInputStreamFromFile(const char *fileName, char *str) {
    int i=0;
    FILE *fp=fopen(fileName,"r");// open read-only
    if(fp==NULL)
    {
        fprintf(stderr,"cannot open input file: %s\n",fileName);
        str[0]='\0';
        return;
    }
    // fgetc returns an int so that EOF can be told apart from every byte
    // value; storing it in a char would make byte 0xFF look like EOF (or
    // make EOF undetectable where char is unsigned).
    int ch;
    while((ch=fgetc(fp))!=EOF)
    {
        if(ch!='\n'&&ch!='\t')str[i++]=static_cast<char>(ch);
        else str[i++]=' ';
    }
    str[i]='\0';
    fclose(fp);
}
// Reads source text from standard input into str, stopping after the first
// '.' (which ends a PL/0 program) or at end of input, whichever comes first.
// Newlines and tabs are replaced by a space so that the words before and
// after them do not run together.
//
// str: caller-provided output buffer. No size is passed in, so the caller
//      must make it large enough for the input plus the terminating NUL.
//
// The stored text ends with '.' only if a '.' was actually read.
void getcode(char *str)
{
    int i=0;
    // getchar returns an int so that EOF can be detected; without the EOF
    // test, input lacking a '.' would loop forever and overrun str.
    int ch=getchar();
    while(ch!=EOF&&ch!='.')
    {
        if(ch!='\n'&&ch!='\t')
            str[i++]=static_cast<char>(ch);
        else str[i++]=' ';
        ch=getchar();
    }
    if(ch=='.') str[i++]='.';
    str[i]='\0';
}
// Lexical-analysis driver. Loads the source text into str with getcode()
// (standard input), then prints one "(class,lexeme)" line per token to
// standard output.
//
// InputFileName: path of a source file. Currently unused because the input
//                is taken from standard input; see the commented-out call.
// str:           caller-provided buffer that receives the raw source text.
//
// The text is first cut into chunks at spaces; each chunk is then split into
// identifiers/reserved words, numbers, operator runs and delimiter runs.
// Any other character is reported as "(ERROR,c)".
void analysis(const char *InputFileName,char *str)
{
    (void)InputFileName;
    //getInputStreamFromFile(InputFileName,str);// alternative: read the source from a file
    getcode(str);// read the source typed on the terminal

    const int length=static_cast<int>(strlen(str));
    int i=0;
    // Growable strings: a long run of non-blank characters must not be able
    // to overflow a fixed-size scratch buffer.
    std::string chunk;// text between two spaces
    std::string lexeme;// the token currently being collected
    while(i<length)
    {
        while(i<length&&str[i]==' ')// skip leading spaces
        {
            i++;
        }
        chunk.clear();
        while(i<length&&str[i]!=' ')// collect up to the next space
        {
            chunk+=str[i++];
        }

        const int chunkLength=static_cast<int>(chunk.size());
        int c=0;
        while(c<chunkLength)
        {
            lexeme.clear();
            if(isCase(chunk[c]))// starts with a letter: identifier or reserved word
            {
                // Only letters and digits belong to the word; anything else
                // ends it and is classified on the next pass of the loop.
                while(c<chunkLength&&(isCase(chunk[c])||isNumber(chunk[c])))
                {
                    lexeme+=chunk[c++];
                }
                const std::map<std::string,std::string>::iterator iter=Keywords.find(lexeme);
                if(iter!=Keywords.end())// reserved word
                {
                    std::cout<<"("<<iter->second<<","<<iter->first<<")"<<std::endl;
                }
                else// ordinary identifier
                {
                    std::cout<<"(ident,"<<lexeme<<")"<<std::endl;
                }
            }
            else if(isNumber(chunk[c]))// starts with a digit: number
            {
                while(c<chunkLength&&isNumber(chunk[c]))
                {
                    lexeme+=chunk[c++];
                }
                std::cout<<"(number,"<<lexeme<<")"<<std::endl;
            }
            else if(isCaculationSymbol(chunk[c]))// run of operator characters
            {
                while(c<chunkLength&&isCaculationSymbol(chunk[c]))
                {
                    lexeme+=chunk[c++];
                }
                calulationString(&lexeme[0]);
            }
            else if(isBandSymbol(chunk[c]))// run of delimiter characters
            {
                while(c<chunkLength&&isBandSymbol(chunk[c]))
                {
                    lexeme+=chunk[c++];
                }
                bandString(&lexeme[0]);
            }
            else// character that belongs to no token class
            {
                std::cout<<"(ERROR,"<<chunk[c]<<")"<<std::endl;
                c++;
            }
        }
    }
    // Flush rather than close: stdout is still used by the runtime at exit.
    fflush(stdout);
}

// Program entry point: fills the reserved-word table, then runs the lexer.
int main()
{
    char sourceText[100000];// holds the source program as raw characters
    const char *sourcePath="./code.txt";// input file path, relative to the working directory
    initialization_map();// set up the reserved-word table
    analysis(sourcePath,sourceText);
    return 0;
}