编译原理——Tiny词法分析器c++实现

Tiny语言和C-相比更为简单。在实现时,读取文本的过程中发现回车换行是一个头疼的问题:fgetc()函数和fseek()函数对字符的计数并不是一一对应的。fseek()会把回车换行("\r\n")算作两个字符,而以文本模式读取时,fgetc()会把"\r\n"合并成一个'\n'返回,也就是说把回车换行符当作一个字符。

代码如下:

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define sfName "source.tiny"
#define tfName "target.tiny"
using namespace std;
// Shared scanner state. `source` is the stream being tokenized and `target`
// receives the token listing; both are opened by main().
FILE *source;
FILE *target;
int sequence=0; // running index written in front of every emitted token
int s[5]={0}; // automaton state of each recognizer, indexed by the slot passed to it
char str[200]=""; // lexeme of the token currently being assembled
char op[10]={'+','-','*','/',':','=','<',';','(',')'}; // characters treated as operators
char line[200]=""; // holds one line of characters
// Reserved words. Declared const because string literals must not be modified.
const char *resw[]={"if","then","else","end","repeat","until","read","write"};
// Printable names of the token kinds, in the same order as TokenType.
const char *token[]={"ID","NUM","RESW","OP","ERROR","COM"};
typedef enum{ID=0,NUM,RESW,OP,ERROR,COM}TokenType;

// Resets the shared scratch state between tokens: every entry of the state
// array `s` becomes 0 and every byte of the lexeme buffer `str` becomes '\0'.
void clearState()
{
    for(size_t k=0;k<sizeof(s)/sizeof(s[0]);++k)
        s[k]=0;
    for(size_t k=0;k<sizeof(str);++k)
        str[k]='\0';
}

// Writes the lexeme currently held in `str` to `target`, prefixed with the
// running sequence number, then advances that number.
//   t - kind of the token; kind 5 (COM) is written as the bare text,
//       every other kind as "<KIND,lexeme>".
void printStr(TokenType t)
{
    if(t!=5)
        fprintf(target,"%d:<%s,%s>\n",sequence,token[t],str);
    else
        fprintf(target,"%d:%s\n",sequence,str);
    ++sequence;
}

void comment(char ch,int i)
{
    bool isExit = false;
    int count =0;
    while(!feof(source))
    {
        switch( s[i] )
        {
            case 0:
            if(ch=='{')
            {
                s[i]=1;
                str[count]=ch;
                count++;
            }
            break;
            case 1:
            if(ch=='}')
            {
                s[i]=2;
                isExit = true;
                str[count]=ch;
                printStr(COM);
            }else
            {
                s[i]=1;
                str[count]=ch;
                count++;
            }
            break;
            default:cout<<"data error\n";
        }
        if(isExit) return;
        ch = fgetc(source);
    }
}

// Recognizes an unsigned integer literal and emits it as a NUM token.
//   ch - first character of the literal (the caller has already read it)
//   i  - index into the state array `s` used by this recognizer
// Digits are collected into `str`. The first non-digit is pushed back onto
// `source` so the caller reads it again. The token is emitted even when the
// literal is the last thing in the input.
void digit(char ch,int i)
{
    // Reserve the last byte of str for the terminating '\0'.
    const int limit = static_cast<int>(sizeof(str)) - 1;
    int len = 0;
    // Work with int so that EOF stays distinguishable from every real byte.
    int c = static_cast<unsigned char>(ch);
    while(c>='0' && c<='9')
    {
        s[i]=1;
        if(len<limit) str[len++]=static_cast<char>(c); // extra digits are consumed but not stored
        c = fgetc(source);
    }
    s[i]=0;
    // ungetc is used instead of fseek(-1, SEEK_CUR): relative seeks are not
    // portable on streams opened in text mode.
    if(c!=EOF) ungetc(c,source);
    printStr(NUM);
}

// Reports whether `ch` is one of the characters listed in the `op` table.
bool isOperator(char ch)
{
    const char *const last = op + 10;
    const char *p = op;
    while(p!=last)
    {
        if(*p==ch)
            return true;
        ++p;
    }
    return false;
}

// Recognizes an operator and emits it as an OP token, or as ERROR when the
// text is not a valid operator.
//   ch - first character of the operator (the caller has already read it)
//   i  - index into the state array `s` used by this recognizer
// Single-character operators are + - * / < ( ) ; and =. The only
// two-character operator is ":=". A ':' that is not followed by '=' is
// emitted alone as ERROR, and the character after it is pushed back so it is
// scanned as the start of the next token.
void myOperator(char ch,int i)
{
    str[0]=ch;
    if(ch==':')
    {
        s[i]=1; // ':' seen, '=' expected
        const int next = fgetc(source);
        if(next=='=')
        {
            s[i]=2;
            str[1]='=';
            printStr(OP);
        }else
        {
            s[i]=0;
            // Do not swallow the lookahead character; it belongs to the next token.
            if(next!=EOF) ungetc(next,source);
            printStr(ERROR);
        }
        return;
    }
    if(ch=='+' || ch=='-' || ch=='*' || ch=='/' || ch=='<' || ch=='(' || ch==')' || ch==';' || ch=='=')
        printStr(OP);
    else
        printStr(ERROR);
}

// Reports whether the lexeme currently held in `str` exactly matches one of
// the first eight entries of the reserved-word table `resw`.
bool isResw()
{
    const int total = 8;
    int k = 0;
    while(k<total && strcmp(str,resw[k])!=0)
        ++k;
    return k<total;
}

// Recognizes a run of ASCII letters and emits it as RESW when it is a
// reserved word, otherwise as ID.
//   ch - first character of the word (the caller has already read it)
//   i  - index into the state array `s` used by this recognizer
// Letters are collected into `str`. The first non-letter is pushed back onto
// `source` so the caller reads it again. The token is emitted even when the
// word is the last thing in the input.
void identifier(char ch,int i)
{
    // Reserve the last byte of str for the terminating '\0'.
    const int limit = static_cast<int>(sizeof(str)) - 1;
    int len = 0;
    // Work with int: comparing a plain char against EOF is unreliable because
    // the signedness of char is implementation-defined.
    int c = static_cast<unsigned char>(ch);
    while((c>='A'&&c<='Z')||(c>='a'&&c<='z'))
    {
        s[i]=1;
        if(len<limit) str[len++]=static_cast<char>(c); // extra letters are consumed but not stored
        c = fgetc(source);
    }
    s[i]=0;
    // Step back one character. ungetc is used instead of
    // fseek(-1, SEEK_CUR): relative seeks are not portable on streams opened
    // in text mode.
    if(c!=EOF) ungetc(c,source);
    if(isResw())
        printStr(RESW);
    else
        printStr(ID);
}

// Drives the lexical analysis: reads `source` one character at a time, picks
// the recognizer that matches the first character of each token, and resets
// the shared state after every token. Runs until the input is exhausted.
void scanner()
{
    // int, not char, so EOF stays distinguishable from every real byte; testing
    // against EOF also ends the loop on a read error, where feof() would stay false.
    int ch = fgetc(source);
    while(ch!=EOF)
    {
        if(ch=='\n' || ch=='\r' || ch==' ' || ch=='\t')
        {
            ; // skip whitespace; '\r' is included so CRLF line endings are not reported as errors
        }else if(ch=='{')
        {
            comment(static_cast<char>(ch),0);
        }else if(ch>='0' && ch<='9')
        {
            digit(static_cast<char>(ch),1);
        }else if( isOperator(static_cast<char>(ch)) )
        {
            myOperator(static_cast<char>(ch),2);
        }else if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z'))
        {
            identifier(static_cast<char>(ch),3);
        }else
        {
            // Any other character cannot start a token.
            str[0] = static_cast<char>(ch);
            printStr(ERROR);
        }
        clearState(); // reset recognizer states and the lexeme buffer
        ch = fgetc(source);
    }
}

// Copies everything that remains in `fp` to standard output, followed by a
// blank line.
static void displayFile(FILE *fp)
{
    int c; // int so EOF stays distinguishable from every real byte
    while((c=fgetc(fp))!=EOF)
        printf("%c",c);
    printf("\n\n");
}

// Program entry point: prints the source file, runs the scanner over it to
// produce the target file, then prints the target file.
// Returns 0 on success; exits with EXIT_FAILURE when a file cannot be opened.
int main()
{
    FILE *fp;
    if((fp=fopen(sfName,"r"))==NULL)
    {
        printf("file open error!\n");
        exit(EXIT_FAILURE);
    }
    displayFile(fp); // display source.tiny
    fclose(fp);

    if((source=fopen(sfName,"r"))==NULL)
    {
        printf("文件打开错误!\n");
        exit(EXIT_FAILURE);
    }
    if((target=fopen(tfName,"w"))==NULL)
    {
        printf("文件打开错误!\n");
        fclose(source);
        exit(EXIT_FAILURE);
    }
    scanner(); // entry point of the lexical analysis
    fclose(source);
    fclose(target);
    cout<<"词法分析如下"<<endl;
    if((fp=fopen(tfName,"r"))==NULL)
    {
        printf("文件打开错误!\n");
        exit(EXIT_FAILURE);
    }
    displayFile(fp); // display target.tiny
    fclose(fp);
    return 0;
}

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值