词法分析主程序

最新推荐文章于 2020-06-21 18:29:58 发布
Godsight
最新推荐文章于 2020-06-21 18:29:58 发布
阅读量712
点赞数
分类专栏：词法分析文章标签：编译原理
本文链接：https://blog.csdn.net/Godsight/article/details/52738113
版权
词法分析专栏收录该内容
7 篇文章 0 订阅
订阅专栏
/** Lexical analysis: main routines **/

TkWord * tk_hashtable[MAXKEY];  // hash table of words (tokens)
DynArray tktable;               // dynamic array of words; the word table holds identifiers: variable names, function names, struct definition names
DynString tkstr;                // text of the current word (for string/char constants: the decoded characters)
DynString sourcestr;            // source text of the current word as written (filled by parse_num and parse_string)
int tkvalue;                    // value of the current word (set for integer and character constants)
char ch;                        // source character most recently read by getch()
                                // NOTE(review): getch() stores the int result of getc() here and the
                                // value is later compared with CH_EOF/EOF; with `char` storage a 0xFF
                                // byte can look like end-of-file (or EOF can never match if char is
                                // unsigned). Consider `int` after checking every other declaration of ch.
int token;                      // code of the current word (TK_* value)
int line_num;                   // current line number

/** Read the next character of the source file `fin` into the global `ch`. **/
void getch()
{
    /* getc() yields EOF at end of file and the byte value otherwise;
       the int result is converted to the type of `ch` on assignment. */
    int next=getc(fin);
    ch=next;
}
/**
Whitespace handling.
Starting at the current character, consumes spaces, tabs and "\r\n"
line endings. Spaces and tabs are echoed to stdout; for a "\r\n" pair
line_num is incremented and only the '\n' is echoed.
A '\r' that is not followed by '\n' ends the scan immediately: the '\r'
is dropped and ch holds the character that followed it.
**/
void skip_white_space()
{
    for(;;)
    {
        if(ch=='\r')
        {
            /* Files with Windows line endings terminate a line with the
               two characters \r\n; look at the character after '\r'. */
            getch();
            if(ch!='\n')
                return;
            line_num++;
        }
        else if(ch!=' '&&ch!='\t')
        {
            return;             /* not whitespace: leave it for the caller */
        }
        printf("%c",ch);
        getch();
    }
}
/**
Skip a block comment.
preprocess() calls this while ch is the '*' of the opening delimiter.
On a normal return ch is the first character after the closing
delimiter. line_num is incremented for every '\n' inside the comment.
If CH_EOF is reached before the comment is closed, error() is called
and the function returns.
**/
void parse_comment()
{
    getch();
    for(;;)
    {
        /* run forward to the next character that needs a decision */
        while(ch!='\n'&&ch!='*'&&ch!=CH_EOF)
            getch();

        if(ch=='\n')            /* newline inside the comment: count the line */
        {
            line_num++;
            getch();
            continue;
        }
        if(ch!='*')             /* neither '\n' nor '*': the scan stopped on CH_EOF */
        {
            error("一直到文件尾未看到配对的注释结束符");
            return;
        }
        getch();                /* saw '*': look one character ahead */
        if(ch=='/')             /* comment closed */
        {
            getch();
            return;
        }
    }
}
/**
Preprocessing: skip whitespace and block comments.
Returns with ch on the first character that is neither whitespace
(space, tab, '\r') nor the start of a block comment.
**/
void preprocess()
{
    for(;;)
    {
        switch(ch)
        {
        case ' ':
        case '\t':
        case '\r':
            skip_white_space();
            continue;
        case '/':
            getch();                    /* look one character ahead */
            if(ch=='*')
            {
                parse_comment();        /* a comment: skip it and keep going */
                continue;
            }
            ungetc(ch,fin);             /* not a comment: push the look-ahead back */
            ch='/';                     /* and restore the slash as current character */
            return;
        default:
            return;
        }
    }
}
/** Identifier parsing **/
/** Return 1 if c is a letter (a-z, A-Z) or an underscore, otherwise 0. **/
int is_nodigit(char c)
{
    if(c=='_')
        return 1;
    if(c>='a'&&c<='z')
        return 1;
    return c>='A'&&c<='Z';
}
/** Return 1 if c is a decimal digit ('0'..'9'), otherwise 0. **/
int is_digit(char c)
{
    if(c<'0')
        return 0;
    return c<='9';
}
/**
Parse an identifier.
Collects the current character and every following letter, underscore
or digit into tkstr, then appends a terminating '\0'. On return ch is
the first character that is not part of the identifier.
**/
void parse_identifier()
{
    dynstring_reset(&tkstr);
    do
    {
        dynstring_chcat(&tkstr,ch);     /* the first character is taken unconditionally */
        getch();
    }while(is_nodigit(ch)||is_digit(ch));
    dynstring_chcat(&tkstr,'\0');
}
/** Integer parsing **/
/**
Parse an integer constant.
Collects the current character and the digits that follow into both
tkstr and sourcestr. If a '.' follows, it and any digits after it are
collected as well, but only the leading integer part contributes to
tkvalue. Both strings are '\0'-terminated and ch is left on the first
character after the constant.

tkvalue is computed with strtol() rather than atoi(): atoi() has
undefined behaviour when the text does not fit in an int, whereas
strtol() saturates at the limits of long. A warning is issued when the
parsed value cannot be represented in tkvalue. Where long and int have
the same width the value saturates silently.
**/
void parse_num()
{
    long parsed;

    dynstring_reset(&tkstr);
    dynstring_reset(&sourcestr);
    do{
        dynstring_chcat(&tkstr,ch);
        dynstring_chcat(&sourcestr,ch);
        getch();
    }while(is_digit(ch));
    if(ch=='.')
    {
        /* the '.' itself is appended by the first pass of this loop */
        do{
            dynstring_chcat(&tkstr,ch);
            dynstring_chcat(&sourcestr,ch);
            getch();
        }while(is_digit(ch));
    }
    dynstring_chcat(&tkstr,'\0');
    dynstring_chcat(&sourcestr,'\0');

    /* convert the text to an integer; conversion stops at the '.' if any */
    parsed=strtol(tkstr.data,NULL,10);
    tkvalue=(int)parsed;
    if(tkvalue!=parsed)
        warning("整型常量超出int范围:%s",tkstr.data);
}
/**解析字符串**/
/**
解析字符常量，字符串常量
sep:字符常量界符标识为单引号
    字符串常量界符标识为双引号
**/
void parse_string(char sep)
{
    char c;
    dynstring_reset(&tkstr);
    dynstring_reset(&sourcestr);
    dynstring_chcat(&sourcestr,sep);
    getch();
    for(;;)
    {
        if(ch==sep)
            break;                    /**界符匹配**/
        else if(ch=='\\')
        {
            dynstring_chcat(&sourcestr,ch);
            getch();
            switch(ch)                /**解析转义字符**/
            {
            case '0':
                c='\0';
                break;
            case 'a':
                c='\a';
                break;
            case 'b':                 /**退格**/
                c='\b';
                break;
            case 't':
                c='\t';
                break;
            case 'n':
                c='\n';
                break;
            case 'v':               /**垂直制表**/
                c='\v';
                break;
            case 'f':               /**换页**/
                c='\f';
                break;
            case 'r':
                c='\r';
                break;
            case '\"':
                c='\"';
                break;
            case '\'':
                c='\'';
                break;
            case '\\':
                c='\\';
                break;
            default:
                c=ch;
                if(c>='!'&&c<='~')
                    warning("非法转义字符:\'\\%c\'",c);
                else
                    warning("非法转义字符:\'\\0x%x\'",c);
                break;
            }
            dynstring_chcat(&tkstr,c);
            dynstring_chcat(&sourcestr,ch);
            getch();
        }
        else
        {
            dynstring_chcat(&tkstr,ch);
            dynstring_chcat(&sourcestr,ch);
            getch();
        }
    }
    dynstring_chcat(&tkstr,'\0');
    dynstring_chcat(&sourcestr,sep);
    dynstring_chcat(&sourcestr,'\0');
    getch();
}
/**
Helper for operators that may be one or two characters long.
Reads the character after the operator's first character. If it equals
`second` it is consumed too and `tk_double` is returned; otherwise
`tk_single` is returned and ch stays on that character.
**/
static int tk_two_char(char second,int tk_double,int tk_single)
{
    getch();
    if(ch!=second)
        return tk_single;
    getch();
    return tk_double;
}

/**
Fetch the next word (token).
Skips whitespace and comments with preprocess(), then dispatches on the
current character and stores the word's code in `token`. Identifiers
are looked up / inserted with tkword_insert(); integer, character and
string constants are parsed by parse_num() / parse_string(), with
tkvalue set for integer and character constants.
When error() is called (a lone '!', a malformed ellipsis, an unknown
character) this function does not assign `token` on that path.
**/
void get_token()
{
    preprocess();
    switch(ch)
    {
    /* identifiers and keywords: a letter or an underscore */
    case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':
    case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':
    case 'o':case 'p':case 'q':case 'r':case 's':case 't':
    case 'u':case 'v':case 'w':case 'x':case 'y':case 'z':
    case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':
    case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':
    case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':
    case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z':
    case '_':
        {
            TkWord *word;
            parse_identifier();
            word=tkword_insert(tkstr.data);
            token=word->tkcode;
        }
        break;

    /* numeric constants */
    case '0':case '1':case '2':case '3':
    case '4':case '5':case '6':case '7':
    case '8':case '9':
        parse_num();
        token=TK_CINT;
        break;

    /* operators that may be followed by a second character */
    case '-':
        token=tk_two_char('>',TK_POINTSO,TK_MINUS);
        break;
    case '=':
        token=tk_two_char('=',TK_EQ,TK_ASSIGN);
        break;
    case '<':
        token=tk_two_char('=',TK_LEQ,TK_LF);
        break;
    case '>':
        token=tk_two_char('=',TK_GEQ,TK_GT);
        break;
    case '!':
        getch();
        if(ch!='=')
        {
            error("暂不支持'!'（非操作符）");
            break;
        }
        token=TK_NEQ;
        getch();
        break;
    case '.':
        getch();
        if(ch!='.')
        {
            token=TK_DOT;
            break;
        }
        getch();                        /* two dots seen: a third one is required */
        if(ch=='.')
            token=TK_ELLIPSIS;
        else
            error("省略号拼写错误");
        getch();
        break;

    /* single-character operators and punctuation */
    case '+': token=TK_PLUS;      getch(); break;
    case '/': token=TK_DIVIDE;    getch(); break;
    case '%': token=TK_MOD;       getch(); break;
    case '&': token=TK_AND;       getch(); break;
    case ';': token=TK_SEMICOLON; getch(); break;
    case ']': token=TK_CLOSEBR;   getch(); break;
    case '}': token=TK_END;       getch(); break;
    case ')': token=TK_CLOSEPA;   getch(); break;
    case '[': token=TK_OPENBR;    getch(); break;
    case '{': token=TK_BEGIN;     getch(); break;
    case '(': token=TK_OPENPA;    getch(); break;
    case ',': token=TK_COMMA;     getch(); break;
    case '*': token=TK_STAR;      getch(); break;

    /* character and string constants */
    case '\'':
        parse_string(ch);
        token=TK_CCHAR;
        tkvalue=((char *)tkstr.data)[0];    /* value of the first decoded character */
        break;
    case '\"':
        parse_string(ch);
        token=TK_CSTR;
        break;

    case EOF:
        token=TK_EOF;
        break;

    default:
        /* %02x prints the character's value in hex, zero-padded to at least two digits */
        error("不认识的字符:0x%02x",ch);
        getch();
        break;
    }
}