编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

最新推荐文章于 2024-07-02 22:22:32 发布

weixin_30664051

最新推荐文章于 2024-07-02 22:22:32 发布

阅读量1.7k

点赞数

文章标签： c/c++

原文链接：http://www.cnblogs.com/Estrong/archive/2012/11/05/2744548.html

版权

“单词”分类说明

标识符(Identifier):变量名和函数名(字母或下划线开头)；

关键字(Keyword):系统保留字；

运算符(Operator): + - * / % = == != < <= > >= 等；

分隔符(Separator): , ; . ' " ( ) [ ] { } // /* */ #等；

常量(Constant): 字符串或字符常量；

注释(Note): 注释不参与编译。

程序清单：

========================================================

词法分析器V1.0

(DFA，C语言描述，可分析C/C++词法)

Author:Estrong

2012.05.10

========================================================

说明：

标识符(Identifier):变量名和函数名(字母或下划线开头)；

关键字(Keyword):具有特定的功能系统保留字，不能移作他用；

运算符(Operator): + - * / % = == != < <= > >= 等；

分隔符(Separator): , ; . ' " ( ) [ ] { } // /米米/ #等；

常量(Constant):字符串或字符常量；

注释(Note): 注释不参与编译。

#include "stdio.h"

#include "string.h"

#define MAX_STR 256

#define MAX_KEYWORDS 62

#define MAX_OPERATORS 12

#define MAX_SEPARATORS 15

/* 定义分析状态 */

#define STA_START 1

#define STA_IDorKEYWORD 2 /* IDENTIFIER：标识符 */

#define STA_NUMBER 3 /* NUMBER：数字 */

#define STA_NOTE 4 /* NOTE: 注释 */

#define STA_CONSTANT 5 /* CONSTANT: 常量 */

#define STA_DONE 6 /* DONE：完成 */

/* 定义所属类型 */

#define TYPE_KEYWORD 1 /* KEYWORD：保留字 */

#define TYPE_IDENTIFIER 2 /* IDENTIFIER：标识符 */

#define TYPE_NUMBER 3 /* NUMBER：数字 */

#define TYPE_NOTE 4 /* NOTE: 注释 */

#define TYPE_CONSTANT 5 /* CONSTANT: 常量 */

#define TYPE_OPERATOR 6 /* OPERATOR: 运算符 */

#define TYPE_SEPARATOR 7 /* SEPARATOR: 分隔符 */

#define TYPE_ERROR 8 /* ERROR: 错误 */

#define TYPE_UNKNOWN 9 /* UNKNOWN: 未知 */

#define TYPE_ENDFILE 10 /* ENDFILE: 文件结束 */

char *Operators[MAX_OPERATORS] = {"+","-","*","/","%","=","==","!=","<","<=",">",">="};

char *Separators[MAX_SEPARATORS] ={",",";",".","\'","\"","(",")","[","]","{","}","//","/*","*/","#"};

char *Keywords[MAX_KEYWORDS] = {"include","define","auto","bool","break","case","catch","char","class",

"const","const_cast","continue","default","delete","do","double",

"dynamic_cast","else","enum","explicit","extern","false","float","for",

"friend","goto","if","inline","int","long","mutable","namespace","new",

"operator","private","protected","public","register","reinterpret_cast",

"return","short","signed","sizeof","static","static_cast","struct",

"switch","template","this","throw","true","try","typedef","typeid",

"typename","union","unsigned","using","virtual","void","volatile","while"};

/* 是否为运算符 */

int IsOperator(char c)

{

int i;

for(i=0;i<MAX_OPERATORS;i++)

if(Operators[i][0]==c)

return 1;

return 0;

}

/* 是否为分隔符 */

int IsSeparator(char c)

{

int i;

for(i=0;i<MAX_SEPARATORS;i++)

if(Separators[i][0]==c)

return 1;

return 0;

}

/* 是否为保留字 */

int IsKeyword(char *str)

{

int i;

for(i=0;i<MAX_KEYWORDS;i++)

if(strcmp(Keywords[i],str)==0)

return 1;

return 0;

}

/* DONE一次输出一次 */

void OutputOneDone(FILE *outf,int type,char *str)

{

if(IsKeyword(str)==1) type=TYPE_KEYWORD;

switch(type)

{

case TYPE_KEYWORD: fprintf(outf," KEYWORD: ");break;

case TYPE_IDENTIFIER: fprintf(outf," IDENTIFIER: ");break;

case TYPE_NUMBER: fprintf(outf," NUMBER: ");break;

case TYPE_NOTE: fprintf(outf," NOTE: ");break;

case TYPE_CONSTANT: fprintf(outf," CONSTANT: ");break;

case TYPE_OPERATOR: fprintf(outf," OPERATOR: ");break;

case TYPE_SEPARATOR: fprintf(outf," SEPARATOR: ");break;

case TYPE_ERROR: fprintf(outf," ERROR: ");break;

case TYPE_UNKNOWN: fprintf(outf," UNKNOWN: ");break;

default:break;

}

fprintf(outf,"%s\n",str);

}

/* DFA词法分析函数 */

void LexAnalyse(FILE *inf,FILE *outf)

{

char c;

char str[MAX_STR];/* 过程字符串 */

int i;

int line_no=1;/* 行号 */

int state,type;

char flag_limit_one_line;/* 标志为 / * 注释范围是一行 */

char flag_had_got_dot;/* 用于限定小数中只能有一个小数点 */

fprintf(outf,"Line %d--------------------------------------\n",line_no);

while(!feof(inf))

{

i=0;

state=STA_START;

flag_limit_one_line=0;

flag_had_got_dot=0;

while(state!=STA_DONE)

{

c=fgetc(inf);

switch(state)

{

case STA_START:

if( c==' ' || c=='\t');

else if(c=='\n')

{

line_no++;

fprintf(outf,"Line %d--------------------------------------\n",line_no);

}

else if( (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

{

state=STA_IDorKEYWORD;

type=TYPE_IDENTIFIER;

str[i]=c; i++;

}

else if(c>='0' && c<='9')

{

state=STA_NUMBER;

type=TYPE_NUMBER;

str[i]=c; i++;

}

else if(c=='/')

{

str[i]=c; i++;

c=fgetc(inf);

if(c=='/') /* //注释 */

{

flag_limit_one_line=1;

state=STA_NOTE;

type=TYPE_NOTE;

str[i]=c; i++;

}

else if(c=='*') /* / * 注释开始 */

{

state=STA_NOTE;

type=TYPE_NOTE;

str[i]=c; i++;

}

else

{

state=STA_DONE;

type=TYPE_OPERATOR;

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

i=1;/* str[1]='\0'; */

}

else if(c=='<' || c=='>')

{

state=STA_DONE;

type=TYPE_OPERATOR;

str[0]=c;

c=fgetc(inf);

if(c=='=')

{

str[1]='=';i=2;/* str[2]='\0' */

}

else/* 暂未考虑<<,>>位移运算符的识别 */

{

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

i=1;/* str[1]='\0'; */

}

else if(c=='!')

{

state=STA_DONE;

str[0]=c;

c=fgetc(inf);

if(c=='=') /* 是!= */

{

type=TYPE_OPERATOR;

str[1]='=';i=2;/* str[2]='\0' */

}

else /* 非!= */

{

type=TYPE_UNKNOWN;

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

i=1;/* str[1]='\0'; */

}

else if(c=='\"' || c=='\'')

{

state=STA_CONSTANT;

type=TYPE_CONSTANT;

str[0]=c; i=1;

}

else if(IsOperator(c))

{

state=STA_DONE;

type=TYPE_OPERATOR;

str[0]=c; i=1;/* str[1]='\0'; */

}

else if(IsSeparator(c))

{

state=STA_DONE;

type=TYPE_SEPARATOR;

str[0]=c; i=1;/* str[1]='\0'; */

}

else if(c==EOF)

{

state=STA_DONE;

type=TYPE_ENDFILE;

}

else

{

state=STA_DONE;

type=TYPE_UNKNOWN;

}

break;/* case STA */

case STA_IDorKEYWORD:

if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_' )

{

str[i]=c; i++;

}

else if(c=='.')

{

str[i]=c; i++;

c=fgetc(inf);

if((c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

{

str[i]=c; i++;

/* type=TYPE_IDENTIFIER; */

}

else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

{

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

state=STA_DONE;

type=TYPE_ERROR; /* 如 date. 或 date.1 标记为ERROR类型 */

}

else

{

str[i]=c; i++;

type=TYPE_ERROR;

}

else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

{

state=STA_DONE;

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

}

else

{

state=STA_DONE;

type=TYPE_ERROR;

}

break;/* case STA */

case STA_NUMBER:

if(c>='0' && c<='9')

{

str[i]=c; i++;

}

else if(c=='.') /* 小数识别 */

{

str[i]=c; i++;

c=fgetc(inf);

if(flag_had_got_dot==0)

{

if(c>='0' && c<='9')

{

str[i]=c; i++;

/* type=TYPE_NUMBER; */

flag_had_got_dot=1;

}

else /* if(c==' ' || c=='\t' || c=='\n' || c>='0' && c<='9' || ...) */

{

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

state=STA_DONE;

type=TYPE_ERROR; /* 如 date. 或 date.1 标记为ERROR类型 */

}

else

{

type=TYPE_ERROR;

str[i]=c; i++;

}

else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

{

state=STA_DONE;

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

}

else

{

str[i]=c; i++;

type=TYPE_ERROR;

}

break;/* case STA */

case STA_NOTE:

if(flag_limit_one_line==1) /* 是/ * 注释, 限定一行 */

{

if(c=='\n')

{

state=STA_DONE;

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

}

else

{

str[i]=c; i++;

}

else /* 是 / * 注释 */

{

if(feof(inf))

{

state=STA_DONE;

type=TYPE_ERROR;

}

else if(c=='\n')

{

line_no++;

str[i]=c; i++;

}

else if(c=='*')

{

str[i]=c; i++;

c=fgetc(inf);

if(c=='/')

{

state=STA_DONE;

str[i]=c; i++;

}

else

{

if(feof(inf))

{

state=STA_DONE;

type=TYPE_ERROR;

}

fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

}

else

{

str[i]=c; i++;

}

break;/* case STA */

case STA_CONSTANT:

if(feof(inf))

{

state=STA_DONE;

type=TYPE_ERROR;

}

else if(c=='\n')

{

line_no++;

str[i]=c; i++;

}

else if(c=='\"' || c=='\'')

{

state=STA_DONE;

str[i]=c; i++;

}

else if(c=='\\')

{

str[i]=c; i++;

c=fgetc(inf);

str[i]=c; i++;

}

else

{

str[i]=c; i++;

}

break;/* case STA */

case STA_DONE: break;/* case STA */

default: break;/* case STA */

}

}/* state=STA_DONE */

str[i]='\0';

OutputOneDone(outf,type,str);/* DONE一次输出一次 */

}/* feof(inf) */

}

/* 主函数 */

main()

{

FILE *input,*output;

if((input=fopen("input.c","r"))==NULL)

{

printf("Cannot find the file!\nStrike any key to exit!\n");

system("pause");

exit(1);

}

else

{

output=fopen("output.c","w");

LexAnalyse(input,output);

fprintf(output,"----------------------------------------------END OF FILE!\n");

fclose(input);

fclose(output);

printf("Lexical Analyzer has finished the analyzation!\nFor more information please see the file output.txt.\n");

system("pause");

}

转载于:https://www.cnblogs.com/Estrong/archive/2012/11/05/2744548.html

weixin_30664051

关注

0
点赞
踩
17

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫