/*
作品:词法分析器(scanner)
日期:2023年10月4日
作者:chu
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define filePosMax 128//size of the file-path buffers declared in main
#define bufMax 1024//size of the source-text and token buffers
#define keyLensMax 10//size of each entry in the keywords table
int columnTep = 0;//column saved by getNextChar before it advances; goBack restores it when un-reading a newline
char tokenBuf[bufMax] = {0};//token buffer: every emitted token line is appended here
char tokenTmp[50] = {0};//scratch buffer holding one formatted token line
//keyword table (getIdToken maps entry i to code 101 + i)
char keywords[][keyLensMax] = {"char","int","float","break","const","return","void","continue","do","while","if","else","for","main","include"};
//delimiter table (recognizeBoOp maps entry i to code 301 + i)
char bowords[10] = {'#','{','}',';',','};
//single-character operator table (recognizeBoOp maps entry i to code 201 + i)
char opword1[15] = {'(',')','[',']','*','%','+','-','.'};
//scanner state: the source text together with the current read position
typedef struct{
int CurRow;//current row
char buf[bufMax];//buffer holding the source text
int CurColumn;//current column
int cnt;//index of the next character to read (getNextChar reads buf[cnt], then increments cnt)
char CurChar;//current character
}Buf,* pBuf;
//test whether a string is a decimal (fractional) number
bool isDecimal(char *str);
bool isLetter(char ch);//is the character a letter
bool isDigit(char ch);//is the character a digit
bool isWs(char ch);//is the character whitespace
/*
Recognizing words while filling the buffer (deprecated).
The file is first read into the buffer, and the words are recognized afterwards.
*/
void inputBuf(char * buf, char *filePos);
//print the welcome banner
void welcome();
//fetch the next character
void getNextChar(pBuf buf);
//emit the token for an identifier
void getIdToken(char * ch);
//emit the token for a number
void getDigToken(char * ch);
//recognize an identifier
void recognizeId(pBuf buf);
//recognize a number
void recognizeDig(pBuf buf);
//scan the buffer and recognize words
void scanWord(pBuf buf);
//recognize delimiters and operators
void recognizeBoOp(pBuf buf);
//step back one character
void goBack(pBuf buf);
//write the tokens to a file
void writeToken(char * tokenBuf,char * tokenFile);
/*
Program entry point.
Prints the banner, asks for the path of the source file, loads the file,
echoes it, scans it into tokens and finally writes the collected token text
to the token file.
Returns 0 on success and EXIT_FAILURE when no path could be read.
*/
int main(void)
{
    Buf scanner;
    //path typed in by the user
    char filePos[filePosMax] = {0};
    //location of the token output file
    char tokenFile[filePosMax] = "E:\\study\\PrincipleOfCompiler\\tokendata.dat";

    welcome();

    //start from an all-zero buffer so the text is terminated even if the loader writes no terminator
    memset(scanner.buf, 0, sizeof(scanner.buf));
    scanner.CurColumn = 0;
    scanner.CurRow = 1;
    scanner.cnt = 0;
    scanner.CurChar = 0;

    printf("请输入源代码具体位置:\n");
    //fgets is bounded by the array size (scanf("%s") was not) and keeps paths that contain spaces intact
    if(fgets(filePos, sizeof filePos, stdin) == NULL)
    {
        fprintf(stderr, "failed to read the source file path\n");
        return EXIT_FAILURE;
    }
    filePos[strcspn(filePos, "\r\n")] = '\0';//drop the line terminator kept by fgets

    //1. load the file straight into the scanner buffer
    inputBuf(scanner.buf, filePos);
    printf("===源代码如下===\n");
    printf("%s", scanner.buf);
    printf("\n===token值如下===\n");
    //2. recognize the words and print each one with its category code
    scanWord(&scanner);
    writeToken(tokenBuf, tokenFile);
    //report the path actually used instead of repeating the literal
    printf("写入完毕,token文件在%s\n", tokenFile);
    printf("感谢您的使用!\n");
    return 0;
}
/*
Tests whether str is a decimal (fractional) number: an optional leading
'+' or '-', then digits containing exactly one '.', with at least one digit
overall. Examples: "3.14", "-0.5", "1." and ".5" are decimals; "42", "1.2.3",
"." and "" are not. A NULL pointer is reported as not a decimal.
*/
bool isDecimal(char *str) {
    bool hasDot = false;
    bool hasDigit = false;
    const char *ptr = str;

    if (ptr == NULL) {
        return false;
    }
    if (*ptr == '-' || *ptr == '+') {
        ptr++;
    }
    for (; *ptr != '\0'; ptr++) {
        if (*ptr == '.') {
            if (hasDot) {
                return false; /* a second '.' */
            }
            hasDot = true;
        } else if (*ptr >= '0' && *ptr <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    /* a lone "." (or a sign followed by ".") carries no digit and is not a number */
    return hasDot && hasDigit;
}
//Reports whether ch is one of the letters a-z or A-Z.
bool isLetter(char ch)
{
    //distance from the start of each range; anything outside the range becomes a large unsigned value
    const unsigned fromLower = (unsigned)(ch - 'a');
    const unsigned fromUpper = (unsigned)(ch - 'A');

    return fromLower < 26u || fromUpper < 26u;
}
//Reports whether ch is one of the digits 0-9.
bool isDigit(char ch)
{
    //distance from '0'; anything outside 0..9 becomes a large unsigned value
    const unsigned fromZero = (unsigned)(ch - '0');

    return fromZero < 10u;
}
/*
Reports whether ch is treated as whitespace by the scanner: space, tab,
newline, carriage return, vertical tab, form feed, and the NUL terminator.
Carriage return is included so that sources with CRLF line endings do not
produce "unknown symbol" reports when the '\r' reaches the scanner.
*/
bool isWs(char ch)
{
    switch(ch)
    {
    case '\0':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
    case ' ':
        return true;
    default:
        return false;
    }
}
/*
Writes the token text to a file, replacing any previous content.
tokenBuf  : NUL-terminated text to write
tokenFile : path of the file to create or overwrite
On any failure (open, write or close) a diagnostic is printed with perror
and the program exits with EXIT_FAILURE.
*/
void writeToken(char * tokenBuf,char * tokenFile)
{
    FILE *wf = fopen(tokenFile, "w");

    if(wf == NULL)
    {
        perror("open file for writing");
        exit(EXIT_FAILURE);
    }
    if(fputs(tokenBuf, wf) == EOF)
    {
        perror("write token file");
        fclose(wf);
        exit(EXIT_FAILURE);
    }
    //fclose flushes buffered data, so a failure here means the file is incomplete
    if(fclose(wf) == EOF)
    {
        perror("close token file");
        exit(EXIT_FAILURE);
    }
}
//Fallback so this function can be built on its own; the file-level definition takes precedence.
#ifndef bufMax
#define bufMax 1024
#endif
/*
Reads the text of a file into buf.
buf     : destination, which must provide at least bufMax bytes
filePos : path of the file to read
At most bufMax - 1 characters are stored and the result is always
NUL-terminated; if the file is longer, the remainder is ignored and a warning
is printed on stderr. If the file cannot be opened, a diagnostic is printed
with perror and the program exits with EXIT_FAILURE.
*/
void inputBuf(char * buf,char * filePos)
{
    const size_t limit = (size_t)(bufMax - 1);//keep one byte for the terminator
    size_t used = 0;
    int ch;
    FILE *f = fopen(filePos, "r");

    if(f == NULL)
    {
        perror("open file for reading");
        exit(EXIT_FAILURE);
    }
    while(used < limit && (ch = fgetc(f)) != EOF)
    {
        buf[used++] = (char)ch;
    }
    buf[used] = '\0';//do not rely on the caller having zeroed the buffer
    if(used == limit && fgetc(f) != EOF)
    {
        fprintf(stderr, "warning: %s is larger than %d bytes; the rest was ignored\n", filePos, bufMax - 1);
    }
    fclose(f);
}
//Prints the welcome banner together with the table of category codes.
void welcome(){
    //one call; the adjacent string literals are joined by the compiler into a single format string
    printf(
        "=========欢迎使用词法分析器=========\n"
        "===========系统种别码如下===========\n\n"
        "--------------key-------------\n"
        "char : 101\tint : 102\n"
        "float : 103\tbreak : 104\n"
        "const : 105\treturn : 106\n"
        "void : 107\tcontinue : 108\n"
        "do : 109\twhile : 110\n"
        "if : 111\telse : 112\n"
        "for : 113\t\n"
        "-----------Boundary-----------\n"
        "# : 301\n"
        "{ : 302\t} : 303\n"
        "; : 304\t, : 305\n"
        "-------------Other------------\n"
        "integer : 400\n"
        "char : 500\n"
        "string : 600\n"
        "id : 700\n"
        "float : 800\n"
        "--------------op-------------\n"
        "( : 201\t) : 202\n"
        "[ : 203\t] : 204\n"
        "* : 205\t%% : 206\n"
        "+ : 207\t- : 208\n"
        ". : 209\t/ : 210\n"
        "> : 211\t>= : 212\n"
        "< : 213\t<= : 214\n"
        "= : 215\t== : 216\n"
        "! : 217\t!= : 218\n"
        "&& : 219\t|| : 220\n"
        "---------author:zsj----------\n"
    );
}
/*
Advances the scanner by one character.
Stores the character at the read position in buf->CurChar, moves the read
position forward and updates the row/column counters. The column held before
the step is saved in columnTep so that goBack can restore it when it steps
back over a newline.
After a newline the column is reset to 0, so the first character of every
row is counted as column 1 (the same as on the first row).
Positions outside the buffer array are never read; they yield '\0'.
*/
void getNextChar(pBuf buf)
{
    const int capacity = (int)sizeof(buf->buf);

    columnTep = buf->CurColumn;//column before this step
    if(buf->cnt >= 0 && buf->cnt < capacity)
        buf->CurChar = buf->buf[buf->cnt];
    else
        buf->CurChar = '\0';//outside the array: behave like the terminator
    ++buf->cnt;

    if(buf->CurChar == '\n')
    {
        ++buf->CurRow;
        buf->CurColumn = 0;//the next character read becomes column 1
    }
    else
    {
        ++buf->CurColumn;
    }
}
/*
Steps the scanner back by one character, undoing the most recent
getNextChar: the read position moves back by one, the row/column counters are
restored and buf->CurChar becomes the character before the one just un-read.
*/
void goBack(pBuf buf)
{
    //decide first, because CurChar is overwritten at the end
    const int undoNewline = (buf->CurChar == '\n');

    buf->cnt -= 1;
    if(undoNewline)
    {
        //back on the previous row, at the column saved by getNextChar
        buf->CurRow -= 1;
        buf->CurColumn = columnTep;
    }
    else
    {
        buf->CurColumn -= 1;
    }
    buf->CurChar = buf->buf[buf->cnt - 1];
}
/*
Emits the token for a word made of identifier characters.
ch : NUL-terminated lexeme
If the lexeme equals entry i of the keywords table the code is 101 + i,
otherwise it is an identifier with code 700. The token line is printed and
appended to tokenBuf; the append is bounded, so once tokenBuf is full further
text is dropped instead of being written past its end.
*/
void getIdToken(char * ch)
{
    const int keywordCount = (int)(sizeof keywords / sizeof keywords[0]);
    int token = 700;//identifier unless a keyword matches

    for(int i = 0; i < keywordCount; ++i)
    {
        if(strcmp(ch, keywords[i]) == 0)
        {
            token = 101 + i;
            break;
        }
    }
    printf("(\"%s\",%d)\n", ch, token);
    //format with a size limit: tokenTmp is a small fixed buffer
    snprintf(tokenTmp, sizeof tokenTmp, "(\"%s\",%d)\n", ch, token);
    strncat(tokenBuf, tokenTmp, sizeof tokenBuf - strlen(tokenBuf) - 1);
}
/*
Recognizes an identifier or keyword starting at buf->CurChar and hands the
lexeme to getIdToken.
The word consists of letters, underscores and digits; the caller only enters
here on a letter or underscore, so a word never starts with a digit.
On return buf->CurChar is the last character of the word.
At most lexemeMax characters are kept; a longer word is consumed completely
but the extra characters are dropped, which keeps the local array and the
formatted token line within their fixed sizes.
*/
void recognizeId(pBuf buf)
{
    enum { lexemeMax = 40 };
    char ch[lexemeMax + 1] = {0};
    int len = 0;

    while(isLetter(buf->CurChar) || isDigit(buf->CurChar) || buf->CurChar == '_')
    {
        if(len < lexemeMax)
            ch[len++] = buf->CurChar;
        getNextChar(buf);
    }
    goBack(buf);//the loop read one character past the word
    getIdToken(ch);
}
void getDigToken(char * ch)
{
//printf("ch = %s\n",ch);
if(isDecimal(ch))
{
printf("(\"%s\",800)\n",ch);
sprintf(tokenTmp,"(\"%s\",800)\n",ch);
strcat(tokenBuf,tokenTmp);
}
else
{
printf("(\"%s\",400)\n",ch);
sprintf(tokenTmp,"(\"%s\",400)\n",ch);
strcat(tokenBuf,tokenTmp);
}
}
/*
Recognizes a numeric constant starting at buf->CurChar and hands the lexeme
to getDigToken.
The number consists of digits with at most one '.'; a second '.' ends the
number, so "1.2.3" is no longer swallowed as a single lexeme.
On return buf->CurChar is the last character of the number.
At most lexemeMax characters are kept; a longer number is consumed completely
but the extra characters are dropped, which keeps the local array and the
formatted token line within their fixed sizes.
*/
void recognizeDig(pBuf buf)
{
    enum { lexemeMax = 40 };
    char ch[lexemeMax + 1] = {0};
    int len = 0;
    int seenDot = 0;

    for(;;)
    {
        const char cur = buf->CurChar;

        if(cur == '.')
        {
            if(seenDot)
                break;
            seenDot = 1;
        }
        else if(!isDigit(cur))
        {
            break;
        }
        if(len < lexemeMax)
            ch[len++] = cur;
        getNextChar(buf);
    }
    goBack(buf);//the loop read one character past the number
    getDigToken(ch);
}
void recognizeBoOp(pBuf buf)
{
int token = 301;
char state = '0';
for(int i = 0; i < strlen(bowords); ++i)
{
if(buf->CurChar == bowords[i])
{
printf("(\"%c\",%d)\n",buf->CurChar,token+i);
sprintf(tokenTmp,"(\"%c\",%d)\n",buf->CurChar,token+i);
strcat(tokenBuf,tokenTmp);
state = '1';
return;
}
}
if(state != '0')
return;
else{
int tokenop = 201;
for(int j = 0; j < strlen(opword1); ++j)
{
if(buf->CurChar == opword1[j])
{
printf("(\"%c\",%d)\n",buf->CurChar,tokenop+j);
sprintf(tokenTmp,"(\"%c\",%d)\n",buf->CurChar,tokenop+j);
strcat(tokenBuf,tokenTmp);
state = '2';
return;
}
}
}
if(state != '0')
return;
else{
if(buf->CurChar == '>')
{
getNextChar(buf);
if(buf->CurChar == '=')
{
printf("(\">=\",%d)\n",212);
sprintf(tokenTmp,"(\">=\",%d)\n",212);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("(\">\",%d)\n",211);
sprintf(tokenTmp,"(\">\",%d)\n",211);
strcat(tokenBuf,tokenTmp);
return;
}
}
else if(buf->CurChar == '<')
{
getNextChar(buf);
if(buf->CurChar == '=')
{
printf("(\"<=\",%d)\n",214);
sprintf(tokenTmp,"(\"<=\",%d)\n",214);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("(\"<\",%d)\n",213);
sprintf(tokenTmp,"(\"<\",%d)\n",213);
strcat(tokenBuf,tokenTmp);
return;
}
}
else if(buf->CurChar == '=')
{
getNextChar(buf);
if(buf->CurChar == '=')
{
printf("(\"==\",%d)\n",216);
sprintf(tokenTmp,"(\"==\",%d)\n",216);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("(\"=\",%d)\n",215);
sprintf(tokenTmp,"(\"=\",%d)\n",215);
strcat(tokenBuf,tokenTmp);
return;
}
}
else if(buf->CurChar == '!')
{
getNextChar(buf);
if(buf->CurChar == '=')
{
printf("(\"!=\",%d)\n",218);
sprintf(tokenTmp,"(\"!=\",%d)\n",218);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("(\"!\",%d)\n",217);
sprintf(tokenTmp,"(\"!\",%d)\n",217);
strcat(tokenBuf,tokenTmp);
return;
}
}
else if(buf->CurChar == '&')
{
getNextChar(buf);
if(buf->CurChar == '&')
{
printf("(\"&&\",%d)\n",219);
sprintf(tokenTmp,"(\"&&\",%d)\n",219);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
sprintf(tokenTmp,"第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
strcat(tokenBuf,tokenTmp);
return;
}
}
else if(buf->CurChar == '|')
{
getNextChar(buf);
if(buf->CurChar == '|')
{
printf("(\"||\",%d)\n",220);
sprintf(tokenTmp,"(\"||\",%d)\n",220);
strcat(tokenBuf,tokenTmp);
return;
}
else
{
goBack(buf);
printf("第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
sprintf(tokenTmp,"第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
strcat(tokenBuf,tokenTmp);
return;
}
}
else
{
printf("第%d行第%d列有未知符号!-->%c\n",buf->CurRow,buf->CurColumn,buf->CurChar);
sprintf(tokenTmp,"第%d行第%d列有未知符号!-->%c\n",buf->CurRow,buf->CurColumn,buf->CurChar);
strcat(tokenBuf,tokenTmp);
}
}
}
//扫描识别单词
void scanWord(pBuf buf)
{
getNextChar(buf);//获取首字符
while('\0' != buf->CurChar)//一直扫描到buf末尾
{
if(!isWs(buf->CurChar))//如果不是空白字符
{
if(isLetter(buf->CurChar) || buf->CurChar == '_')//如果是字母或下划线(标识符或关键字)
{
recognizeId(buf);
}
else if(isDigit(buf->CurChar))//如果是数字可能是数值常量
{
recognizeDig(buf);
}
else if(buf->CurChar == '/')//可能是除号或注释
{
getNextChar(buf);
if(buf->CurChar == '/')//注释不用管
{
while(buf->CurChar != '\n')
{
getNextChar(buf);
}
}
else if(buf->CurChar == '*')
{
getNextChar(buf);
while(buf->CurChar != '*' || buf->buf[buf->cnt] != '/')
{
getNextChar(buf);
}
}
else if(!isWs(buf->CurChar)){
goBack(buf);
printf("(\"%c\",210)\n",buf->CurChar);
sprintf(tokenTmp,"(\"%c\",210)\n",buf->CurChar);
strcat(tokenBuf,tokenTmp);
}
}
else if(buf->CurChar == '\'')//可能是字符常量
{
char tmp;
int i = 0;
bool wrong = false;
getNextChar(buf);
tmp = buf->CurChar;
getNextChar(buf);
while(buf->CurChar != '\'')
{
++i;
getNextChar(buf);
if(buf->CurChar == '\n')
{
goBack(buf);
printf("第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
sprintf(tokenTmp,"第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
strcat(tokenBuf,tokenTmp);
wrong = true;
break;
}
}
if(!wrong)
if(i > 0)
{
printf("第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
sprintf(tokenTmp,"第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
strcat(tokenBuf,tokenTmp);
}
else
{
printf("(\"%c\",500)\n",tmp);
sprintf(tokenTmp,"(\"%c\",500)\n",tmp);
strcat(tokenBuf,tokenTmp);
}
}
else if(buf->CurChar == '"')//可能是字符串常量
{
int index = 0;
char sta = '0';
char strcon[128] = {0};
getNextChar(buf);
while(buf->CurChar != '"')
{
if(buf->CurChar == '\n')
{
goBack(buf);
printf("第%d行第%d列有字符串错误!\n",buf->CurRow,buf->CurColumn);
sprintf(tokenTmp,"第%d行第%d列有字符串错误!\n",buf->CurRow,buf->CurColumn);
strcat(tokenBuf,tokenTmp);
sta = '1';
break;
}
strcon[index++] = buf->CurChar;
getNextChar(buf);
}
if(sta == '0')
{
printf("(\"%s\",600)\n",strcon);
sprintf(tokenTmp,"(\"%s\",600)\n",strcon);
strcat(tokenBuf,tokenTmp);
}
}
else//可能是界符或运算符
{
recognizeBoOp(buf);
}
}
getNextChar(buf);
}
}
// Compiler Principles: lexical analyzer source code
// First published 2023-10-20 15:20:05