编译原理-词法分析器源码

/*
	作品:词法分析器(scanner)
	日期:2023年10月4日
	作者:chu
*/
#include<stdbool.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define filePosMax 128 /* capacity of a file-path buffer, terminator included */
#define bufMax 1024 /* capacity of the source-text buffers, terminator included */
#define keyLensMax 10 /* storage reserved per keyword, terminator included */
/*
 * Capacity of the accumulated token text. The token text is much larger than
 * the source it describes: a one-character token such as ';' is written as a
 * line of about 10 bytes, and a diagnostic line for an unknown character is
 * about 40 bytes (UTF-8 encoded message plus row/column digits). Sizing the
 * buffer like the source buffer therefore overflows on modest inputs.
 */
#define tokenBufMax (bufMax * 48)

/* Column value saved by getNextChar before it advances; used by goBack to undo a newline. */
int columnTep = 0;
/* Accumulated token lines for the whole scan; written to the token file at the end. */
char tokenBuf[tokenBufMax] = {0};
/* Scratch buffer for formatting a single token line. */
char tokenTmp[50] = {0};
/* Keyword table; the token code of keywords[i] is 101 + i. */
char keywords[][keyLensMax] = {"char","int","float","break","const","return","void","continue","do","while","if","else","for","main","include"};
/* Boundary symbols; the token code of bowords[i] is 301 + i. */
char bowords[10] = {'#','{','}',';',','};
/* Single-character operators; the token code of opword1[i] is 201 + i. */
char opword1[15] = {'(',')','[',']','*','%','+','-','.'};

// Scanner state: the source text together with the current read position in it
typedef struct{
	int CurRow;// current row (line number)
	char buf[bufMax];// buffer holding the source text being scanned
	int CurColumn;// current column
	int cnt;// number of characters consumed so far, i.e. index of the next character to read from buf
	char CurChar;// the character most recently read from buf
}Buf,* pBuf;
// Returns whether the string is a decimal number (digits containing a decimal point)
bool isDecimal(char *str);
bool isLetter(char ch);// whether ch is a letter
bool isDigit(char ch);// whether ch is a digit
bool isWs(char ch);// whether ch is a whitespace character
/*
	Reading the input and recognizing words at the same time was abandoned;
	the file is read into the buffer first and the words are recognized afterwards
*/
void inputBuf(char * buf, char *filePos);
// Prints the welcome banner
void welcome();
// Reads the next character
void getNextChar(pBuf buf);
// Emits the token for an identifier (or keyword)
void getIdToken(char * ch);
// Emits the token for a number
void getDigToken(char * ch);
// Recognizes an identifier
void recognizeId(pBuf buf);
// Recognizes a number
void recognizeDig(pBuf buf);
// Scans the buffer and recognizes words
void scanWord(pBuf buf);
// Recognizes boundary symbols and operators
void recognizeBoOp(pBuf buf);
// Steps back one character
void goBack(pBuf buf);
// Writes the token text to a file
void writeToken(char * tokenBuf,char * tokenFile);

/*
 * Program entry point.
 *
 * Prints the banner, asks the user for the path of the source file, loads the
 * file, echoes it, scans it into tokens and writes the token text to the
 * fixed token file.
 *
 * Returns 0 on success and EXIT_FAILURE when no path could be read.
 */
int main(void)
{
	Buf scanner;/* named differently from the type so the type is not shadowed */
	char source[bufMax] = {0};/* intermediate buffer for the file contents */
	/* path of the file to scan (entered by the user) */
	char filePos[filePosMax] = {0};
	/* where the token file is stored */
	char tokenFile[filePosMax] = "E:\\study\\PrincipleOfCompiler\\tokendata.dat";

	welcome();

	/* initialize the scanner state */
	memset(scanner.buf,0,sizeof(scanner.buf));
	scanner.CurColumn = 0;
	scanner.CurRow = 1;
	scanner.cnt = 0;
	scanner.CurChar = 0;

	printf("请输入源代码具体位置:\n");
	/* The field width must stay at filePosMax - 1 so a long path cannot overrun filePos. */
	if(scanf("%127s",filePos) != 1)
	{
		fprintf(stderr,"failed to read the source file path\n");
		return EXIT_FAILURE;
	}

	inputBuf(source,filePos);/* 1. load the file into the buffer */
	/*
	 * Copy at most bufMax - 1 bytes into the zero-filled scanner buffer so it is
	 * NUL-terminated even when the source buffer is completely full.
	 */
	memcpy(scanner.buf,source,sizeof(scanner.buf) - 1);
	printf("===源代码如下===\n");
	printf("%s",scanner.buf);

	printf("\n===token值如下===\n");
	/* 2. recognize words by the lexical rules, printing each word and its code */
	scanWord(&scanner);
	writeToken(tokenBuf,tokenFile);
	/* Print the path actually used instead of a second hard-coded copy of it. */
	printf("写入完毕,token文件在%s\n",tokenFile);
	printf("感谢您的使用!\n");

	return 0;
}
/*
 * Reports whether str is written as a decimal number: an optional leading
 * '+' or '-', followed only by digits and exactly one '.'.
 * Any other character, or a second '.', makes the result false.
 */
bool isDecimal(char *str) {
    int dots = 0;
    char *p = str;

    /* skip a single optional sign */
    if (*p == '+' || *p == '-')
        ++p;

    for (; *p != '\0'; ++p) {
        if (*p == '.') {
            if (++dots > 1)
                return false;
        } else if (!(*p >= '0' && *p <= '9')) {
            return false;
        }
    }

    return dots == 1;
}
/* Returns true when ch is an ASCII letter, 'a'..'z' or 'A'..'Z'. */
bool isLetter(char ch)
{
	bool lower = (ch >= 'a' && ch <= 'z');
	bool upper = (ch >= 'A' && ch <= 'Z');
	return lower || upper;
}
/* Returns true when ch is one of the decimal digits '0'..'9'. */
bool isDigit(char ch)
{
	return ch >= '0' && ch <= '9';
}
/*
 * Returns true when ch is treated as blank by the scanner.
 *
 * Besides space, tab, newline, vertical tab and form feed, the NUL character
 * is counted as blank. Carriage return is included as well, so that a source
 * with CRLF line endings does not produce an "unknown symbol" diagnostic at
 * the end of every line when the file is read without newline translation.
 */
bool isWs(char ch)
{
	switch(ch)
	{
	case '\0':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
	case ' ':
		return true;
	default:
		return false;
	}
}
/*
 * Writes the NUL-terminated token text to the file named by tokenFile,
 * replacing any previous contents.
 *
 * tokenBuf:  text to write.
 * tokenFile: path of the output file.
 *
 * On any failure (open, write or close) a diagnostic is printed and the
 * program exits with EXIT_FAILURE, so the caller never reports a successful
 * write that did not happen.
 */
void writeToken(char * tokenBuf,char * tokenFile)
{
	FILE *wf = fopen(tokenFile,"w");
	int failed;

	if (wf == NULL)
	{
		perror("open file for writing");
		exit(EXIT_FAILURE);
	}

	failed = (fputs(tokenBuf,wf) == EOF);
	/* fclose flushes buffered data, so its result matters for a write stream */
	if (fclose(wf) == EOF)
		failed = 1;

	if (failed)
	{
		perror("write token file");
		exit(EXIT_FAILURE);
	}
}
/*
 * Fallback so this function is self-contained; the file defines bufMax near
 * its top, in which case this guard does nothing.
 */
#ifndef bufMax
#define bufMax 1024
#endif
/*
 * Reads the file named by filePos into buf.
 *
 * buf:     destination; must have room for bufMax bytes.
 * filePos: path of the file to read.
 *
 * At most bufMax - 1 bytes are stored and the result is always
 * NUL-terminated, so a file larger than the buffer is truncated (with a
 * warning on stderr) instead of overrunning the destination.
 * If the file cannot be opened, a diagnostic is printed and the program
 * exits with EXIT_FAILURE.
 */
void inputBuf(char * buf,char * filePos)
{
	FILE *f = fopen(filePos, "r");
	int ch = 0;
	int cnt = 0;

	if (f == NULL)
	{
		perror("open file for reading");
		exit(EXIT_FAILURE);
	}

	/* test the capacity first so no character is read and then discarded */
	while (cnt < bufMax - 1 && (ch = fgetc(f)) != EOF)
	{
		buf[cnt++] = (char)ch;
	}
	buf[cnt] = '\0';

	/* the buffer is full: check whether anything was left unread */
	if (cnt == bufMax - 1 && fgetc(f) != EOF)
	{
		fprintf(stderr, "warning: source file exceeds %d bytes; input truncated\n", bufMax - 1);
	}

	fclose(f);
}
/*
 * Prints the welcome banner followed by the table of token codes
 * (keywords, boundary symbols, constant/identifier classes and operators)
 * to standard output.
 */
void welcome(){
	/* one entry per output line, emitted in order */
	static const char *const banner[] = {
		"=========欢迎使用词法分析器=========\n",
		"===========系统种别码如下===========\n\n",
		"--------------key-------------\n",
		"char  : 101\tint      : 102\n",
		"float : 103\tbreak    : 104\n",
		"const : 105\treturn   : 106\n",
		"void  : 107\tcontinue : 108\n",
		"do    : 109\twhile\t : 110\n",
		"if    : 111\telse\t : 112\n",
		"for   : 113\t\n",
		"-----------Boundary-----------\n",
		"#     : 301\n",
		"{     : 302\t}        : 303\n",
		";     : 304\t,        : 305\n",
		"-------------Other------------\n",
		"integer : 400\n",
		"char    : 500\n",
		"string  : 600\n",
		"id\t: 700\n",
		"float\t: 800\n",
		"--------------op-------------\n",
		"(\t: 201\t)\t: 202\n",
		"[\t: 203\t]\t: 204\n",
		"*\t: 205\t%\t: 206\n",
		"+\t: 207\t-\t: 208\n",
		".\t: 209\t/\t: 210\n",
		">\t: 211\t>=\t: 212\n",
		"<\t: 213\t<=\t: 214\n",
		"=\t: 215\t==\t: 216\n",
		"!\t: 217\t!=\t: 218\n",
		"&&\t: 219\t||\t: 220\n",
		"---------author:zsj----------\n"
	};
	size_t row;

	for(row = 0; row < sizeof(banner) / sizeof(banner[0]); ++row)
		fputs(banner[row], stdout);
}
/*
 * Reads the next character of the source text into buf->CurChar and advances
 * the read index.
 *
 * The column in effect before the read is saved in columnTep. The column is
 * then advanced by one, or, when the character read is a newline, the row is
 * advanced and the column is reset to 1.
 */
void getNextChar(pBuf buf)
{
	char next = buf->buf[buf->cnt];

	buf->cnt += 1;
	buf->CurChar = next;
	columnTep = buf->CurColumn;/* remembered so goBack can undo a newline */

	if(next == '\n')
	{
		buf->CurRow += 1;
		buf->CurColumn = 1;
	}
	else
	{
		buf->CurColumn += 1;
	}
}
/*
 * Undoes the most recent getNextChar: steps the read index back by one and
 * makes buf->CurChar the character before the one being given back.
 *
 * When the character given back is a newline, the row is decremented and the
 * column is restored from columnTep (the value getNextChar saved when it read
 * that newline); otherwise the column is simply decremented.
 */
void goBack(pBuf buf)
{
	int undoNewline = (buf->CurChar == '\n');

	buf->cnt -= 1;

	if(undoNewline)
	{
		buf->CurRow -= 1;
		buf->CurColumn = columnTep;
	}
	else
	{
		buf->CurColumn -= 1;
	}

	buf->CurChar = buf->buf[buf->cnt - 1];
}
//获取标识符的token
void getIdToken(char * ch)
{
	//printf("ch = %s\n",ch);
	int token;
	int len = sizeof(keywords)/keyLensMax;
	for(int i = 0; i < len; ++i)//遍历所有关键字
	{
		token = 101 + i;//每个关键字对应的token值
		if(strcmp(ch,keywords[i]) == 0)//如果与当前字符对应
		{
			printf("(\"%s\",%d)\n",ch,token);
			sprintf(tokenTmp,"(\"%s\",%d)\n",ch,token);
			strcat(tokenBuf,tokenTmp);
			return;
		}		
	}
	if(i >= len)//如果都比对了还是没
	{
		printf("(\"%s\",700)\n",ch);
		sprintf(tokenTmp,"(\"%s\",700)\n",ch);
		strcat(tokenBuf,tokenTmp);
	}
}
/*
 * Recognizes an identifier or keyword starting at buf->CurChar and emits its
 * token through getIdToken.
 *
 * The word consists of letters and underscores; digits are accepted after the
 * first character, so a name such as x1 is one identifier rather than an
 * identifier followed by a number.
 *
 * At most 32 characters are kept: the whole word is still consumed, but the
 * stored text is truncated (with a warning on stderr) so neither the local
 * buffer nor the 50-byte token line buffer can be overrun.
 *
 * On return buf->CurChar is the last character of the word.
 */
void recognizeId(pBuf buf)
{
	char ch[33] = {0};
	int stored = 0;/* characters kept in ch */
	int seen = 0;/* characters consumed from the source */

	while(isLetter(buf->CurChar) || buf->CurChar == '_'
		|| (seen > 0 && isDigit(buf->CurChar)))
	{
		if(stored < (int)sizeof(ch) - 1)
			ch[stored++] = buf->CurChar;
		++seen;
		getNextChar(buf);
	}
	/* the loop stopped on the first character after the word: give it back */
	goBack(buf);

	if(seen > stored)
		fprintf(stderr,"warning: identifier at row %d truncated to %d characters\n",
			buf->CurRow,stored);

	getIdToken(ch);
}

/*
 * Emits the token line for the numeric text ch: code 800 when isDecimal(ch)
 * holds, code 400 otherwise. The line is printed to standard output and
 * appended to tokenBuf.
 */
void getDigToken(char * ch)
{
	int code = isDecimal(ch) ? 800 : 400;

	printf("(\"%s\",%d)\n",ch,code);
	sprintf(tokenTmp,"(\"%s\",%d)\n",ch,code);
	strcat(tokenBuf,tokenTmp);
}
/*
 * Recognizes a numeric literal (a run of digits and '.') starting at
 * buf->CurChar and emits its token through getDigToken.
 *
 * At most 32 characters are kept: the whole run is still consumed, but the
 * stored text is truncated (with a warning on stderr) so neither the local
 * buffer nor the 50-byte token line buffer can be overrun.
 *
 * On return buf->CurChar is the last character of the literal.
 */
void recognizeDig(pBuf buf)
{
	char ch[33] = {0};
	int stored = 0;/* characters kept in ch */
	int seen = 0;/* characters consumed from the source */

	while(isDigit(buf->CurChar) || buf->CurChar == '.')
	{
		if(stored < (int)sizeof(ch) - 1)
			ch[stored++] = buf->CurChar;
		++seen;
		getNextChar(buf);
	}
	/* the loop stopped on the first character after the literal: give it back */
	goBack(buf);

	if(seen > stored)
		fprintf(stderr,"warning: number at row %d truncated to %d characters\n",
			buf->CurRow,stored);

	getDigToken(ch);
}
void recognizeBoOp(pBuf buf)
{
	int token = 301;
	char state = '0';
	for(int i = 0; i < strlen(bowords); ++i)
	{
		if(buf->CurChar == bowords[i])
		{
			printf("(\"%c\",%d)\n",buf->CurChar,token+i);
			sprintf(tokenTmp,"(\"%c\",%d)\n",buf->CurChar,token+i);
			strcat(tokenBuf,tokenTmp);
			state = '1';
			return;
		}
	}
	if(state != '0')
		return;
	else{
		int tokenop = 201;
		for(int j = 0; j < strlen(opword1); ++j)
		{
			if(buf->CurChar == opword1[j])
			{
				printf("(\"%c\",%d)\n",buf->CurChar,tokenop+j);
				sprintf(tokenTmp,"(\"%c\",%d)\n",buf->CurChar,tokenop+j);
				strcat(tokenBuf,tokenTmp);
				state = '2';
				return;
			}
		}
	}
	if(state != '0')
		return;
	else{
		if(buf->CurChar == '>')
		{
			getNextChar(buf);
			if(buf->CurChar == '=')
			{
				printf("(\">=\",%d)\n",212);
				sprintf(tokenTmp,"(\">=\",%d)\n",212);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("(\">\",%d)\n",211);
				sprintf(tokenTmp,"(\">\",%d)\n",211);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else if(buf->CurChar == '<')
		{
			getNextChar(buf);
			if(buf->CurChar == '=')
			{
				printf("(\"<=\",%d)\n",214);
				sprintf(tokenTmp,"(\"<=\",%d)\n",214);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("(\"<\",%d)\n",213);
				sprintf(tokenTmp,"(\"<\",%d)\n",213);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else if(buf->CurChar == '=')
		{
			getNextChar(buf);
			if(buf->CurChar == '=')
			{
				printf("(\"==\",%d)\n",216);
				sprintf(tokenTmp,"(\"==\",%d)\n",216);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("(\"=\",%d)\n",215);
				sprintf(tokenTmp,"(\"=\",%d)\n",215);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else if(buf->CurChar == '!')
		{
			getNextChar(buf);
			if(buf->CurChar == '=')
			{
				printf("(\"!=\",%d)\n",218);
				sprintf(tokenTmp,"(\"!=\",%d)\n",218);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("(\"!\",%d)\n",217);
				sprintf(tokenTmp,"(\"!\",%d)\n",217);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else if(buf->CurChar == '&')
		{
			getNextChar(buf);
			if(buf->CurChar == '&')
			{
				printf("(\"&&\",%d)\n",219);
				sprintf(tokenTmp,"(\"&&\",%d)\n",219);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
				sprintf(tokenTmp,"第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else if(buf->CurChar == '|')
		{
			getNextChar(buf);
			if(buf->CurChar == '|')
			{
				printf("(\"||\",%d)\n",220);
				sprintf(tokenTmp,"(\"||\",%d)\n",220);
				strcat(tokenBuf,tokenTmp);
				return;
			}
			else
			{
				goBack(buf);
				printf("第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
				sprintf(tokenTmp,"第%d行第%d列有运算符错误!\n",buf->CurRow,buf->CurColumn);
				strcat(tokenBuf,tokenTmp);
				return;
			}
		}
		else
		{
			printf("第%d行第%d列有未知符号!-->%c\n",buf->CurRow,buf->CurColumn,buf->CurChar);
			sprintf(tokenTmp,"第%d行第%d列有未知符号!-->%c\n",buf->CurRow,buf->CurColumn,buf->CurChar);
			strcat(tokenBuf,tokenTmp);
		}
	}
}

//扫描识别单词
void scanWord(pBuf buf)
{
	getNextChar(buf);//获取首字符
	while('\0' != buf->CurChar)//一直扫描到buf末尾
	{
		if(!isWs(buf->CurChar))//如果不是空白字符
		{
			if(isLetter(buf->CurChar) || buf->CurChar == '_')//如果是字母或下划线(标识符或关键字)
			{
				recognizeId(buf);
			}
			else if(isDigit(buf->CurChar))//如果是数字可能是数值常量
			{
				recognizeDig(buf);
			}
			else if(buf->CurChar == '/')//可能是除号或注释
			{
				getNextChar(buf);
				if(buf->CurChar == '/')//注释不用管
				{
					while(buf->CurChar != '\n')
					{
						getNextChar(buf);
					}
				}
				else if(buf->CurChar == '*')
				{
					getNextChar(buf);
					while(buf->CurChar != '*' || buf->buf[buf->cnt] != '/')
					{
						getNextChar(buf);
					}
				}
				else if(!isWs(buf->CurChar)){
					goBack(buf);
					printf("(\"%c\",210)\n",buf->CurChar);
					sprintf(tokenTmp,"(\"%c\",210)\n",buf->CurChar);
					strcat(tokenBuf,tokenTmp);
				}
			}
			else if(buf->CurChar == '\'')//可能是字符常量
			{
				char tmp;
				int i = 0;
				bool wrong = false;
				getNextChar(buf);
				tmp = buf->CurChar;
				getNextChar(buf);
				while(buf->CurChar != '\'')
				{
					++i;
					getNextChar(buf);
					if(buf->CurChar == '\n')
					{
						goBack(buf);
						printf("第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
						sprintf(tokenTmp,"第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
						strcat(tokenBuf,tokenTmp);
						wrong = true;
						break;
					}
				}
				if(!wrong)
					if(i > 0)
					{
						printf("第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
						sprintf(tokenTmp,"第%d行第%d列有字符错误!\n",buf->CurRow,buf->CurColumn);
						strcat(tokenBuf,tokenTmp);
					}
					else
					{
						printf("(\"%c\",500)\n",tmp);
						sprintf(tokenTmp,"(\"%c\",500)\n",tmp);
						strcat(tokenBuf,tokenTmp);
					}
			}
			else if(buf->CurChar == '"')//可能是字符串常量
			{
				int index = 0;
				char sta = '0';
				char strcon[128] = {0};
				getNextChar(buf);
				while(buf->CurChar != '"')
				{
					if(buf->CurChar == '\n')
					{
						goBack(buf);
						printf("第%d行第%d列有字符串错误!\n",buf->CurRow,buf->CurColumn);
						sprintf(tokenTmp,"第%d行第%d列有字符串错误!\n",buf->CurRow,buf->CurColumn);
						strcat(tokenBuf,tokenTmp);
						sta = '1';
						break;
					}
					strcon[index++] = buf->CurChar;
					getNextChar(buf);
					
				}
				if(sta == '0')
				{
					printf("(\"%s\",600)\n",strcon);
					sprintf(tokenTmp,"(\"%s\",600)\n",strcon);
					strcat(tokenBuf,tokenTmp);
				}
			}
			else//可能是界符或运算符
			{
				recognizeBoOp(buf);
			}
		}
		getNextChar(buf);
	}
}
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值