针对c语言的词法分析输出token序列和值

针对简易版c语言的词法分析:

详细思路可直接看代码内注释。

【文法定义】:

<标识符>::=<字母>{<字母>|<数字>} //标识符和关键字都不区分大小写,比如if和IF均为关键字,不允许出现与关键字相同的标识符

<字母>::=_|a|...|z|A|...|Z

<数字>::=0|1|...|9

<整数>::=[+|-]<无符号整数>

<无符号整数>::=<数字>{<数字>}

<字符>::='<加法运算符>'|'<乘法运算符>'|'<字母>'|'<数字>'

<加法运算符>::=+|-

<乘法运算符>::=*|/

<字符串>::="{十进制编码为32,33,35-126的ASCII字符}" //字符串中要求至少有一个字符

【问题描述】

请根据给定的文法设计并实现词法分析程序,从源程序中识别出单词,记录其单词类别和单词值,输入输出及处理要求如下:

(1)数据结构和与语法分析程序的接口请自行定义;类别码需按下表格式统一定义;

(2)为了方便进行自动评测,输入的被编译源文件统一命名为testfile.txt(注意不要写错文件名);输出的结果文件统一命名为output.txt(注意不要写错文件名),结果文件中每行按如下方式组织:

单词类别码 单词的字符/字符串形式(中间仅用一个空格间隔)

单词的类别码请统一按如下形式定义:

无

【输入形式】testfile.txt中的符合文法要求的测试程序。
【输出形式】要求将词法分析结果输出至output.txt中。


#include<iostream>
#include<stdio.h>
#include<cstring>
#include<map>
#include <fstream>

using namespace std; 

// Capacity of the token[] and val[] tables below.
const int MAXN=1000;
string token[MAXN];		// token sequence produced by lexical analysis (category codes such as "INTTK")
// Source text of each recorded token; the code in this file writes it in
// lockstep with token[] (one val entry per token entry).
string val[MAXN];
// Index of the next free slot in token[].
int p = 0;
// Index of the next free slot in val[].
int cnmd =0;


// Reports whether A is an ASCII decimal digit ('0' through '9').
bool NUMBER(char A)
{
	return !(A < '0' || A > '9');
}
// Reports whether A is an underscore or an ASCII letter (either case).
bool LETTER(char A)
{
	if (A == '_')
		return true;

	const bool isUpper = ('A' <= A && A <= 'Z');
	const bool isLower = ('a' <= A && A <= 'z');
	return isUpper || isLower;
}
// Checks whether a scanned word is a reserved word and, if so, records it.
//
// The comparison is case-insensitive, because the grammar treats keywords
// without regard to case (e.g. both `if` and `IF` are the keyword IFTK).
// On a match the category code is appended to token[] and the word, exactly
// as it was spelled in the input, is appended to val[].
//
// s: the scanned word.
// Returns true when s is a reserved word (one entry was appended to token[]
// and val[]); returns false otherwise, in which case nothing is recorded and
// the caller is expected to treat s as an identifier.
bool RESERVEDWORD(string s)
{
	struct Keyword
	{
		const char *word;	// lower-case spelling
		const char *code;	// category code stored in token[]
	};
	static const Keyword KEYWORDS[] = {
		{"const",  "CONSTTK"},
		{"int",    "INTTK"},
		{"char",   "CHARTK"},
		{"void",   "VOIDTK"},
		{"main",   "MAINTK"},
		{"if",     "IFTK"},
		{"else",   "ELSETK"},
		{"do",     "DOTK"},
		{"while",  "WHILETK"},
		{"for",    "FORTK"},
		{"scanf",  "SCANFTK"},
		{"printf", "PRINTFTK"},
		{"return", "RETURNTK"},
	};
	const int keywordCount = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);

	// Compare against a lower-cased copy so that `s` itself keeps the
	// original spelling for val[].
	string lowered = s;
	for (string::size_type i = 0; i < lowered.size(); ++i)
	{
		if (lowered[i] >= 'A' && lowered[i] <= 'Z')
			lowered[i] = static_cast<char>(lowered[i] - 'A' + 'a');
	}

	for (int i = 0; i < keywordCount; ++i)
	{
		if (lowered == KEYWORDS[i].word)
		{
			token[p++] = KEYWORDS[i].code;
			val[cnmd++] = s;
			return true;
		}
	}
	return false;
}
void cifa()
{
	//标识符、整数、字符、字符串、保留字、运算符、界符
	FILE *f;
	//FILE *fp;
	char CHAR;
	char cur[120];
	f=fopen("testfile.txt","r"); 
	//fp=fopen("output.txt","w");  
	while((CHAR=fgetc(f))!=EOF)
	{
		while(CHAR!=' ')
		{
			if(LETTER(CHAR))//以字母或开头,要么是保留字,要么是标识符 
			{
				string s="";
				int k=0;
				do{
					s+=CHAR;
				}while((CHAR=fgetc(f))!=EOF&&CHAR!=' '&&LETTER(CHAR));
				if(NUMBER(CHAR))//字母后跟数字 
				{
					do{
						s+=CHAR;
					}while((CHAR=fgetc(f))!=EOF&&CHAR!=' '&&NUMBER(CHAR));
				}
				if(!(RESERVEDWORD(s)))//判断标识符是保留字还是标识符 
				{
//					fprintf(fp,"%s %s\n","IDENFR",cur);
					token[p++] = "IDENFR";
					val[cnmd++] = s;
					
				} 
			}
			else if(NUMBER(CHAR))//以数字开头,只能是整数 
			{
				string s="";
				int i=0;
				do{
					s+=CHAR;
		
				}while((CHAR=fgetc(f))!=EOF&&CHAR!=' '&&NUMBER(CHAR));
				//cur[i]='\0';
				//fprintf(fp,"%s %s\n","INTCON",cur);
				token[p++] = "INTCON";
				val[cnmd++] = s;
			}
			//判断运算符 
			if(CHAR=='+')
			{
				token[p++] = "PLUS";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","PLUS",CHAR);
			}
			if(CHAR=='-')
			{
				token[p++] = "MINU";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","MINU",CHAR);
			}
			if(CHAR=='*')
			{
				token[p++] = "MULT";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","MULT",CHAR);
			}
			if(CHAR=='/')
			{
				token[p++] = "DIV";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","DIV",CHAR);
			}
			if(CHAR=='=')
			{
				if((CHAR=fgetc(f))!=EOF&&CHAR=='=')
				{
					token[p++] = "EQL";
					val[cnmd++] = "==";
//					fprintf(fp,"%s %s\n","EQL","==");
				}
				else 
				{
					token[p++] = "ASSIGN";
					val[cnmd++] = "=";
					
					if(CHAR=='i') 
					{
						token[p++] = "IDENFR";
						val[cnmd++] = "i";
					}
//					fprintf(fp,"%s %s\n","ASSIGN","=");
				}
			}
			if(CHAR=='<')
			{
				if((CHAR=fgetc(f))!=EOF&&CHAR=='=')
				{
					token[p++] = "LEQ";
					val[cnmd++] = "<=";
//					fprintf(fp,"%s %s\n","LEQ","<=");
				}
				else 
				{
					token[p++] = "LSS";
					val[cnmd++] = "<";
//					fprintf(fp,"%s %s\n","LSS","<");
				}
			}
			if(CHAR=='>')
			{
				if((CHAR=fgetc(f))!=EOF&&CHAR=='=')
				{
					token[p++] = "GEQ";
					val[cnmd++] = ">=";
//					fprintf(fp,"%s %s\n","GEQ",">=");
				}
				else 
				{
					token[p++] = "GRE";
					val[cnmd++] = ">";
//					fprintf(fp,"%s %s\n","GRE",">");
				}
			}
			if(CHAR=='!')
			{
				if((CHAR=fgetc(f))!=EOF&&CHAR=='=')
				{
					token[p++] = "NEQ";
					val[cnmd++] = "!=";
//					fprintf(fp,"%s %s\n","NEQ","!=");
				}
			}
			//判断界符 
			if(CHAR==';')
			{
				token[p++] = "SEMICN";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","SEMICN",CHAR);
			}
			if(CHAR==',')
			{
				token[p++] = "COMMA";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","COMMA",CHAR);	
			}
			if(CHAR=='(')
			{
				token[p++] = "LPARENT";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","LPARENT",CHAR);
			}
			if(CHAR==')')
			{
				token[p++] = "RPARENT";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","RPARENT",CHAR);
			}
			if(CHAR=='[')
			{
				token[p++] = "LBRACK";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","LBRACK",CHAR);
			}
			if(CHAR==']')
			{
				token[p++] = "RBRACK";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","RBRACK",CHAR);
			}
			if(CHAR=='{')
			{
				token[p++] = "LBRACE";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","LBRACE",CHAR);
			}
			if(CHAR=='}')
			{
				token[p++] = "RBRACE";
				val[cnmd++] += CHAR;
//				fprintf(fp,"%s %c\n","RBRACE",CHAR);
			}
			//判断字符和字符串
			if(CHAR=='\'')
			{
				string s = ""; 
				//int j=0;
				while((CHAR=fgetc(f))!=EOF&&CHAR!='\'')
				{
					s+=CHAR;
				
				}
				token[p++] = "CHARCON";
				val[cnmd++] = s;
				//fprintf(fp,"%s %s\n","CHARCON",TOKEN);
			}
			if(CHAR=='"')
			{
				string s = ""; 
				while((CHAR=fgetc(f))!=EOF&&CHAR!='"')
				{
					s+=CHAR;	
				}
				//cur[m]='\0';
				//fprintf(fp,"%s %s\n","STRCON",TOKEN);
				token[p++] = "STRCON";
				val[cnmd++] = s;
			}
			break;
		}
	}
	
	
	
//	for(int i=0;i<p;i++)
//	{
//	//	cout<<token[i]<<" ";
//		cout<<val[i]<<" ";	
//	}
	fclose(f);


}


// Program entry point: runs the lexical analysis pass and exits with status 0.
int main()
{
	cifa();
	// yufafenxi();	// presumably the follow-up syntax-analysis pass; currently disabled
	return 0;
}

ps:上面的程序已经留好了token序列的接口,后续的语法分析可根据该程序继续开发。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值