【编译原理】用 C语言 编写的 C语言 词法分析器

1.目标:

用C语言编写一个C语言源程序的词法分析器(标题收回:下面的代码实际使用了 string、cin 等 C++ 特性,需要按 C++ 编译)

2.要求

[1] 基本要求:识别关键字、运算符、界限符、常量(布尔型、整型)、标识符;
[2] 扩展要求:常量(浮点型)、注释、错误处理。
3.对照表

 

 4.代码(废话少说)

头文件 

#include "gets.h"

是我自己无聊时写的,我经常要用的一些功能的结合体,本文除了文件读取时的getfdstr语句以外没有其他的有关该头文件的语句。

/**
 * Loads the text of `filename` into `data` with all whitespace removed.
 *
 * The file is attached to stdin through freopen(), so stdin stays redirected
 * to that file after the call returns.
 *
 * @param filename Path of the file to read.
 * @param data     Destination buffer; it must provide room for at least Max
 *                 bytes. The result is always NUL-terminated and input longer
 *                 than Max-1 characters is truncated.
 * @return The first character stored in `data` ('\0' when the file could not
 *         be opened or contained no non-whitespace characters).
 */
char getfdstr(char filename[],char data[])
{
	int length = 0;
	data[0] = '\0';

	// Without this check a missing file would silently look like empty input.
	if (freopen(filename,"r",stdin) == NULL)
		return data[0];

	char w;
	// operator>> on a char skips whitespace by itself, so every character
	// delivered here is already a non-blank one. The bound keeps one slot
	// free for the terminating NUL.
	while (length < Max - 1 && cin >> w)
	{
		data[length] = w;
		length++;
	}
	data[length] = '\0';

	// The previous version copied through a heap buffer that was one byte too
	// small and never freed; writing straight into `data` avoids both issues.
	return data[0];
}

正片开始

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <conio.h>
#include <windows.h>
#include "gets.h"

#define Max 3068

using namespace std;

// Character classes produced by wordtype() and error categories handed to
// error(). Values are consecutive starting at 0; raw numbers would work too,
// the names are only here for readability.
enum
{
	UNDERLINE,       // 0: underscore
	LETTER,          // 1: letter
	NUMBER,          // 2: digit
	SYMBOL,          // 3: any other character
	SYMBOLERROR,     // 4: unknown symbol
	IDENTIFIERERROR  // 5: malformed identifier
};

// Source text to tokenize: main() fills it through getfdstr() and the
// scanning functions index into it.
char data[Max];
// Not referenced by any function visible in this listing (getfdstr uses its
// own local buffer of the same name).
char letter[Max];

// Scanner state flags shared between Getword() and the scanning helpers.
bool isfloat = false;// set by Number() when the literal contains a '.'
bool isbool = false;// set by identify() when the word is a boolean literal
bool notes = false;// true while inside a comment; toggled by symbolstr() on "/*" and "*/"
bool isvariable = false;// set by identify() once the word contains an underscore
bool isiderror = false;// set by Number() when a letter or '_' follows the digits

// Keyword tables. The three arrays are parallel: key[i] is the keyword
// spelling, keyNum[i] its category code and keyword[i] the tag printed for it.
string key[32]={"char","double","enum","float","int","long","short","signed",
	"struct","union","unsigned","void","for","do","while","break","continue",
	"if","else","goto","switch","case","default","return","auto","extern","register",
	"static","const","sizeof","typedef","volatile"};
int keyNum[32]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
// Note the comma after "REGISTER": without it the literal is concatenated
// with "STATIC" and every following tag shifts by one position.
string keyword[32]={"CHAR","DOUBLE","ENUM","FLOAT","INT","LONG","SHORT","SIGNED",
	"STRUCT","UNION","UNSIGNED","VOID","FOR","DO","WHILE","BREAK","CONTINUE",
	"IF","ELSE","GOTO","SWITCH","CASE","DEFAULT","RETURN","AUTO","EXTERN","REGISTER",
	"STATIC","CONST","SIZEOF","TYPEDEF","VOLATILE"};


// Symbol tables. The three arrays are parallel: symbol[i] is the spelling,
// symbolNum[i] its category code and symbolword[i] the tag printed for it.
// Code 65 does not appear here; isSymbol() and Symbolword() handle the
// double quote with that code separately.
string symbol[33]={"+","-","*","/","%","++","--",">","<","==",
	"!=",">=","<=","&&","||","!","=","+=","-=","*=","/=","%=",
	",","(",")","[","]","{","}",";","/*","*/","'"};
int symbolNum[33]={33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66};
string symbolword[33]={"PLUS","MINUS","MULTI","RDIV","MODULO","INC","DEC","GT","LT","EQ","NEQ",
	"GE","LE","AND","OR","NOT","ASSIGN","PLUS_A","MINUS_A","MUL_A","DIV_A","MOD_A","COMMA",
	"LR_BRAC","RR_BRAC","LS_BRAC","RS_BRAC","L_PRA","R_PRA","SEMIC","L_ANNO","R_ANNO","QMARK"};
// Category codes and tags for the symbols.

// Index into `data` of the next character to scan; every scanning function
// stores the position just past the token it consumed here.
int num;
// Number of characters in `data`; set in main() from strlen(data).
int length;

//void print(string s,string z,int n);
void Getword();// main loop: tokenizes `data` and prints/writes every token
void error(FILE *fp,int type,string word);// reports a SYMBOLERROR or IDENTIFIERERROR

string identify(char s,int n);// scans a word that starts with a letter
string Number(char s,int n);// scans a numeric literal
string symbolstr(char s,int n);// scans a one- or two-character symbol
string variable(char s,int n);// scans an identifier that starts with '_'

string Keyword(int n);// maps a keyword category code to its tag
string Symbolword(int n);// maps a symbol category code to its tag

bool isNum(char s);// true for a decimal digit
bool isLetter(char s);// true for an ASCII letter
bool issymbol(char s);// true for anything that is neither a letter nor a digit
bool isBool(string s);// the keyword table has no bool entry, so boolean literals get their own check

int wordtype(char str);// classifies a character (LETTER, NUMBER, UNDERLINE, SYMBOL)
int iskeyword(string s);// returns the keyword category code, 0 when not a keyword
int isSymbol(string s);// returns the symbol category code, SYMBOLERROR when unknown


int main()
{
	// The input path is fixed; a prompt plus scanf could fill `filename` instead.
	char filename[Max] = "input.txt";

	// getfdstr comes from the author's own gets.h; per the author's note it
	// reads the file and stores its text, blanks removed, in `data`.
	getfdstr(filename,data);
	length = strlen(data);

	Getword();

	getch();
	return 0;
}

//下面就慢慢看吧,有点长懒得解释
// Writes one token line, "<lexeme> TAB (<tag>,<code>) TAB <category>", to both
// the console and `fp`, so the two outputs can never drift apart.
static void emitToken(FILE *fp,const string &lexeme,const string &tag,int code,const char *category)
{
	printf("%s	(%s,%d)	%s\n",lexeme.c_str(),tag.c_str(),code,category);
	fprintf(fp,"%s	(%s,%d)	%s\n",lexeme.c_str(),tag.c_str(),code,category);
}

/**
 * Tokenizes the global `data` buffer (the first `length` characters) and
 * reports every token on the console and in "output.txt".
 *
 * The scanning functions (variable, identify, Number, symbolstr) advance the
 * global `num` past the token they consumed and describe it through the
 * global flags. Tokens found while `notes` is set (inside a comment) are
 * consumed but not reported, except for the comment opener itself.
 *
 * Terminates the process with a non-zero status when the output file cannot
 * be created.
 */
void Getword()
{
	// Codes 33..54 in symbolNum belong to the operators ("+" .. "%=");
	// every other symbol code is reported as a delimiter.
	const int firstOperatorCode = 33;
	const int lastOperatorCode = 54;

	FILE *fp = fopen("output.txt","w");
	if (fp == NULL)
	{
		printf("文件打开失败!\n");
		system("pause");
		exit(1);
	}

	// True once the "/*" that opened the current comment has been reported.
	bool commentOpenerPrinted = false;

	for (num=0;num<length;)
	{
		// The per-token flags describe only the token scanned in this
		// iteration. Clearing them here keeps a previous token (or one that
		// was skipped inside a comment) from influencing this one.
		isfloat = false;
		isbool = false;
		isvariable = false;
		isiderror = false;

		const char str = data[num];
		string word;
		int code = 0;

		switch (wordtype(str))
		{
		case UNDERLINE:
			word = variable(str,num);
			if (!notes)
				emitToken(fp,word,word,70,"标识符");
			break;

		case LETTER:
			word = identify(str,num);
			if (notes)
				break;
			// Keywords are recognized no matter which scanning mode
			// identify() ended in.
			code = iskeyword(word);
			if (code)
				emitToken(fp,word,Keyword(code),code,"关键字");
			else if (isvariable)
				emitToken(fp,word,"IDE",70,"标识符");
			else if (!word.compare("bool"))
				emitToken(fp,word,"BOOL",67,"关键字");
			else if (isbool)
				emitToken(fp,word,"CONST_BOOL",67,"布尔型");
			else
				emitToken(fp,word,word,70,"标识符");
			break;

		case NUMBER:
			word = Number(str,num);
			if (notes)
				break;
			if (isiderror)
				error(fp,IDENTIFIERERROR,word);
			else if (isfloat)
				emitToken(fp,word,"CONST _FLOAT",69,"浮点型");
			else
				emitToken(fp,word,"CONST _INT",68,"整型");
			break;

		case SYMBOL:
			word = symbolstr(str,num);
			code = isSymbol(word);
			if (notes)
			{
				// Inside a comment only its opening "/*" is reported, once.
				if (!commentOpenerPrinted)
				{
					emitToken(fp,word,Symbolword(code),code,"界限符");
					commentOpenerPrinted = true;
				}
				break;
			}
			if (code == SYMBOLERROR)
			{
				error(fp,SYMBOLERROR,word);
				break;
			}
			if (code >= firstOperatorCode && code <= lastOperatorCode)
			{
				emitToken(fp,word,Symbolword(code),code,"运算符");
				break;
			}
			if (!word.compare("*/"))
			{
				// Mark the spot where the comment body was dropped.
				printf("—————内容被注释—————\n");
				fprintf(fp,"—————内容被注释—————\n");
				commentOpenerPrinted = false;
			}
			emitToken(fp,word,Symbolword(code),code,"界限符");
			break;
		}
	}
	fclose(fp);
}

// Classifies a single character: ASCII letters map to LETTER, decimal digits
// to NUMBER, '_' to UNDERLINE and everything else to SYMBOL.
int wordtype(char str)
{
	const bool lowerCase = ('a' <= str && str <= 'z');
	const bool upperCase = ('A' <= str && str <= 'Z');
	if (lowerCase || upperCase)
		return LETTER;

	if ('0' <= str && str <= '9')
		return NUMBER;

	return (str == '_') ? UNDERLINE : SYMBOL;
}

/**
 * Scans the word that starts with the letter `s` at data[n].
 *
 * Because the input carries no blanks, a word ends as soon as it spells a
 * keyword, "bool" or a boolean literal; among keywords the longest one wins
 * (so "double" is not cut short at "do"). Digits are accepted after the first
 * character. Once an underscore is met the word can no longer be a keyword and
 * is read greedily to the next character that is not a letter, digit or '_'.
 *
 * Side effects: sets `num` to the index just past the word, sets `isbool`
 * when the word is a boolean literal and `isvariable` when it contains '_'.
 * Both flags are cleared on entry so state from an earlier word cannot leak in.
 *
 * @param s First character of the word (data[n]).
 * @param n Index of that character in `data`.
 * @return The scanned word.
 */
string identify(char s,int n)
{
	// Length of the longest entries in the keyword table ("continue", ...).
	const string::size_type longestKeyword = 8;

	isvariable = false;
	isbool = false;

	int j = n+1;
	string word(1,s);

	// Phase 1: the word may still turn out to be a keyword.
	while (!isvariable)
	{
		const char c = data[j];
		if (isLetter(c))
		{
			word += c;
			j++;
			if (iskeyword(word))
			{
				// Look ahead for a longer keyword sharing this prefix.
				string probe(word);
				for (int k = j; isLetter(data[k]) && probe.size() < longestKeyword; k++)
				{
					probe += data[k];
					if (iskeyword(probe))
					{
						word = probe;
						j = k+1;
					}
				}
				num = j;
				return word;
			}
			if (isBool(word))
			{
				isbool = true;
				num = j;
				return word;
			}
			if (!word.compare("bool"))
			{
				num = j;
				return word;
			}
		}
		else if (isNum(c))
		{
			// Digits are legal inside an identifier after its first character.
			word += c;
			j++;
		}
		else if (c == '_')
		{
			word += c;
			j++;
			isvariable = true;
		}
		else
		{
			// Anything else (including the terminating NUL) ends the word.
			num = j;
			return word;
		}
	}

	// Phase 2: plain identifier; take letters, digits and underscores.
	while (!issymbol(data[j]) || data[j] == '_')
	{
		word += data[j];
		j++;
	}
	num = j;
	return word;
}

// True when `s` is one of the decimal digits '0'..'9'.
bool isNum(char s)
{
	return '0' <= s && s <= '9';
}

// True when `s` is an ASCII letter, lower or upper case.
bool isLetter(char s)
{
	const bool lowerCase = ('a' <= s && s <= 'z');
	const bool upperCase = ('A' <= s && s <= 'Z');
	return lowerCase || upperCase;
}

// Looks `s` up in the key table and returns the matching category code from
// keyNum, or 0 when `s` is not a keyword.
int iskeyword(string s)
{
	int idx = 0;
	while (idx < 32 && s != key[idx])
		++idx;
	return (idx < 32) ? keyNum[idx] : 0;
}

/**
 * Scans the numeric literal that starts with the digit `s` at data[n].
 *
 * Digits and at most one '.' form the literal. If a letter or '_' follows,
 * the token is a malformed identifier: scanning then continues over letters,
 * digits and underscores so the whole offending word is returned.
 *
 * Side effects: sets `num` to the index just past the token, sets `isfloat`
 * when a '.' was consumed and `isiderror` when the token is malformed. Both
 * flags are cleared on entry; otherwise one malformed token would put every
 * later number into error mode.
 *
 * @param s First character of the literal (data[n]).
 * @param n Index of that character in `data`.
 * @return The scanned text.
 */
string Number(char s,int n)
{
	isfloat = false;
	isiderror = false;

	int j = n+1;
	bool seenDot = false;
	string text(1,s);

	// Regular literal: digits with an optional single decimal point.
	while (!isiderror)
	{
		const char c = data[j];
		if (isNum(c))
		{
			// plain digit, nothing to record
		}
		else if (c == '.' && !seenDot)
		{
			seenDot = true;
			isfloat = true;
		}
		else if (isLetter(c) || c == '_')
		{
			isiderror = true;
		}
		else
		{
			// Any other character (including the terminating NUL) ends the literal.
			num = j;
			return text;
		}
		text += c;
		j++;
	}

	// Error mode: swallow the rest of the malformed word.
	while (!issymbol(data[j]) || data[j] == '_')
	{
		text += data[j];
		j++;
	}
	num = j;
	return text;
}

/**
 * Scans the symbol that starts with `s` at data[n], merging it with the next
 * character when the pair forms a two-character token:
 *   "x=" for x in > < = + - ! * / %,  "++",  "--",  "&&",  "||",  the comment
 *   opener (slash-star) and the comment closer (star-slash).
 *
 * Side effects: sets `num` to the index just past the token; sets `notes`
 * when the comment opener is produced and clears it on the comment closer.
 *
 * @param s First character of the symbol (data[n]).
 * @param n Index of that character in `data`.
 * @return The one- or two-character token.
 */
string symbolstr(char s,int n)
{
	int j = n+1;
	const char next = data[j];
	bool takeNext = false;

	// Dispatch on the character instead of comparing temporary strings; the
	// old chain also mixed a bitwise '|' into its logical conditions.
	switch (s)
	{
	case '>':
	case '<':
	case '=':
	case '!':
	case '%':
		takeNext = (next == '=');
		break;
	case '+':
		takeNext = (next == '=' || next == '+');
		break;
	case '-':
		takeNext = (next == '=' || next == '-');
		break;
	case '&':
		takeNext = (next == '&');
		break;
	case '|':
		takeNext = (next == '|');
		break;
	case '/':
		takeNext = (next == '=' || next == '*');
		if (next == '*')
			notes = true;   // comment starts
		break;
	case '*':
		takeNext = (next == '=' || next == '/');
		if (next == '/')
			notes = false;  // comment ends
		break;
	default:
		break;
	}

	string token(1,s);
	if (takeNext)
	{
		token += next;
		j++;
	}
	num = j;
	return token;
}

// Returns the category code of symbol `s`: 65 for a double quote (which is
// not part of the symbol table), the symbolNum entry for a table match, and
// SYMBOLERROR when the symbol is unknown.
int isSymbol(string s)
{
	if (s == "\"")
		return 65;

	int idx = 0;
	while (idx < 33)
	{
		if (s == symbol[idx])
			return symbolNum[idx];
		++idx;
	}
	return SYMBOLERROR;
}

// True for every character that is neither an ASCII letter nor a decimal
// digit; note that '_' and the NUL terminator count as symbols here.
bool issymbol(char s)
{
	const bool lowerCase = ('a' <= s && s <= 'z');
	const bool upperCase = ('A' <= s && s <= 'Z');
	const bool digit = ('0' <= s && s <= '9');
	return !(lowerCase || upperCase || digit);
}

/**
 * Maps a keyword category code to the tag printed for it.
 *
 * @param n Category code as returned by iskeyword(); valid codes are 1..32.
 * @return The matching entry of the keyword table, or an empty string for any
 *         other value (the old range check let 0 through, which indexed
 *         keyword[-1], and fell off the end without returning otherwise).
 */
string Keyword(int n)
{
	if (n>=1&&n<=32)
		return keyword[n-1];
	return string();
}

/**
 * Maps a symbol category code to the tag printed for it.
 *
 * @param n Category code as returned by isSymbol().
 * @return A double quote for code 65, the symbolword entry for codes 33..64,
 *         the last symbolword entry for code 66 (the single quote), and an
 *         empty string for anything else.
 */
string Symbolword(int n)
{
	if(n == 65)
		return string(1,'"');
	if(n>=33&&n<65)
		return symbolword[n-33];
	// Code 66 is stored in the last slot (index 32) because 65 is skipped in
	// the table; index 33 would be past the end of the 33-element array.
	if(n == 66)
		return symbolword[32];
	return string();
}

// True when `s` spells a boolean literal: "true"/"false" in all-lower or
// all-upper case.
bool isBool(string s)
{
	return s == "true" || s == "false" || s == "TRUE" || s == "FALSE";
}

// Scans the identifier that starts with `s` at data[n]: every following
// letter, digit or underscore is appended. Stores the index just past the
// identifier in `num` and returns the identifier text.
string variable(char s,int n)
{
	string name(1,s);
	int pos = n+1;
	for (;;)
	{
		const char c = data[pos];
		// A character belongs to the name when it is '_' or not a symbol,
		// i.e. a letter or a digit.
		const bool partOfName = (c == '_') || !issymbol(c);
		if (!partOfName)
			break;
		name += c;
		++pos;
	}
	num = pos;
	return name;
}


// Reports a lexical error for `word` on the console and in `fp`. `type`
// selects the message (SYMBOLERROR or IDENTIFIERERROR); any other value
// prints nothing.
void error(FILE *fp,int type,string word)
{
	const char *text = word.c_str();
	if (type == SYMBOLERROR)
	{
		printf("ERROR!	ERRORTYPE:SymbolError!	%s\n",text);
		fprintf(fp,"ERROR!	ERRORTYPE:SymbolError!	%s\n",text);
	}
	else if (type == IDENTIFIERERROR)
	{
		printf("ERROR!	ERRORTYPE:IdentifierError!	%s\n",text);
		fprintf(fp,"ERROR!	ERRORTYPE:IdentifierError!	%s\n",text);
	}
}

5.结果截图

输入:

 控制台输出:

文件输出:

 

 

到这功能就差不多完成了,C语言我也没学多久,所以可能有些地方有疏漏,希望大家多多指正。

 

参考文章:

(13条消息) 词法分析器(分析C语言)_flamingobaby的博客-CSDN博客_词法分析c语言

  • 11
    点赞
  • 152
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值