【编译原理】类C语言词法分析器的设计

最新推荐文章于 2024-04-26 10:25:20 发布

老北京的热干面

最新推荐文章于 2024-04-26 10:25:20 发布

阅读量9k

点赞数 10

分类专栏：编译原理

本文链接：https://blog.csdn.net/qq_36946026/article/details/80273206

版权

编译原理专栏收录该内容

3 篇文章 2 订阅

订阅专栏

1.实验要求

输入为一个以类C语言编写的源程序

输出为一组二元组序列构成的文本文件，一行为一个二元组，二元组中间以逗号隔开

实验报告上要求附上DFA

2.语言说明：

保留字：unsigned、break、return、void、case、float、char、for、while、continue、if、default、do、int、switch、double、long、else

运算符：+、-、*、/、>、>=、<、<=、==、!=、&&、||、!

界限符：{ } ( ) ; ,

常量：十进制无符号数

标识符：以字母或下划线开始，后面跟上字母或数字

3. 实验原理

词法分析是编译的第一阶段。词法分析器的主要任务是读入源程序的输入字符，将它们组成词素，生成并输出一个词法单元序列，这个词法单元序列被输出到语法分析器进行语法分析。另外，由于词法分析器在编译器中负责读取源程序，因此除了识别词素之外，它还会完成一些其他任务，比如过滤掉源程序中的注释和空白，将编译器生成的错误消息与源程序的位置关联起来等。词法分析器的作用如下：

读入源程序的输入字符，将它们组成词素，生成并输出一个词法单元序列；

过滤掉源程序中的注释和空白；

将编译器生成的错误消息与源程序的位置关联起来；

4.DFA设计：

5.实验代码

#include<ctype.h>
#include<cstdio>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<map>
#include<string>
#include<vector>
using namespace std;
//Record stored for every symbol-table entry: first is a category code
//(1 identifier, 2 number, 3 string, 4 header file, 6 delimiter), second is
//the table size plus one at the moment the entry was inserted.
typedef pair<int,int> mp;
const string key_word[]={"unsigned","break","return","void","case","float","char","for","while","continue","if","default","do","int","switch","double","long","else"};//reserved words
const string operators[]={"+","-","*","/",">",">=","<","<=","==","!=","&&","||","!"};//operators
const char jiefu[]={',',';','(',')','{','}'};//delimiters
map<string,mp> flag_table;//identifier table etc.
map<string,mp>num_table;//constant table
map<string,mp>str_table;//string table
map<string,mp>head_table;//header-file table
map<string,mp>char_table;//character table
map<char,mp>fenjiefu_table;//delimiter table
//keymark[i] / operatormark[i] is set to 1 once key_word[i] / operators[i]
//has been seen in the input.
int keymark[40],operatormark[40];
//Returns 1 when ch is one of the delimiter characters listed in jiefu,
//otherwise 0.
int isjiefu(char ch)
{
	//Derive the bound from the array itself; the former hard-coded "i<5"
	//skipped the last entry, so '}' was never reported as a delimiter.
	const int count=sizeof(jiefu)/sizeof(jiefu[0]);
	for(int i=0;i<count;i++)
		if(ch==jiefu[i])
			return 1;
	return 0;
}
//Looks ch up in the reserved-word list.
//Returns the index of the matching entry in key_word, or -1 when ch is not
//a reserved word.
int iskey(string ch)
{
	//Derive the bound from the array itself; the former hard-coded "i<17"
	//skipped the last entry, so "else" was never recognised as a keyword.
	const int count=sizeof(key_word)/sizeof(key_word[0]);
	for(int i=0;i<count;i++)
		if(key_word[i]==ch)
			return i;
	return -1;
}
//Looks ch up in the operator list.
//Returns the index of the matching entry in operators, or -1 when ch is not
//a known operator.
int isope(string ch)
{
	//Derive the bound from the array itself; the former hard-coded "i<12"
	//skipped the last entry, so "!" was never recognised as an operator.
	const int count=sizeof(operators)/sizeof(operators[0]);
	for(int i=0;i<count;i++)
		if(operators[i]==ch)
			return i;
	return -1;
}
//Classifies ch as an ASCII letter.
//Returns 1 for 'a'-'z' or 'A'-'Z' and -1 for every other character
//(note: the "no" answer is -1, not 0).
int ischar(char ch)
{
	const bool lower=('a'<=ch&&ch<='z');
	const bool upper=('A'<=ch&&ch<='Z');
	return (lower||upper)?1:-1;
}
//Classifies ch as a decimal digit.
//Returns 1 for '0'-'9' and -1 for every other character
//(note: the "no" answer is -1, not 0).
int isnumber(char ch)
{
	if(ch<'0'||ch>'9')
		return -1;
	return 1;
}
//获得关键字或者标识符
void get_keyflag(char* ptr,FILE* f)
{
	string token;
	token+=*ptr;
	while(1)
	{
		*(++ptr)=fgetc(f);
		if(ischar(*ptr)==-1&&isnumber(*ptr)==-1&&*ptr!='_')
		break;
		token+=*ptr;
	}
	ungetc(*ptr,f);
	//*ptr='\0';
	int h=iskey(token);
	if(h>=0)//是关键字
	{
		if(!keymark[h])
			keymark[h]=1;
		cout<<token<<" , "<<"保留字"<<endl;
	}
	else//标识符
	{
		mp tmp;
		if(flag_table.find(token)==flag_table.end())//崭新的标记符
		{
			tmp=make_pair(1,flag_table.size()+1);
			flag_table[token]=tmp;
		}
		else
		{
			map<string,mp>::iterator it;
			it=flag_table.find(token);
			tmp=it->second;
		}
		cout<<token<<" , "<<"标识符"<<endl;
	}
}
//获取数字
void get_num(char* ptr,FILE* f)
{
	string token;
	token+=*ptr;
	while(1)
	{
		*(++ptr)=fgetc(f);
		if(isnumber(*ptr)!=1)
		break;
		token+=*ptr;
	}
	ungetc(*ptr, f);
	mp tmp;
	if(num_table.find(token)==num_table.end())//新的数字
	{
		tmp=make_pair(2,num_table.size()+1);//2代表着数字
		num_table[token]=tmp;
	}
	else
	{
		map<string,mp>::iterator it;
		it=num_table.find(token);
		tmp=it->second;
	}
	cout<<token<<" , "<<"常量"<<">"<<endl;
}
//获得字符串
void get_string(char* ptr,FILE* f)
{
	string token;
	*(++ptr)=fgetc(f);
	token+=*(ptr);
	while(1)
	{
		*(++ptr)=fgetc(f);
		if(feof(f)||*ptr=='"')
		break;
		token+=*(ptr);
	}
	if(*(ptr)=='"')
	{
		map<string,mp>::iterator it;
		it=str_table.find(token);
		mp tmp;
		if(it==str_table.end())//新的字符串
		{

			tmp=make_pair(3,str_table.size()+1);
			str_table[token]=tmp;
		}
		else
		{tmp=it->second;}
		cout<<token<<" , "<<"标识符"<<endl;
	}
	//找不到匹配的字符串
	else
	{
		cout<<"程序错误！"<<endl;
	}
}
void double_operator(char* ptr,FILE* f)
{
	string token,tmp;
	token+=*ptr;
	tmp+=*ptr;
	*(++ptr)=fgetc(f);
	token+=*ptr;
	int p=isope(token);
	if(p>=0)
	{
		operatormark[p]=1;
		cout<<token<<" , "<<"运算符"<<endl;
	}
	else
	{
		ungetc(*ptr,f);
		p=isope(tmp);
		operatormark[p]=1;
		cout<<tmp<<" , "<<"运算符"<<endl;
	}
}
void single_operator(char* ptr,FILE* f)
{
	string token;
	token+=*ptr;
	int index=isope(token);
	if(index>=0)
	{
		operatormark[index]=1;
		cout<<token<<" , "<<"运算符"<<endl;
	}
	else
	{
		if(*ptr<0||*ptr>127)
			cout<<"输入的不是ascii码"<<endl;
	}
}
//Handles a preprocessor directive that starts with '#'.
//  ptr - points at the '#' (already consumed from f); only *ptr is read.
//  f   - input stream positioned just after the '#'.
//The directive name runs up to the next whitespace character, '"', '<' or
//end of input. "#define" is reported as a macro definition. "#include" is
//reported together with its opening delimiter, the header name (recorded in
//head_table with category 4) and its closing delimiter. Other directive
//names produce no output.
void acehandle(char* ptr,FILE* f)
{
	string token;
	token+=*ptr;
	int c;
	//Stop at EOF and at any whitespace: the old loop only stopped at ' ',
	//'"' or '<', so a directive such as "#endif" at the end of the file
	//never terminated and kept writing through ptr.
	while((c=fgetc(f))!=EOF)
	{
		if(c==' '||c=='\t'||c=='\r'||c=='\n'||c=='"'||c=='<')
			break;
		token+=static_cast<char>(c);
	}
	//A '<' or '"' opens the header name and has to be read again below.
	if(c=='<'||c=='"')
		ungetc(c,f);
	if(token=="#define")
	{
		cout<<token<<","<<"宏定义"<<endl;
	}
	if(token=="#include")
	{
		cout<<setw(5)<<"<"<<setw(10)<<token<<setw(20)<<"预处理"<<setw(20)<<">"<<endl;
		//Skip blanks between "#include" and the opening delimiter.
		int opening=fgetc(f);
		while(opening==' '||opening=='\t')
			opening=fgetc(f);
		if(opening!='<'&&opening!='"')
		{
			if(opening!=EOF)
				ungetc(opening,f);
			cout<<"程序错误！"<<endl;
			return;
		}
		cout<<setw(5)<<"<"<<setw(10)<<static_cast<char>(opening)<<setw(20)<<"分界符"<<setw(20)<<">"<<endl;
		string tmp;
		int closing;
		//Each character is tested before it is appended, so an empty name is
		//handled, and EOF or a line break ends an unterminated name instead
		//of looping forever.
		while((closing=fgetc(f))!=EOF&&closing!='\n'&&closing!='>'&&closing!='"')
			tmp+=static_cast<char>(closing);
		if(closing!='>'&&closing!='"')
		{
			cout<<"程序错误！"<<endl;
			return;
		}
		mp p;
		map<string,mp>::iterator it=head_table.find(tmp);
		if(it==head_table.end())
		{
			p=mp(4,static_cast<int>(head_table.size())+1);
			head_table[tmp]=p;
		}
		else
		{
			p=it->second;
		}
		cout<<setw(5)<<"<"<<setw(10)<<tmp<<setw(10)<<p.first<<setw(10)<<"头文件"<<setw(10)<<p.second<<">"<<endl;
		cout<<setw(5)<<"<"<<setw(10)<<static_cast<char>(closing)<<setw(20)<<"分界符"<<setw(20)<<">"<<endl;
	}
}
void handlechar(char ch,FILE* f)
{
	char* ptr=new char[1000];
	*ptr=ch;
	if(ischar(*ptr)==1)//以字母开头的--关键字或者标识符
	{
		get_keyflag(ptr,f);}
	else if(isnumber(*ptr)==1)
	{
		get_num(ptr,f);}
	else if(*ptr=='#')
	{
		acehandle(ptr,f);}
	else if(*ptr=='"')
	{
		get_string(ptr,f);}
	else if(*ptr=='+'||*ptr=='-'||*ptr=='*'||*ptr=='/'||*ptr=='='||*ptr=='%'||*ptr=='>'||*ptr=='<')
	{
		double_operator(ptr,f);}
	else if(isjiefu(*ptr)==1)
	{
		mp tmp;
		map<char,mp>::iterator it;
		if(fenjiefu_table.find(*ptr)==fenjiefu_table.end())
		{
			tmp=make_pair(6,fenjiefu_table.size()+1);
			fenjiefu_table[*ptr]=tmp;}
		else
		{
			it=fenjiefu_table.find(*ptr);
			tmp=it->second;}
		 cout<<*ptr<<" , "<<"界符"<<endl;
	}
	else
	{
		single_operator(ptr,f);}
}
//Entry point: tokenises a source file and prints one token per line.
//The file name is taken from the first command-line argument; when none is
//given the former hard-coded "input.txt" is used, so existing usage keeps
//working. Returns 0 on success and 1 when the file cannot be opened.
int main(int argc,char* argv[])
{
	const char* path=(argc>1)?argv[1]:"input.txt";
	FILE* f=fopen(path,"r");
	if(f==NULL)
	{
		cout<<"读入文件错误"<<endl;
		return 1;//non-zero so callers can detect the failure
	}
	//Hold fgetc's result in an int and compare against EOF directly; stored
	//in a char, a 0xFF byte and EOF cannot be told apart.
	int c;
	while((c=fgetc(f))!=EOF)
	{
		//Whitespace only separates tokens. '\r' is skipped explicitly here;
		//it was previously passed on and ignored further down.
		if(c!=' '&&c!='\n'&&c!='\t'&&c!='\r')
			handlechar(static_cast<char>(c),f);
	}
	fclose(f);
	return 0;
}

输入的类C词法

char word[10];
        char pro[100][100] = { "PROGRAM", 

"BEGIN", "END", "VAR", "INTEGER", "WHILE", "IF", "THEN", 

"ELSE", "DO", "PROCEDURE" ,


"char","int","if","else","var" 

,"return","break","do","while","for","double","float","short"};

        int n = 0;
        word[n++] = a[i++];
        while ((a[i] >= 'A'&&a[i] <= 'Z') || (a

[i] >= '0' && a[i] <= '9')||(a[i]>='a'&&a[i]<='z'))
        {
            word[n++] = a[i++];
        }
        word[n] = '\0';
        i--;

程序运行输出结果：

6.分析讨论：

（1）遇见问题：二目运算符无法识别，被拆分识别为两个单目运算符；另外，如果待分析的源程序中带有注释，程序会报错。

（2）解决方法： 修改识别运算符代码部分加入双目运算符的识别

（3）后续改进思路：一个不方便的地方就是没能实现通过输入文件名读取文件的数据，每次读入新的文件仍需要修改代码。

老北京的热干面

关注

10
点赞
踩
99

收藏

觉得还不错? 一键收藏
打赏
3
评论
【编译原理】类C语言词法分析器的设计

1.实验要求输入为一个以类C语言编写的源程序输出为一组二元组序列构成的文本文件，一行为一个二元组，二元组中间以逗号隔开实验报告上要求附上DFA 2.语言说明：保留字：unsigned、break、return、void、case、float、char、for、while、continue、if、default、do、int、switch、double、long、else运算符：+，-，*，/，...
复制链接

扫一扫