编译器-词法分析

最新推荐文章于 2024-09-27 14:19:28 发布

KDF5000

最新推荐文章于 2024-09-27 14:19:28 发布

阅读量1.6k

点赞数

分类专栏： c++ C

本文链接：https://blog.csdn.net/kongdefei5000/article/details/14521825

版权

c++ 同时被 2 个专栏收录

5 篇文章 0 订阅

订阅专栏

4 篇文章 0 订阅

订阅专栏

要求：单词符号及种别表

单词符号

种别编码

单词值

main

1

　

int

2

　

float

3

　

double

4

　

char

5

　

if

6

　

else

7

　

do

8

　

while

9

　

l(l|d)*

10

内部字符串

( +|-|ε ) dd*(.dd* | ε)( e ( +|-|ε ) dd*|ε)

20

二进制数值表示

=

21

　

+

22

-

23

　

*

24

　

/

25

　

(

26

　

)

27

　

{

28

　

}

29

　

,

30

　

;

31

　

>

32

　

>=

33

　

<

34

　

<=

35

　

==

36

　

!=

37

　

#

0

　

1. 总体设计思想

首先为指定语言中出现的所有单词（可以是一类单词，也可以是某个特定单词）构造正规式，然后根据正规式构造NFA，最后将NFA确定化为DFA。此DFA即为识别该类单词时的状态转换图，也就是程序的流程分支图；每一种单词的状态转换图都是整个词法分析程序的一个分支，把它们组合到一起就可以画出整个分析程序的状态转换图。

2. 详细算法设计

下面给出关键单词的NFA：

科学计数法： ( +|-|ε )dd*(.dd* | ε)( e ( +|-|ε ) dd*|ε)

标识符：l(l|d)*

下面给出程序的伪代码：

ch=getch();//从源码缓冲区取一个字符

while(isBlank(ch))

{

         ch=getch();//从源码缓冲区取一个字符

}

switch(ch)

{

         根据ch的类型按照状态图流程进行判断；

         return type;

}

3. 流程框图

4. 函数相关说明

scaner(const char codeBuffer[], int &startPosition, char token[])：扫描指定缓冲区中的字符串，识别出从startPosition开始的该语言的一个单词。codeBuffer为字符串缓冲区指针，startPosition为识别的起始位置，token为存放识别出的单词的数组指针。

isBlank(const char ch)：判断ch是否为空白字符，包括制表符、空格、换行符。

isLetter(const char ch)：判断ch是否为字母。

isDigi(const char ch)：判断ch是否为数字。

getTypeCode(const char token[])：如果识别出的是字符串，就查关键字表给出该字符串的类型码。

judgeEe(char ch, const char codeBuffer[], int &startPosition, char token[], int &m)：如果为科学计数法的形式，识别出E/e后面的部分。

5. 输入与输出

输入：以#号结束所给文法的源程序字符串

输出：二元组（returnCode,token或sum）构成的序列

例如：输入：abc+123#

输出：(10,’abc’)

(13,’+’)

(11,123)

出错处理：

如果出现不符合该语言的构造都要提示错误

例如：123e+就不符合该语言构造规则，为错误语言

6. 程序运行结果

程序源码：

/********************************************************
*文件名：Morphology.h词法分析器相关函数的声明
*功能:实现词法分析功能
*时间：2013.9.28
*作者:KDF5000
*/
#include "common.h"
#pragma once
// Lexical analyzer: scans tokens out of a character buffer.
class Morphology
{
public:
	Morphology();
	~Morphology();

	//char *keyWordTable[KEYWORD_NUMBER];

	// Scans the code buffer beginning at startPosition. The return value is the
	// type code of the recognized token; the token text (word or number) is
	// written into the token array. startPosition is taken by reference.
	int scaner(const char codeBuffer[], int &startPosition, char token[]);

	// Looks up the type code that corresponds to the given token string.
	int getTypeCode(const char token[]);

	// Continues number recognition after an 'E'/'e' has been encountered.
	bool judgeEe(char ch, const char codeBuffer[], int &startPosition, char token[], int &m);

	// Tells whether ch is a blank such as a space.
	bool isBlank(const char ch);

	// Tells whether ch is a letter.
	static bool isLetter(const char ch);

	// Tells whether ch is a decimal digit.
	static bool isDigi(const char ch);

	// Sets the flag saying whether the previous token was an operator.
	void setPreIsOp(bool isPreOP);

private:
	// Whether the previous token was an operator.
	bool preIsOp;
};

/*******************************************************
*Morphology.cpp文件，主要函数的实现
********************************************************/
#include "Morphology.h"
#include <string.h>
// Keyword table. Morphology::getTypeCode compares a scanned word against each
// entry with strcmp and returns (index + 1) for a match, so the order of the
// entries determines the keyword type codes.
// NOTE(review): KEYWORD_NUMBER is not defined in this file (presumably it comes
// from common.h); if it is larger than the six initializers below, the remaining
// entries are null pointers and strcmp on them is undefined - confirm it is 6.
// NOTE(review): binding string literals to `char *` is not allowed since C++11;
// `const char *` would be the correct element type if no other translation unit
// declares this table as `char *`.
// NOTE(review): these keywords (begin/if/then/while/do/end) differ from the
// keyword list in the token table of the accompanying article (main, int, ...).
#ifndef KEYWORDTABLE
#define KEYWORDTABLE    
char *keyWordTable[KEYWORD_NUMBER] = {"begin","if","then","while","do","end"};
#endif

// A new scanner starts with the "previous token was an operator" flag set.
Morphology::Morphology()
	: preIsOp(true)
{
}


// Nothing to release: the only member is a plain bool.
Morphology::~Morphology() {}

// Sets the flag recording whether the previously scanned token was an operator.
// scaner() reads this flag to decide whether a '+'/'-' directly followed by a
// digit starts a signed number (flag true) or is a separate operator token.
void Morphology::setPreIsOp(bool isPreOp)
{
	// Store the caller's value; the previous code ignored the argument and
	// always stored true.
	preIsOp = isPreOp;
}
// Scans one token from codeBuffer, starting at index startPosition.
//
// codeBuffer    - source text; the scanner treats '#' as the end marker.
// startPosition - in/out read index; on return it is the index of the first
//                 character after the recognized token.
// token         - out buffer receiving the token text, NUL-terminated on every
//                 return path. Its capacity is not known here, so the caller
//                 must supply a buffer large enough for the longest token.
//
// Returns the token's type code: the result of getTypeCode() for words, DIGIT
// for numbers, the matching operator/delimiter code, NOTE for a /* ... */
// comment, END_JINGHAO for '#', or ERROR for anything that is not recognized.
//
// The member flag preIsOp is updated by most branches; a '+'/'-' directly
// followed by a digit is folded into the number only while preIsOp is true.
int Morphology::scaner(const char codeBuffer[],int &startPosition,char token[])
{
	// token is a pointer here (array parameters decay), so sizeof(token) would be
	// the pointer size, not the caller's buffer size. Start from an empty string
	// and terminate the token explicitly on every return path instead.
	token[0] = '\0';
	int m = 0;          // next write index into token
	char preCh = '\0';  // sign character consumed ahead of a possible number
	char ch = codeBuffer[startPosition++];
	// Skip leading spaces, tabs and newlines.
	while(isBlank(ch))
	{
		ch = codeBuffer[startPosition++];
	}

	// Dispatch on the first significant character, following the state diagram.
	// Word: letter (letter | digit)*
	if(isLetter(ch))
	{
		while(isLetter(ch) || isDigi(ch) )
		{
			token[m++] = ch;
			ch = codeBuffer[startPosition++];
		}
		token[m] = '\0';
		// Put the character that ended the word back into the buffer.
		startPosition--;

		// Keyword or identifier?
		int stringCode = getTypeCode(token);
		preIsOp = false;
		return stringCode;
	}
	// '+', '-' or a digit: either a number or a lone sign operator.
	else if(ch=='+' || ch == '-'|| isDigi(ch))
	{

		if(isDigi(ch))
		{
			// A leading digit always starts a number, so force the test below.
			preIsOp=true;
		}
		else
		{
			// Remember the sign and look at the character that follows it.
			preCh = ch;
			token[m++]= ch;
			ch = codeBuffer[startPosition++];
		}

		if(isDigi(ch)&&preIsOp==true)
		{
			// Integer part.
			while(isDigi(ch))
			{
				token[m++] = ch;
				ch = codeBuffer[startPosition++];
			}
			// Exponent directly after the integer part.
			if(ch == 'E' || ch == 'e')
			{
				if(!judgeEe(ch,codeBuffer,startPosition,token,m))
				{
					preIsOp = false;
					startPosition--;
					return ERROR;
				}
				// judgeEe advances m; terminate here so the token is a valid
				// string whichever path judgeEe took.
				token[m] = '\0';
				preIsOp = false;
				startPosition--;
				return DIGIT;
			}
			// Fractional part.
			if(ch == '.')
			{
				token[m++] = ch;
				ch = codeBuffer[startPosition++];
				// A '.' must be followed by at least one digit; otherwise put the
				// offending character back and report an error.
				if(!isDigi(ch))
				{
					startPosition--;
					token[m] = '\0';
					preIsOp = false;
					return ERROR;
				}
				while(isDigi(ch))
				{
					token[m++] = ch;
					ch = codeBuffer[startPosition++];
				}
				// Optional exponent after the fraction.
				if(ch == 'E' || ch == 'e')
				{
					if(!judgeEe(ch,codeBuffer,startPosition,token,m))
					{
						preIsOp = false;
						startPosition--;
						return ERROR;
					}
				}
				token[m] = '\0';
				startPosition--;
				preIsOp = false;
				return DIGIT;
			}
			// Plain integer.
			token[m] = '\0';
			startPosition--;
			preIsOp = false;
			return DIGIT;
		}
		else
		{
			// Only reachable after a sign was consumed: it is an operator on its
			// own. Put the look-ahead character back.
			if(preCh=='+')
			{
				token[m] = '\0';
				startPosition--;
				preIsOp=true;
				return PLUS;
			}
			else
			{
				token[m] = '\0';
				startPosition--;
				preIsOp=true;
				return SUB;
			}

		}
	}
	// '*'
	else if(ch == '*')
	{
		token[m++] = '*';
		token[m] = '\0';
		preIsOp=true;
		return STAR;
	}
	// '/' or a "/* ... */" comment
	else if(ch == '/')
	{
		bool endNode= false;
		char nextCh;
		token[m++] = '/';
		ch = codeBuffer[startPosition++];
		if(ch=='*')
		{
			token[m++]= ch;
			while(!endNode)
			{
				ch = codeBuffer[startPosition++];
				// Unterminated comment: stop at the '#' end marker, and also at
				// a NUL so a buffer without '#' is not read past its end.
				if(ch=='#' || ch=='\0')
				{
					token[m] = '\0';
					startPosition--;
					return ERROR;
				}
				nextCh = codeBuffer[startPosition];
				if(ch=='*' && nextCh=='/')
				{
					token[m++]= ch;
					token[m++]= nextCh;
					startPosition++;
					endNode = true;
				}
				else
				{
					token[m++]= ch;
				}
			}
			token[m] = '\0';
			return NOTE;
		}
		else
		{
			startPosition--;
			token[m] = '\0';
			preIsOp=true;
			return SLASH;
		}

	}
	// '='
	else if(ch == '=')
	{
		token[m++] = '=';
		token[m] = '\0';
		preIsOp = true;
		return EQUAL;
	}
	// ':' or ":="
	else if(ch == ':')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '=')
		{
			token[m++] = ch;
			token[m] = '\0';
			return MAOHAO_DENGHAO;
		}
		startPosition--;
		token[m] = '\0';
		return MAOHAO;
	}
	// '<', "<>" or "<="
	else if(ch == '<')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '>')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return SMALL_BIGGER;
		}
		if(ch == '=')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return SMALLER_EQUAL;
		}
		startPosition--;
		token[m] = '\0';
		return SMALLER;
	}
	// '>' or ">="
	else if(ch == '>')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '=')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return BIGGER_EQUAL;
		}
		startPosition--;
		token[m] = '\0';
		return BIGGER;
	}
	// ';'
	else if(ch == ';')
	{
		token[m++] = ch;
		token[m] = '\0';
		return FENHAO;
	}
	// '('
	else if(ch == '(')
	{
		token[m++] = ch;
		token[m] = '\0';
		preIsOp=true;
		return KUOHAO_L;
	}
	// ')'
	else if(ch == ')')
	{
		token[m++] = ch;
		token[m] = '\0';
		preIsOp=false;
		return KUOHAO_R;
	}
	// End marker '#'
	else if(ch =='#')
	{
		token[m++] = '#';
		token[m] = '\0';
		return END_JINGHAO;
	}
	// Anything else is not part of the language.
	token[m++] = ch;
	token[m] = '\0';
	return ERROR;
}
// Returns true when ch is a space, a newline or a tab.
bool Morphology::isBlank(const char ch)
{
	switch(ch)
	{
	case ' ':
	case '\n':
	case '\t':
		return true;
	default:
		return false;
	}
}
// Returns true when ch is in 'A'..'Z' or 'a'..'z'.
bool Morphology::isLetter(const char ch)
{
	const bool isUpper = ('A' <= ch && ch <= 'Z');
	const bool isLower = ('a' <= ch && ch <= 'z');
	return isUpper || isLower;
}
// Returns true when ch is in '0'..'9'.
bool Morphology::isDigi(const char ch)
{
	return !(ch < '0' || ch > '9');
}
// Returns the type code for the given word: (index + 1) of the first
// keyWordTable entry equal to token, or IDENTIFIER when no entry matches.
int Morphology::getTypeCode(const char token[])
{
	int index = 0;
	while(index < KEYWORD_NUMBER)
	{
		const bool matches = (strcmp(token, keyWordTable[index]) == 0);
		if(matches)
		{
			return index + 1;
		}
		++index;
	}
	return IDENTIFIER;
}

// Recognizes the exponent part of a number once an 'E'/'e' has been read.
//
// ch            - the 'E'/'e' character that was just read.
// codeBuffer    - source text being scanned.
// startPosition - in/out read index; on return it is one past the last
//                 character this function looked at (the caller puts that
//                 look-ahead character back).
// token         - in/out token text; the exponent characters are appended and
//                 the text is NUL-terminated on every return path.
// m             - in/out write index into token; on return token[m] is the
//                 terminator.
//
// Returns true when a well-formed exponent was read, false otherwise.
// NOTE(review): a fractional part after the exponent digits (e.g. "1e5.3") is
// accepted here, although the article's regular expression for numbers does
// not allow one - confirm which is intended.
bool Morphology::judgeEe(char ch,const char codeBuffer[],int &startPosition,char token[],int &m)
{
	token[m++] = ch;
	ch = codeBuffer[startPosition++];
	if(ch == '+' || ch == '-')
	{
		// Signed exponent: the sign must be followed by a digit.
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(!isDigi(ch))
		{
			token[m] = '\0';
			return false;
		}
	}
	else if(!isDigi(ch))
	{
		// No sign and no digit after E/e: drop the E/e from the token again.
		token[--m] = '\0';
		return false;
	}

	// Exponent digits.
	while(isDigi(ch))
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
	}

	if(ch != '.')
	{
		// Terminate here as well, so the token is a valid string on this
		// success path just like on every other exit.
		token[m] = '\0';
		return true;
	}

	// '.' after the exponent digits must be followed by at least one digit.
	token[m++] = ch;
	ch = codeBuffer[startPosition++];
	if(!isDigi(ch))
	{
		token[m] = '\0';
		return false;
	}
	while(isDigi(ch))
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
	}
	token[m] = '\0';
	return true;
}

单词符号	种别编码	单词值
main	1
int	2
float	3
double	4
char	5
if	6
else	7
do	8
while	9
l(l\|d)*	10	内部字符串
( +\|-\|ε ) dd*(.dd* \| ε)( e ( +\|-\|ε ) dd*\|ε)	20	二进制数值表示
=	21
+	22
-	23
*	24
/	25
(	26
)	27
{	28
}	29
,	30
;	31
>	32
>=	33
<	34
<=	35
==	36
!=	37
#	0