关键字词法识别

本人想做一个自己的脚本语言,但既不想使用 lex 之类的词法分析器生成工具,也不想自己手工画状态转换图,所以写了下面这段程序。它的功能如下:

有如下若干个关键字:

// Sample keyword/operator list to be recognised. Only the first 20 of the
// 1024 slots are given a value; the remaining slots are zero-initialised (NULL).
char * KeyWords[1024] = {
	"var",
	"+",
	"-",
	"*",
	"/",
	"%",
	"if",
	"else",
	"switch",
	"case",
	"while",
	"do",
	"for",
	"continue",
	"break",
	"class",
	"struct",
	"union",
	"enum",
	"new"
};

通过循环调用

bool AddFSM(char * reg, int nStart, int nFinal);

会自动产生一个可以识别以上关键字的状态转换表。

具体代码如下:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <list>

// One cell of the state transition table. The same struct is used in two
// roles: as the head node stored in FSM::m_pTable for each state, and as the
// transition nodes chained behind a head node through m_pNext.
struct node
{
	int m_nState;	// state this node belongs to
	char m_cLetter;	// input character that triggers the transition (set to 0 in head nodes)
	int m_nNextState;	// state reached after consuming m_cLetter (set to 0 in head nodes)
	node * m_pNext;	// next transition in the same state's chain, or NULL at the end

};

struct node;	// only used through pointers in this declaration

// Table-driven finite state machine used to recognise fixed strings.
//
// m_pTable holds one head node per state; the transitions leaving a state are
// chained behind its head node. The constructor creates the start state and
// the final state; AddFSM() adds paths between states and Match() runs an
// input string through the table.
class FSM
{
public:
	// Creates a table containing only the start state and the final state.
	FSM();
	// Deletes every head node and every transition node chained behind it.
	~FSM();
	// Adds a path spelling `reg` that leads from state nStart to state nFinal.
	// Returns false for a NULL/empty `reg` or when a required state is missing.
	bool AddFSM(char * reg, int nStart, int nFinal);
	// Returns true when `str` drives the machine from m_nStartState to m_nFinalState.
	bool Match(char * str);
	int m_nStartState;	// id of the start state (set to 0 by the constructor)
	int m_nFinalState;	// id of the accepting state (set to 1 by the constructor)
	int m_nNextState;	// set to 2 by the constructor; not read by the member functions shown in this file
	std::list<node *> m_pTable;	// head node of every state, in creation order

private:
	// The destructor deletes all nodes reachable from m_pTable, so a
	// member-wise copy would free the same nodes twice. Copying is therefore
	// disabled: these are declared but intentionally never defined.
	FSM(const FSM &);
	FSM & operator=(const FSM &);
};

// Builds the initial table: one head node for the start state (0) and one for
// the final state (1), neither of which has outgoing transitions yet.
FSM::FSM()
{
	m_nStartState = 0;
	m_nFinalState = 1;
	m_nNextState = 2;

	// A plain new-expression reports failure by throwing std::bad_alloc, it
	// never yields NULL, so no null checks are needed here. (The previous
	// null-check path deleted the start node and then still stored the
	// dangling pointer in the table.)
	const int initialStates[2] = { m_nStartState, m_nFinalState };

	for (int i = 0; i < 2; i++)
	{
		node * pHead = new node;
		pHead->m_nState = initialStates[i];
		pHead->m_cLetter = 0;
		pHead->m_nNextState = 0;
		pHead->m_pNext = NULL;
		m_pTable.push_back(pHead);
	}
}

// Releases the whole transition table: for each state, the head node and all
// transition nodes chained behind it are deleted, leaving m_pTable empty.
FSM::~FSM()
{
	while (!m_pTable.empty())
	{
		node * pDoomed = m_pTable.front();
		m_pTable.pop_front();

		// Walk the chain, remembering the successor before freeing each node.
		while (pDoomed)
		{
			node * pFollowing = pDoomed->m_pNext;
			delete pDoomed;
			pDoomed = pFollowing;
		}
	}
}

bool FSM::AddFSM(char * reg, int nStart, int nFinal)
{
	node * pTmp = NULL;

	if (!reg)
		return false;

	int nLen = strlen(reg);
	if (nLen == 0)
	{
		//不支持空
		return false;
	}

	std::list<node *>::iterator it;

	for (it = m_pTable.begin(); it != m_pTable.end(); it++)
	{
		if ((*it)->m_nState == nStart)
		{
			pTmp = *it;
			break;
		}
	}

	if (!pTmp)
		return false;

	if (nLen == 1)
	{
		if (pTmp)
		{
			//查询是否已经存在
			node * pFirst = pTmp->m_pNext;
			while (pFirst)
			{
				if (pFirst->m_cLetter == *reg && pFirst->m_nNextState == nFinal)
				{
					return true;
				}

				pFirst = pFirst->m_pNext;
			}
			node * pNewNode = new node;
			if (pNewNode)
			{
				pNewNode->m_nState = nStart;
				pNewNode->m_cLetter = *reg;
				pNewNode->m_nNextState = nFinal;
				pNewNode->m_pNext = pTmp->m_pNext;
				pTmp->m_pNext = pNewNode;
				return true;
			}
			return false;
		}

		return false;
	}

	//查询当前字符是否存在
	char c = *reg;

	node * pCurNode = NULL;

	if (pTmp)
	{
		pCurNode = pTmp->m_pNext;

		while (pCurNode)
		{
			if (pCurNode->m_cLetter == c)
			{
				break;
			}

			pCurNode = pCurNode->m_pNext;
		}
	}

	int nNewState = 0;

	char X[256] = { 0 };
	char Y[256] = { 0 };

	X[0] = *reg;

	strcpy(Y, reg + 1);

	if (pCurNode)
	{
		nNewState = pCurNode->m_nNextState;

		bool bRet3 = AddFSM(Y, nNewState, nFinal);

		return bRet3;
	}
	else
	{
		node * pNew = new node;
		if (!pNew)
			return false;
		nNewState = pNew->m_nState = m_pTable.size();
		pNew->m_cLetter = 0;
		pNew->m_nNextState = 0;
		pNew->m_pNext = NULL;
		m_pTable.push_back(pNew);
	}
	
	bool bRet = AddFSM(X, nStart, nNewState);
	
	bool bRet2 = AddFSM(Y, nNewState, nFinal);

	return bRet && bRet2;
}

bool FSM::Match(char * str)
{
	if (!str)
		return false;

	if (*str == 0)
		return false;

	char * pTmp = str;

	int nState = m_nStartState;

	node * pNode = NULL;

	std::list<node *>::iterator it = m_pTable.begin();

	node * pTmpNode = NULL;

	while (*pTmp)
	{
		pNode = NULL;

		for (; it != m_pTable.end(); it++)
		{
			if ((*it)->m_nState == nState)
			{
				pNode = *it;
				break;
			}
		}

		if (!pNode)
			return false;

		pTmpNode = pNode->m_pNext;

		bool bFind = false;

		while (pTmpNode)
		{
			if (pTmpNode->m_cLetter == *pTmp)
			{
				nState = pTmpNode->m_nNextState;
				bFind = true;
				break;
			}
			pTmpNode = pTmpNode->m_pNext;
		}

		if (!bFind)
		{
			return false;
		}

		pTmp++;
	}

	if (nState == m_nFinalState)
		return true;

	return false;
}


// Keyword and operator strings that main() feeds to FSM::AddFSM, in this order.
// Only the first 20 of the 1024 slots are given a value; the remaining slots
// are zero-initialised (NULL), and main() stops at the first NULL entry.
char * KeyWords[1024] = {
	"var",
	"+",
	"-",
	"*",
	"/",
	"%",
	"if",
	"else",
	"switch",
	"case",
	"while",
	"do",
	"for",
	"continue",
	"break",
	"class",
	"struct",
	"union",
	"enum",
	"new"
};


int main()
{
	FSM obj;

	char * pTmp = NULL;

	int i = 0;

	pTmp = *KeyWords;

	while (pTmp)
	{
		if (obj.AddFSM(pTmp, obj.m_nStartState, obj.m_nFinalState))
		{
			printf("Add %s suc!\n", pTmp);
		}
		else
		{
			printf("Add %s failed.\n", pTmp);
		}

		i++;

		pTmp = KeyWords[i];
	}

	FILE * fSrc = fopen("E:/a.cs", "rb+");

	char buf[1024] = { 0 };

	char str[1024] = { 0 };

	if (fSrc)
	{
		while (!feof(fSrc))
		{
			int nSize = fread(buf, 1, 1024, fSrc);

			int i = 0;

			int m = 0;

			while (i < nSize)
			{
				if (buf[i] != ' ' && buf[i] != '\t' && buf[i] != '\n' && buf[i] != '\r' && buf[i] != 0)
				{
					str[m++] = buf[i++];
				}
				else
				{
					if (strcmp(str, ""))
					{
						if (obj.Match(str))
						{
							printf("%s match suc\n", str);
						}
						else
						{
							printf("%s match failed.\n", str);
						}
					}

					i++;

					m = 0;

					memset(str, 0, 1024);
				}				
			}
		}
		
		fclose(fSrc);
	}

	system("pause");
	
	return 0;
}

扫描文件内容如下:

if var == else
	
do while for

+ - * /

class break abc

enum

struct

case

break continue def  new 

hello china for

 new

程序运行效果如下:

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

孟建行

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值