实现词法分析器（C++）

月光_刻本

于 2024-09-21 12:16:25 发布

阅读量288

点赞数 3

分类专栏：编译器设计　文章标签：c++、算法、开发语言

本文链接：https://blog.csdn.net/weixin_64343068/article/details/142414616

版权

编译器设计专栏收录该内容

8 篇文章 0 订阅

订阅专栏

1.采用文件形式输入代码，comp.cpp

#include<iostream>
#include"globalvar.h"
#include "semantic.h"
using namespace std;
// Input stream of the source being compiled; assigned by Compiler().
FILE* file;

// Path of the input file that main() passes to Compiler().
char filename[] = "test.txt";
/**
 * Opens the source file and starts compilation.
 *
 * On success the stream is stored in the global `file`, a progress message is
 * printed and program() is called. If the file cannot be opened, an error
 * message is printed and the process terminates with a non-zero exit status.
 *
 * @param filename  Path of the source file to compile.
 */
void Compiler(char filename[])
{
	file = fopen(filename, "r");
	if (!file)
	{
		cout << "打开文件失败！" << endl;
		// A failed open must not be reported to the caller's shell as success
		// (the original used exit(0)).
		exit(1);
	}

	cout << "打开文件成功！开始读取文件。。。" << endl;
	program();
}
// Program entry point: compiles the file named by the global `filename`.
int main() {
	Compiler(filename);
	return 0;
}

2. program() 是语法分析的入口函数。语法分析需要逐个取得有效单词（token），而单词由词法分析器识别，因此语法分析器通过调用词法分析器、使用其返回值来获得单词。

3.词法分析器 Lexical.cpp

#include"globalvar.h"
#include<iostream>
// Input stream read by scan(); defined outside this file.
extern FILE* file;
#define BUFLEN 80// size of the line buffer
char* lineLen = NULL;// result of the last fgets() call in scan(); NULL when no more data could be read
int readPos = -1;// index of the current character within `line`
char line[BUFLEN];// line buffer filled by fgets()
int lineNum = 1;// current line number
int colNum = 0;// current column number
int error;// error-termination flag (not referenced in the code visible here)
char lastch = ' ';// previous character
char ch = ' ';// current character
int f=0;// return flag of scan()
/**
 * Advances the lexer by one character.
 *
 * When the previous character was the NUL terminator of the buffer, `line` is
 * refilled from `file` with fgets(). The next character of the buffer is then
 * stored in `ch`, and `lineNum` / `colNum` are updated.
 *
 * End of input is signalled by the byte value -1 in `ch`. When it is reached
 * the file is closed and `file` is set to NULL; every later call keeps
 * reporting end of input without touching the stream or the buffer.
 *
 * @return 0 when a character was read, -1 at end of input.
 */
int scan()
{
	// End-of-input sentinel. Converting to char first makes the comparisons
	// below work whether plain char is signed or unsigned.
	const char endMark = (char)-1;

	if (file == NULL) // input already closed: never hand a NULL stream to fgets()
	{
		ch = endMark;
		return -1;
	}

	if (ch == 0) // the current buffer has been fully consumed
	{
		lineLen = fgets(line, BUFLEN, file); // reload the buffer
		if (lineLen == NULL) // nothing left to read
		{
			line[0] = endMark; // mark end of input
		}
		readPos = -1; // restart at the beginning of the buffer
	}

	readPos++;
	ch = line[readPos]; // fetch the new character

	if (lastch == '\n') // the previous character ended a line
	{
		lineNum++;
		colNum = 0;
	}

	if (ch == endMark) // end of input: close the file
	{
		fclose(file);
		file = NULL;
		return -1;
	}

	// Neither the newline nor the buffer's NUL terminator occupies a source
	// column; counting the terminator shifted every column number by one.
	if (ch != '\n' && ch != 0)
		colNum++;

	lastch = ch;
	return 0;
}

enum Tag token = NW_NULL;// category of the symbol currently detected

char idname[idLen + 1];// holds the identifier text
int val = 0;// holds the numeric value
char str[stringLen + 1];// holds a string (not written by the code visible here)
char letter = 0;// holds a character (not written by the code visible here)
// Reports whether the current character `ch` is a decimal digit ('0'..'9').
bool digit() {
	return '0' <= ch && ch <= '9';
}
// Reports whether the current character `ch` is a letter in 'a'..'z' or 'A'..'Z'.
bool cha() {
	const bool isLower = 'a' <= ch && ch <= 'z';
	const bool isUpper = 'A' <= ch && ch <= 'Z';
	return isLower || isUpper;
}
// Number of entries in reservedTable and reservedSymbol.
#define reservedNum 18
// Reserved words in ascending strcmp() order. checkKeyword() performs a binary
// search over this table, so any new word must be inserted at its sorted position.
static char reservedTable[reservedNum][idLen] = { 
	"break",
	"case","char","cin","continue","cout",
	"default","do",
	"else","extern",
	"for",
	"if","int",
	"return",
	"string","switch",
	"void",
	"while"};
// Token category of each reserved word; entry k belongs to reservedTable[k],
// so both tables must be kept in the same order.
static enum Tag reservedSymbol[reservedNum] = { 
	NW_BREAK,
	NW_CASE,NW_CHAR,NW_CIN,NW_CONTINUE,NW_COUT,
	NW_DEFAULT,NW_DO,
	NW_ELSE,NW_EXTERN,
	NW_FOR,
	NW_IF,NW_INT,
	NW_RETURN,
	NW_STRING,NW_SWITCH,
	NW_VOID,
	NW_WHILE
};
/**
 * Classifies the text in `idname` as a reserved word or a plain identifier.
 *
 * Binary-searches reservedTable for `idname`. On a match `token` is set to the
 * corresponding reservedSymbol entry; otherwise `token` is set to ID.
 */
void checkKeyword()
{
	int low = 0;
	int high = reservedNum - 1;

	while (low <= high)
	{
		const int mid = (low + high) / 2;
		const int order = strcmp(idname, reservedTable[mid]);

		if (order == 0) // found: it is a reserved word
		{
			token = reservedSymbol[mid];
			return;
		}
		if (order < 0)
			high = mid - 1;
		else
			low = mid + 1;
	}

	token = ID; // search failed: it is an identifier
}

int tokenize()
{
	while (ch == ' ' || ch == '\n' || ch == '\t'||ch==0)//跳过无效字符
	{
		f=scan();
	}

	
		if (f == -1)
		{
			token = NW_NULL;
			return -1;
		}
	
	//标识符（关键字）
	if (cha() || ch == '_') 
	{
		int idCount = 0;//为标识符的长度计数
		int reallen = 0;//实际标识符长度
		int f;//getChar返回标记,作用：ch取下一个字符
		//取标识符
		do {
			reallen++;
			if (idCount < idLen)
				{
					idname[idCount] = ch;
					idCount++;
				}
			f = scan();
		} while (cha() || ch == '_' ||digit());
		idname[idCount] = 0;//标识符结束
		if (reallen > idLen)//标识符过长
		{
			lexerror(id2long, 0);
		}
		checkKeyword();//检查关键字
		return f;
	}
	//数字
	else if (digit())
	{
		token = NUM;
		int numCount = 0;//为数字的长度计数
		val = 0;//数值迭代器
		int reallen = 0;//实际数字长度
		do {
			reallen++;
			if (numCount < numLen)
			{
				val = ch - '0' + val * 10;
				numCount++;
			}
			f = scan();
		} while (digit());
		if (reallen > numLen)//数字过长
		{
			lexerror(num2long, 0);
		}
		return f;
	}
	else
		{
			//界符
			switch (ch) {
			case'+':
				token = ADD;
				GET_CHAR;
				break;
			case'-':
				token = SUB;
				GET_CHAR;
				break;
			case'*':
				token = MUL;
				GET_CHAR;
				break;
			case'/':
				GET_CHAR;
				token = DIV;
				if (ch == '/') {//单行注释
					token = NW_NULL;
					while (ch != '\n')
						GET_CHAR;
					GET_CHAR;
				}
				else if (ch == '*') {//多行注释
					token = NW_NULL;
					do {
						GET_CHAR;
						if (ch == '*')
						{
							GET_CHAR;
							if (ch == '/')
								break;
						}
					} while (1);
					GET_CHAR;
				}
				break;
			case'>':
				token = GT;
				GET_CHAR;
				if (ch == '=')
				{
					token = GE;
					GET_CHAR;
				}
				else if (ch == '>')
				{
					token = INPUT;
					GET_CHAR;
				}
				break;
			case'<':
				token = LT;
				GET_CHAR;
				if (ch == '=')
				{
					token = LE;
					GET_CHAR;
				}
				else if (ch == '<')
				{
					token = OUTPUT;
					GET_CHAR;
				}
				break;
			case'=':
				token = ASSIGN;
				GET_CHAR;
				if (ch == '=')
				{
					token = EQU;
					GET_CHAR;
				}
				break;
			case'&':
				token = LEA;
				GET_CHAR;
				if (ch == '&')
				{
					token = AND;
					GET_CHAR;
				}
				break;
			case'|':
				token = NW_NULL;
				GET_CHAR;
				if (ch == '|')
				{
					token = OR;
					GET_CHAR;
				}
				break;
			case'!':
				token = NOT;
				GET_CHAR;
				if (ch == '=')
				{
					token = NEQU;
					GET_CHAR;
				}
				break;
			case',':
				token = COMMA;
				GET_CHAR;
				break;
			case':':
				token= COLON;
				GET_CHAR;
				break;
			case';':
				token = SEMICON;
				GET_CHAR;
				break;
			case'(':
				token = LPAREN;
				GET_CHAR;
				break;
			case')':
				token = RPAREN;
				GET_CHAR;
				break;
			case'[':
				token =LBRACK;
				GET_CHAR;
				break;
			case']':
				token = RBRACK;
				GET_CHAR;
				break;
			case'{':
				token = LBRACE;
				GET_CHAR;
				break;
			case'}':
				token = RBRACE;
				GET_CHAR;
				break;
			case -1:
				lexerror(charwrong, 0);
				return -1;
			default:
				token = EXCEP;
				lexerror(excpchar, ch);
				//虽然是词法错误，但是不影响语法语义的分析过程，暂且定位为警告
				GET_CHAR;
			}
		}
	return 0;
	}