CMM词法分析(C++实现)

这学期开了解释器与构造器这门课,根据老师的要求要逐步编写CMM语言的词法分析、语法分析,最终完成一个简单的IDE,在此记录下我完成的过程。

本人初学者,对很多方面如效率问题、代码的健壮性都未仔细考虑,如有错误或不足的地方,欢迎指出。

下面贴出代码

WordList.h

#ifndef __WordList__Test__
#define __WordList__Test__

#include <stdio.h>
#include <string>
#include <iostream>

// Keywords. Codes 1-7 follow the order of the keyword table that Task2::isKey
// searches (it returns table index + 1).
#define IF 1
#define ELSE 2
#define WHILE 3
#define READ 4
#define WRITE 5
#define INT 6
#define REAL 7
#define KEY_DESC "关键字"

// Identifier
#define IDENTIFER 20
#define IDENTIFER_DESC "标识符"

// Malformed identifier
#define WRONGID 21
#define WRONGID_DESC "非法标识符"

// Constants
#define INT_VAL 30 // integer constant
#define REAL_VAL 31	// real-number constant
#define CONSTANT_DESC "常量"

// Special symbols (operators and delimiters)
#define ADD 41 // +
#define SUB 42 // -
#define MUL 43 // *
#define DIV 44 // /
#define ASG 45 // =
#define LES_THAN 46    // <
#define MORE_THAN 47   // >
#define NOT_EQUAL 48    // <>
#define LEFT_BRA 49    // (
#define RIGHT_BRA 50   // )
#define SEMI 51    // ;
#define LEFT_BOUNDER 52    // {
#define RIGHT_BOUNDER 53   // }
#define EQUAL 54   // ==
#define LEFT_INDEX 55  // [
#define RIGHT_INDEX 56 // ]
#define SPECIAL_CODE "特殊符号"

// Comments
#define NOTE 70   // /* */
#define NOTE_WRONG 71	//	malformed (unterminated) comment
#define NOTE_DESC "注释"
#define NOTE_WRONG_DESC "错误注释"


using namespace std;

// One node of the singly linked list of tokens produced by the lexer.
// Nodes are obtained through create(), which appends each new node after the
// current tail. The node stored in `head` is created during static
// initialization and never receives token data; Task2::printWordList starts
// printing at the node after it.
class WordList
{
public:
	WordList(void); // constructor

    ~WordList(void);    // destructor

    static WordList* create(void);  // allocate a node, init() it and link it after the current tail

    bool init(void);    // reset every field to empty / 0 / NULL; returns true

    void addMessage(string content, string describe, int type, int line);    // store the token data in this node

	void printMessage();	// print content, description, type and line on one line of stdout

    WordList* getNext();    // node that follows this one (NULL when there is none)

    void setNext(WordList *next);   // set the node that follows this one

    static WordList* getHead(); // first node of the list

    static WordList* getLast(); // current tail of the list

    static void changeLast();   // move the tail pointer to the tail's successor

	int getType();	// token type code

	int getLine();	// line number stored for the token

	string getContent();	// token text

private:
    string content; // token text
    string describe;    // human-readable category
    int type;   // token type code
    int line;   // line number the token was recorded on
    WordList *next; // next node in the list
    static WordList *head;  // first node of the list
    static WordList *last;  // last node of the list
};

#endif
WordList.cpp

#include "WordList.h"

using namespace std;

// Tail of the list. It is initialized with a constant, so it is already NULL
// when the dynamic initializer of `head` below calls create().
WordList *WordList::last = NULL;

// First node of the list, created during static initialization; create()
// also records it as the initial tail.
WordList *WordList::head = WordList::create();

// Builds an empty, unlinked node.
// Every field is given a defined value here so that a node constructed
// directly (without going through create()/init()) never exposes an
// indeterminate type, line or next pointer.
WordList::WordList()
    : content(), describe(), type(0), line(0), next(NULL)
{
}

// Destructor. The body is empty: destroying a node does not delete the node
// it links to.
WordList::~WordList()
{

}

// Factory for list nodes.
// Allocates a node, resets it with init(), links it behind the current tail
// (when there is one) and makes it the new tail.
// Returns the new node, or NULL when allocation or init() did not succeed.
WordList *WordList::create()
{
    WordList *node = new WordList();

    // Failure path first: release whatever was allocated and report NULL.
    if(!node || !node->init())
    {
        delete node;
        return NULL;
    }

    if(WordList::last != NULL)
    {
        WordList::last->setNext(node);
    }
    WordList::last = node;
    return node;
}

// Resets the node to its empty state: no text, no description, type 0,
// line 0 and no successor. Always returns true.
bool WordList::init()
{
    next = NULL;
    line = 0;
    type = 0;
    describe = "";
    content = "";
    return true;
}

// Stores the data of one token in this node.
//   content  - token text
//   describe - human-readable category
//   type     - token type code
//   line     - line number to record for the token
void WordList::addMessage(string content, string describe, int type, int line)
{
    // The string parameters are private copies, so exchanging them with the
    // members leaves the members holding the new values.
    this->content.swap(content);
    this->describe.swap(describe);
    WordList::type = type;
    WordList::line = line;
}

void WordList::printMessage()
{
	cout<<this->content<<"  "<<this->describe<<"  "<<this->type<<"  "<<this->line<<"\n";
}

// Returns the node linked after this one.
WordList* WordList::getNext()
{
    return next;
}

// Links `next` after this node.
void WordList::setNext(WordList *next)
{
    // Qualified name selects the data member; the parameter shadows it.
    WordList::next = next;
}

// Returns the first node of the list.
WordList* WordList::getHead()
{
    return head;
}

// Returns the current tail of the list.
WordList* WordList::getLast()
{
    return last;
}

void WordList::changeLast()
{
	WordList::last = WordList::last->getNext();
}

int WordList::getType()
{
	return this->type;
}

int WordList::getLine()
{
	return this->line;
}

// Returns a copy of the token text stored in this node.
string WordList::getContent()
{
    return content;
}
WordList类是用来存储词法分析过程中生成的词素的链表节点类,通过create以及init函数实现新建节点时自动加入队列,通过两个静态的head和last节点记录链表的头和尾。

Task2.h

#ifndef __Task2__Test__
#define __Task2__Test__

#include <stdio.h>
#include <iostream>
#include "WordList.h"



using namespace std;

// Lexical analyzer for CMM source files.
// scanner() opens the input file and starts the analysis; chooseForWord()
// looks at the current character and hands over to one of the *Process()
// methods, each of which records a token in the WordList and then calls
// chooseForWord() again.
class Task2
{
public:
	Task2(void);
	~Task2(void);
	void printWordList();	// print every recorded token

	void scanner();	// ask for the input file path, open it and run the analysis

	void chooseForWord();	// dispatch on the current character; prints the token list at end of input

	void stringProcess();	// handle a lexeme that starts with a letter (keyword or identifier)

	void _WrongStringProcess();	// handle a lexeme that starts with '_' (recorded as a malformed identifier)

	void numberProcess();	// handle a lexeme that starts with a digit

	void realProcess(const int &num);	// continue a number at its '.'; num is the count of digits already stored in array

	void specialCodeProcess();	// handle operators, delimiters and /* */ comments

	int isKey(char *word);	// return the keyword code of word, or IDENTIFER when it is not a keyword

private:
	int line;	// current line number, recorded with each token
	FILE *fileForCMM;	// the CMM input file
	char flag;	// most recently read character
	char *words;	// heap-allocated C string holding the current lexeme
	char array[30];	// scratch buffer used while collecting a lexeme
	bool firstReadFlag;	// true until the first character has been read from the file


};

#endif

Task2.cpp

#include "Task2.h"

#include <cstring>
#include <iostream>
#include <string>

using namespace std;

// Reserved words of CMM. The order matters: Task2::isKey returns the index of
// a match plus one, which lines up with the IF..REAL codes in WordList.h.
const char *key[] = {"if", "else", "while", "read", "write", "int", "real"};

// Puts the analyzer into a defined idle state: line 1, no file open, no
// lexeme buffer, empty scratch array, first read still pending.
// scanner() sets line and firstReadFlag again before it starts, so this only
// matters for members that would otherwise be read while indeterminate.
Task2::Task2()
	: line(1), fileForCMM(NULL), flag('\0'), words(NULL), array(), firstReadFlag(true)
{
}

// Destructor. The body is empty: it neither closes fileForCMM nor frees words.
Task2::~Task2()
{

}

// Looks `word` up in the keyword table.
//   word - NUL-terminated lexeme; a NULL pointer is treated as "not a keyword".
// Returns the keyword code (table index + 1) on an exact match, otherwise
// IDENTIFER.
int Task2::isKey(char *word)
{
	if(word == NULL)
	{
		return IDENTIFER;
	}

	// Derive the bound from the table itself so that adding a keyword cannot
	// leave a stale hard-coded count behind.
	const int keyCount = static_cast<int>(sizeof(key) / sizeof(key[0]));
	for(int i = 0; i < keyCount; i++)
	{
		if(strcmp(word, key[i]) == 0)
		{
			return i + 1;
		}
	}
	return IDENTIFER;
}

void Task2::printWordList()
{
	WordList *newWordList = WordList::getHead();
	while(newWordList->getNext() != NULL)
	{
		newWordList = newWordList->getNext();
		newWordList->printMessage();
	}
}                      

void Task2::scanner()
{
	firstReadFlag = true;

	line = 1;

	char fileName[20];
	cout<<"请输入需要读取的CMM文件的路径\n";
	scanf("%s", fileName);
	fileForCMM = fopen(fileName, "r");
	if(!fileForCMM)
	{
		cout<<"读取文件失败\n";
		return;
	}

	this->chooseForWord();
}

// Dispatches on the current character `flag`.
// On the first call it reads the first character of the file. Letters, '_',
// digits and operator/delimiter characters are handed to the matching
// *Process() method; newlines bump the line counter; blanks and tabs are
// skipped; any other character is reported on stdout and skipped. At end of
// input the collected token list is printed.
//
// Skipped characters are consumed in a loop rather than by calling this
// function again, so runs of whitespace no longer add one stack frame per
// character. Each *Process() method calls chooseForWord() itself when it is
// done, which is why this function returns right after handing over.
//
// NOTE(review): `flag` is a char, so the comparison with EOF relies on the
// platform's char signedness; changing that needs a change to the member type.
void Task2::chooseForWord()
{
	if(firstReadFlag)
	{
		flag = fgetc(fileForCMM);
		firstReadFlag = false;
	}

	while(flag != EOF)
	{
		if((flag >= 'A' && flag <= 'Z') || (flag >= 'a' && flag <= 'z'))
		{
			this->stringProcess();
			return;
		}
		if(flag == '_')
		{
			this->_WrongStringProcess();
			return;
		}
		if(flag >= '0' && flag <= '9')
		{
			this->numberProcess();
			return;
		}

		switch(flag)
		{
		case '{': case '}': case '[': case ']': case '(': case ')':
		case '+': case '-': case '*': case '/': case '<': case '>':
		case '=': case ';':
			this->specialCodeProcess();
			return;
		case '\n':
			line ++;
			break;
		case '\t':
		case ' ':
			break;
		default:
			cout<<"第"<<line<<"行出现不可识别的符号"<<flag<<"\n";
			break;
		}

		// Skip the character that was just handled inline.
		flag = fgetc(fileForCMM);
	}

	this->printWordList();
}

void Task2::stringProcess()
{
	int i = 0;
	while((flag >='A' && flag <= 'Z') || (flag >= 'a' && flag <= 'z') 
		|| flag == '_' || (flag >= '0' && flag <= '9'))
	{
		array[i++] = flag;
		flag = fgetc(fileForCMM);
	}

	words = new char[i+1];
	memcpy(words, array, i);
	words[i] = '\0';

	int getFlag = this->isKey(words);
	if(getFlag != IDENTIFER)
	{
		WordList *newWordList = WordList::create();
		newWordList->addMessage(words, KEY_DESC, getFlag, line);
	}
	else
	{
		WordList *newWordList = WordList::create();
		if(words[i-1] == '_')
		{
			newWordList->addMessage(words, WRONGID_DESC, WRONGID, line);
		}
		else
		{
			newWordList->addMessage(words, IDENTIFER_DESC, getFlag, line);
		}
	}

	this->chooseForWord();
}

void Task2::numberProcess()
{
	int i = 0;
	while(flag >= '0' && flag <= '9')
	{
		array[i++] = flag;
		flag = fgetc(fileForCMM);
	}
	if(flag == '.')
	{
		this->realProcess(i);
		return;
	}
	if((flag >= 'a' && flag <= 'z') || (flag >= 'A' && flag <= 'Z') || (flag == '_'))
	{
		while((flag >= 'a' && flag <= 'z') || (flag >= 'A' && flag <= 'Z') 
			|| (flag == '_') || (flag >= '0' && flag <= '9'))
		{
			array[i++] = flag;
			flag = fgetc(fileForCMM);
		}
		words = new char[i+1];
		memcpy(words, array, i);
		words[i] = '\0';
		WordList *newWordList = WordList::create();
		newWordList->addMessage(words, WRONGID_DESC, WRONGID, line);
	}
	else
	{
		words = new char[i+1];
		memcpy(words, array, i);
		words[i] = '\0';
		WordList *newWordList = WordList::create();
		newWordList->addMessage(words, CONSTANT_DESC, INT_VAL, line);
	}
	this->chooseForWord();
}

void Task2::realProcess(const int &num)
{
	int i = num;
	array[i++] = flag;
	flag = fgetc(fileForCMM);
	while(flag >= '0' && flag <= '9')
	{
		array[i++] = flag;
		flag = fgetc(fileForCMM);
	}
	if((flag >= 'a' && flag <= 'z') || (flag >= 'A' && flag <= 'Z') || (flag == '_'))
	{
		while((flag >= 'a' && flag <= 'z') || (flag >= 'A' && flag <= 'Z') 
			|| (flag == '_') || (flag >= '0' && flag <= '9'))
		{
			array[i++] = flag;
			flag = fgetc(fileForCMM);
		}
		words = new char[i+1];
		memcpy(words, array, i);
		words[i] = '\0';
		WordList *newWordList = WordList::create();
		newWordList->addMessage(words, WRONGID_DESC, WRONGID, line);
	}
	else
	{
		words = new char[i+1];
		memcpy(words, array, i);
		words[i] = '\0';
		WordList *newWordList = WordList::create();
		newWordList->addMessage(words, CONSTANT_DESC, REAL_VAL, line);
	}
	this->chooseForWord();
}

void Task2::_WrongStringProcess()
{
	int i = 0;
	while((flag >='A' && flag <= 'Z') || (flag >= 'a' && flag <= 'z') 
		|| flag == '_' || (flag >= '0' && flag <= '9'))
	{
		array[i++] = flag;
		flag = fgetc(fileForCMM);
	}

	words = new char[i+1];
	memcpy(words, array, i);
	words[i] = '\0';

	WordList *newWordList = WordList::create();

	newWordList->addMessage(words, WRONGID_DESC, WRONGID, line);

	this->chooseForWord();
}

void Task2::specialCodeProcess()
{
	WordList *newWordList = WordList::create();

	words = new char[2];
	array[0] = flag;
	memcpy(words, array, 1);
	words[1] = '\0';

	if(flag == '{')
	{
		newWordList->addMessage(words, SPECIAL_CODE, LEFT_BOUNDER, line);
	}
	else if(flag == '}')
	{
		newWordList->addMessage(words, SPECIAL_CODE, RIGHT_BOUNDER, line);
	}
	else if(flag == '[')
	{
		newWordList->addMessage(words, SPECIAL_CODE, LEFT_INDEX, line);
	}
	else if(flag == ']')
	{
		newWordList->addMessage(words, SPECIAL_CODE, RIGHT_INDEX, line);
	}
	else if(flag == '(')
	{
		newWordList->addMessage(words, SPECIAL_CODE, LEFT_BRA, line);
	}
	else if(flag == ')')
	{
		newWordList->addMessage(words, SPECIAL_CODE, RIGHT_BRA, line);
	}
	else if(flag == '+')
	{
		newWordList->addMessage(words, SPECIAL_CODE, ADD, line);
	}
	else if(flag == '-')
	{
		newWordList->addMessage(words, SPECIAL_CODE, SUB, line);
	}
	else if(flag == '>')
	{
		newWordList->addMessage(words, SPECIAL_CODE, MORE_THAN, line);
	}
	else if(flag == ';')
	{
		newWordList->addMessage(words, SPECIAL_CODE, SEMI, line);
	}
	else if(flag == '*')
	{
		newWordList->addMessage(words, SPECIAL_CODE, MUL, line);
	}
	else if(flag == '<')
	{
		flag = fgetc(fileForCMM);
		if(flag == '>')
		{
			words = new char[3];
			array[1] = flag;
			memcpy(words, array, 2);
			words[2] = '\0';
			newWordList->addMessage(words, SPECIAL_CODE, NOT_EQUAL, line);
		}
		else
		{
			fseek(fileForCMM, -1L, SEEK_CUR);
			newWordList->addMessage(words, SPECIAL_CODE, LES_THAN, line);
		}
	}
	else if(flag == '=')
	{
		flag = fgetc(fileForCMM);
		if(flag == '=')
		{
			words = new char[3];
			array[1] = flag;
			memcpy(words, array, 2);
			words[2] = '\0';
			newWordList->addMessage(words, SPECIAL_CODE, EQUAL, line);
		}
		else
		{
			fseek(fileForCMM, -1L, SEEK_CUR);
			newWordList->addMessage(words, SPECIAL_CODE, ASG, line);
		}
	}
	else if(flag == '/')
	{
		flag = fgetc(fileForCMM);
		
		if(flag == '*')
		{
			char flag_2 = fgetc(fileForCMM);
			if(flag_2 != EOF)
			{
				while(!(flag == '*' && flag_2 == '/') && flag_2 != EOF)
				{
					if(flag_2 == '\n')
					{
						line ++;
					}
					flag = flag_2;
					flag_2 = fgetc(fileForCMM);
				}
				if(flag_2 == EOF)
				{
					newWordList->addMessage("注释", NOTE_WRONG_DESC, NOTE_WRONG, line);
				}
				else
				{
					newWordList->addMessage("注释", NOTE_DESC, NOTE, line);
				}
			}
		}
		else
		{
			fseek(fileForCMM, -1L, SEEK_CUR);
			newWordList->addMessage(words, SPECIAL_CODE, DIV, line);
		}
	}

	flag = fgetc(fileForCMM);
	this->chooseForWord();

	
}
Task2为词法分析主流程,通过scanner方法读取CMM文件,chooseForWord方法对读到的每个字符进行判断,根据读到的不同类型,分别通过stringProcess、_WrongStringProcess、numberProcess、realProcess、specialCodeProcess方法进行处理,之后再返回到chooseForWord方法,如此递归,最后通过printWordList打印结果。


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值