关闭

编译原理-词法分析器

标签: 编译原理词法分析c++编译器
241人阅读 评论(0) 收藏 举报
分类:

编译原理的词法分析器是我们编译原理课程的实验内容,语法分析器和语义分析等做完了再贴出来。


程序用 C++ 编写,比较粗糙,报错部分还需要改善。欢迎大家批评指正,有疑问请留言,大家一起进步!


头文件

// morpholopy.h - state identifiers shared by the lexer's small state machines.
#ifndef morpholopy_h
#define morpholopy_h

// Terminal states. The scanners loop while (state > 0), so both terminal
// values must be <= 0 and every working state must be positive.
#define STATE_DONE 0
// Parenthesized so the negative literal always expands as a single operand.
#define STATE_ERROR (-1)

// Working states of the number scanner.
#define STATE_BEGIN 12          // nothing consumed yet
#define STATE_INTEGER 13        // inside the integer part
#define STATE_DECIMAL_BEGIN 14  // just consumed '.', a digit must follow
#define STATE_DECIMAL 15        // inside the fractional part
#define STATE_E 16              // just consumed 'E', a sign or a digit must follow
#define STATE_SYMBOL 17         // just consumed the exponent sign, a digit must follow
#define STATE_POWER 18          // inside the exponent digits

// Working state of the identifier scanner: inside an identifier.
#define STATE_CHECK 19

#endif /* morpholopy_h */

wordAnalyze.cpp

#pragma warning(disable : 4996)
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include "morpholopy.h"
using namespace std;
// Number of tokens stored so far; also the index of the next free slot in
// the token table that every check* function appends to.
static int wcount = 0;

// One recognized token.
struct Word {
	char type[50];   // category name as a C string, e.g. "id", "key", "num", "op", "relop", "limit"
	char value[50];  // token text as a C string
};

// Scanner entry points. Each one receives a pointer into the NUL-terminated
// input plus the token table, may append tokens at index wcount, and returns
// the position where scanning should continue.

// Relational operators: <, <=, <>, >, >=, ==. Returns ch unchanged for a lone '='.
char* checkRelops(char* ch, Word result[]);
// Identifiers and keywords.
char* checkID(char* ch, Word result[]);
// Numeric literals (integer, decimal, optional 'E' exponent).
char* checkNum(char* p, Word words[]);
// Single-character operators, plus the doubled forms && and ||.
char* checkOP(char* ch, Word word[]);
// Assignment '='.
char* checkEqual(char* ch, Word result[]);
// Writes the text of an operator token (one or two characters) into the current slot.
void setResult_OP(Word word[], char op1, char op2);
// Delimiters and double-quoted string literals.
char* checkLimit(char* ch, Word result[]);
// Block and line comments; comments produce no token unless a block comment is unterminated.
char* checkAnnotate(char* ch, Word word[]);
// Prints the first wcount tokens as "<type,value>" lines.
void display(Word word[]);

// Reserved words that checkID() reports as "key" instead of "id".
// Each row holds one keyword as a C string; 10 bytes per row is enough for
// the longest entries (8 characters) plus the terminator.

char key[][10] = { "auto", "double", "int", "struct", "break", "else", "long", "switch", "case", "enum", "register", "typedef", "char", "extern", "return", "union", "const", "float", "short", "unsigned", "continue", "for", "signed", "void", "default", "goto",
"sizeof", "volatile", "do", "if", "while", "static" };

int main(int argc, const char * argv[]) {

	char word[1024];
	Word result[1024];

	ifstream ifs;
	ifs.open("text.txt");
	if (!ifs.is_open())
	{
		cout << "Error opening file";
		exit(1);
	}
	while (!ifs.eof())
	{
		ifs.read(word, 256);
		int readCount=ifs.gcount();
		word[readCount] = '\0';
		char* start = &word[0];
		char* end = start;
		while (*(start) != '\0') {
			if (*start == ' ')
			{
				end = end + 1;
				start = end;
			}
			else if ((*start == '/' && *(start + 1) == '*') || (*start == '/' && *(start + 1) == '/'))
			{
				end = checkAnnotate(start, result);
				start = end;
			}
			else if (*start == '_' || isalpha(*start))
			{
				end = checkID(start, result);
				start = end;
			}
			else if (isdigit(*start))
			{
				end = checkNum(start, result);
				start = end;
			}
			else if (*start == '<' || *start == '>' || *start == '=')
			{
				end = checkRelops(start, result);
				if (end == start) {
					end = checkEqual(start, result);
				}
				start = end;
			}
			else if (*start == '+' || *start == '-' || *start == '*' || *start == '/' || *start == '%' || *start == '&' || *start == '|' || *start == '^' || *start == '~' || *start == '.' || *start == ':' || *start == '?')
			{
				end = checkOP(start, result);
				start = end;
			}
			else if (*start == '{' || *start == '}' || *start == '<' || *start == '>' || *start == '[' || *start == ']' || *start == '(' || *start == ')' || *start == '@' || *start == '#' || *start == ',' || *start == ';' || *start == '"')
			{
				end = checkLimit(start, result);
				start = end;
			}
			else
			{
				end = end + 1;
				start = end;
			}
		}
	}
	display(result);
	/*for (int i = 0; i < wcount; i++) {
		cout << "<" << result[i].type << "," << result[i].value << ">" << endl;
	}*/
	int a = 0;
	cin >> a;
	cout << a << endl;
	return 0;
}



char* checkLimit(char* ch, Word result[])
{
	char* temp = ch;
	char* afterFirstRef;
	char* beforeLastRef;
	if (*ch == '"'){
		afterFirstRef = ch;
		ch++;
		int forword=9;
		while (forword!=0){
			if (*ch == '"'){
				beforeLastRef = ch;
				forword = 0;
				//保存第一个引号
				char* write_ch = &result[wcount].value[0];
				strcpy(&result[wcount].type[0], "limit");
				*write_ch = *ch;
				write_ch++;
				*write_ch = '\0';
				wcount++;

				//保存中间的字符串常量
				//char* write_ch2 = &result[wcount].value[0];
				strcpy(&result[wcount].type[0], "const-ref");
				//*write_ch2 = *ch;
				
				afterFirstRef++;
				int refSize = beforeLastRef - afterFirstRef;
				for (int i = 0; i < refSize ; i++){
					result[wcount].value[i] = *afterFirstRef;
					afterFirstRef++;
				}
				result[wcount].value[refSize] = '\0';
				//write_ch2++;
				//*write_ch2 = '\0';
				wcount++;


				//保存第二个引号
				char* write_ch3 = &result[wcount].value[0];
				strcpy(&result[wcount].type[0], "limit");
				*write_ch3 = *ch;
				write_ch3++;
				*write_ch3 = '\0';
				wcount++;
			
				ch++;
				
			}
			else{
				if (*ch == '\0')//把“\0”当作结束符
					return temp;
				//保存引号中间的常量
				ch++;
			}
				
		}
	}
	else{
		char* write_ch = &result[wcount].value[0];
		strcpy(&result[wcount].type[0], "limit");
		*write_ch = *ch;
		write_ch++;
		*write_ch = '\0';
		wcount++;
		ch++;
	}
	return ch;
}

char* checkEqual(char* ch, Word result[])
{
	strcpy(&result[wcount].type[0], "=");
	strcpy(&result[wcount].value[0], " ");
	wcount++;
	;
	ch++;
	return ch;
}

char* checkRelops(char* ch, Word result[])
{
	char* start = ch;
	while (true) {
		if (*ch == '<') {
			ch++;
			if (*ch == '=') {
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], "<=");
				wcount++;
				ch++;
				return ch;
			}
			else if (*ch == '>')
			{
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], "<>");
				wcount++;
				ch++;
				return ch;
				//不等于
			}
			else
			{
				//小于
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], "<");
				wcount++;
				return ch;
			}
		}
		else if (*ch == '=') {
			ch++;
			if (*ch == '=')
			{
				//等等于
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], "==");
				wcount++;
				ch++;
				return ch;
			}
			else
				return start;
		}
		else if (*ch == '>') {
			ch++;
			if (*ch == '=') {
				//大于等于
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], ">=");
				wcount++;
				ch++;
				return ch;
			}
			else
			{
				//大于
				strcpy(&result[wcount].type[0], "relop");
				strcpy(&result[wcount].value[0], ">");
				wcount++;
				return ch;
			}
		}
		else
			return start;
	}
}

/*
 * Scans a numeric literal starting at p with a small state machine and
 * stores it in the next free slot.
 *
 * Accepted shape: digits, optionally '.' followed by digits, optionally 'E'
 * followed by an optional sign and digits. On success the token type is
 * "num"; when a required digit is missing the type is "NUM_ERROR" and the
 * value holds the characters accepted so far. Text longer than the value
 * buffer is truncated to fit, but the whole literal is still consumed.
 *
 * Returns the position of the first character that is not part of the literal.
 */
char* checkNum(char* p, Word words[]) {
	char* const value = words[wcount].value;
	const size_t capacity = sizeof(words[wcount].value) - 1;
	size_t length = 0;

	int state = STATE_BEGIN;
	while (state > 0) {
		const bool isDigit = (*p >= '0' && *p <= '9');

		switch (state) {
		case STATE_BEGIN:
			state = isDigit ? STATE_INTEGER : STATE_ERROR;
			break;

		case STATE_INTEGER:
			if (isDigit) {
				// stay in the integer part
			}
			else if (*p == '.') {
				state = STATE_DECIMAL_BEGIN;
			}
			else if (*p == 'E') {
				state = STATE_E;
			}
			else {
				state = STATE_DONE;
			}
			break;

		case STATE_DECIMAL_BEGIN:
			// At least one digit must follow the decimal point.
			state = isDigit ? STATE_DECIMAL : STATE_ERROR;
			break;

		case STATE_DECIMAL:
			if (isDigit) {
				// stay in the fractional part
			}
			else if (*p == 'E') {
				state = STATE_E;
			}
			else {
				state = STATE_DONE;
			}
			break;

		case STATE_E:
			if (isDigit) {
				state = STATE_POWER;
			}
			else if (*p == '+' || *p == '-') {
				state = STATE_SYMBOL;
			}
			else {
				state = STATE_ERROR;
			}
			break;

		case STATE_SYMBOL:
			// At least one digit must follow the exponent sign.
			state = isDigit ? STATE_POWER : STATE_ERROR;
			break;

		case STATE_POWER:
			if (!isDigit) {
				state = STATE_DONE;
			}
			break;

		default:
			state = STATE_ERROR;
			break;
		}

		if (state <= 0) {
			// Terminal state: the current character is not part of the token.
			break;
		}

		// The current character belongs to the literal; keep it if it fits.
		if (length < capacity) {
			value[length++] = *p;
		}
		++p;
	}

	value[length] = '\0';
	strcpy(words[wcount].type, state == STATE_DONE ? "num" : "NUM_ERROR");

	// Move on to the next slot of the token table.
	wcount++;
	return p;
}

/*
 * Scans an identifier or keyword starting at ch and stores it in the next
 * free slot.
 *
 * An identifier starts with a letter or '_' and continues with letters,
 * digits or '_'. The token type is "key" when the text matches an entry of
 * the key table and "id" otherwise. Text longer than the value buffer is
 * truncated to fit, but the whole identifier is still consumed.
 *
 * If ch does not point at a letter or '_', nothing is stored and ch is
 * returned unchanged.
 *
 * Returns the position of the first character after the identifier.
 */
char* checkID(char* ch, Word result[])
{
	// <ctype.h> classifiers need a value representable as unsigned char.
	if (*ch != '_' && !isalpha((unsigned char)*ch)) {
		return ch;
	}

	char* const value = result[wcount].value;
	const size_t capacity = sizeof(result[wcount].value) - 1;
	size_t length = 0;

	while (*ch == '_' || isalnum((unsigned char)*ch)) {
		if (length < capacity) {
			value[length++] = *ch;
		}
		++ch;
	}
	value[length] = '\0';

	strcpy(result[wcount].type, "id");

	// Number of rows in the table, not its size in bytes.
	const size_t keyCount = sizeof(key) / sizeof(key[0]);
	for (size_t i = 0; i < keyCount; i++) {
		if (strcmp(value, key[i]) == 0)
		{
			strcpy(result[wcount].type, "key");
			break;
		}
	}

	wcount++;
	return ch;
}

/*
 * Stores the operator at ch as an "op" token.
 *
 * A repeated character is merged into one two-character operator unless it
 * is one of + - * / % ~ ^ . ? : (those are always emitted one at a time).
 *
 * Returns the position just after the consumed operator.
 */
char* checkOP(char* ch, Word word[]){
	const char first = *ch;
	char* next = ch + 1;

	// Characters that never form a doubled operator.
	bool mayDouble;
	switch (*next) {
	case '+': case '-': case '*': case '/': case '%':
	case '~': case '^': case '.': case '?': case ':':
		mayDouble = false;
		break;
	default:
		mayDouble = true;
		break;
	}

	if (*next == first && mayDouble) {
		setResult_OP(word, *next, *next);
		++next;
	}
	else {
		setResult_OP(word, first, '\0');
	}

	strcpy(word[wcount].type, "op");
	++wcount;
	return next;
}

/*
 * Skips a comment starting at ch.
 *
 * - Block comment: everything up to and including the closing marker is
 *   skipped. If the input ends first, an "ERROR" token is stored and only
 *   the two-character opener is consumed.
 * - Line comment: everything up to and including the newline is skipped.
 *   When the comment runs to the end of the input, the returned pointer
 *   stops ON the terminating NUL, never beyond it.
 * - Anything else: ch is returned unchanged.
 *
 * Returns the position where scanning should continue.
 */
char* checkAnnotate(char* ch, Word word[]){
	if (ch[0] != '/') {
		return ch;
	}

	if (ch[1] == '*') {
		for (char* scan = ch + 2; *scan != '\0'; scan++) {
			if (scan[0] == '*' && scan[1] == '/') {
				return scan + 2;
			}
		}
		// Reached the end of the input without a closing marker.
		strcpy(&word[wcount].type[0], "ERROR");
		strcpy(&word[wcount].value[0], "the annotate is not finished...");
		wcount++;
		return ch + 2;
	}

	if (ch[1] == '/') {
		char* scan = ch + 2;
		while (*scan != '\n' && *scan != '\0') {
			scan++;
		}
		if (*scan == '\n') {
			scan++;
		}
		return scan;
	}

	return ch;
}

/*
 * Writes an operator's text into the value field of the current slot.
 *
 * op1 is the first character; op2 is the second character, or '\0' for a
 * single-character operator (in which case op2 itself terminates the string).
 */
void setResult_OP(Word word[], char op1, char op2){
	char* text = word[wcount].value;
	text[0] = op1;
	text[1] = op2;
	if (op2 != '\0')
		text[2] = '\0';
}

/*
 * Prints the first wcount tokens of the table to standard output, one per
 * line, in the form "<type,value>".
 */
void display(Word word[]){
	const Word* const stop = word + wcount;
	for (const Word* token = word; token < stop; ++token)
		cout << "<" << token->type << "," << token->value << ">" << endl;
}


测试文件:text.txt(需与 .cpp 文件放在同一目录下)

void setResult_OP(Word word[], char op1, char op2){
	if (op2 != '\0'){
		word[wcount].value[0] = op1;
	}
	/* this is an annotation ...*/
	int a=10;
	a=a+80;
	float s=2E.2;
	//单行注释
	if(a==s){
	printf("a equal s");
}
}


输出结果:



0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:1199次
    • 积分:63
    • 等级:
    • 排名:千里之外
    • 原创:5篇
    • 转载:2篇
    • 译文:0篇
    • 评论:0条
    文章存档