实验1 词法分析程序实现

一、实验目的

    加深对词法分析器的工作过程的理解;加强对词法分析方法的掌握;能够采用一种编程语言实现简单的词法分析程序;能够使用自己编写的分析程序对简单的程序段进行词法分析。

二、实验软硬件要求

相关软件:VC++ 2010 或者 Dev-Cpp

操作系统:windows操作系统

三、实验要求

1. 对单词的构词规则有明确的定义;

2. 编写的分析程序能够正确识别源程序中的单词符号,包括:标识符、关键字、常见运算符、分隔符、整数、小数、单行注释、字符常数、字符串常数;

3. 识别出的单词以<种别码,值>的形式输出或保存;

4. *对于源程序中的词法错误,能够做出简单的错误处理,给出简单的错误提示,保证顺利完成整个源程序的词法分析;

5. *能够识别以科学计数法表示的数据以及多行注释。

四、实验内容

    自定义一种程序设计语言,或者选择已有的一种高级语言,利用状态转换图编制它的词法分析程序。词法分析程序的实现可以采用任何一种编程工具。

五、实验步骤(程序代码,运行结果等)

#include <iostream>
#include <map>
#include <algorithm>
#include <string>

#include <fstream>
#include <sstream>
#include <iostream>
#include <stdlib.h>


// Shared scanner state and lookup tables used by every function below.
// The unqualified names (string, cout, ifstream, ...) in this file rely on
// this file-scope using-directive.
// NOTE(review): the global name `index` may clash with the POSIX index()
// function on toolchains whose headers declare it - confirm before porting.
using namespace std;
string instr;               // text being scanned; main() assigns the file contents to it
int index;                  // position in instr of the next character getChar() reads
char character;             // character most recently read by getChar()
string token;               // lexeme currently being assembled by concat()
const int len = 100;        // base size from which the three table sizes are derived
string Reserve[len];        // reserved words, slot number = category code (see init_Reserve)
string Boundary[2 * len];   // delimiters, slot number = category code (see init_Boundary)
string Operator[3 * len];   // operators, slot number = category code (see init_Operator)

// Result of one scanning step: a <category code, lexeme text> pair.
struct Binary {
	int category;   // category code of the token
	string value;   // token text; "-" when the caller supplies none

	// Builds a pair from category code `c` and text `v` (default "-").
	Binary(int c, string v = "-") : category(c), value(v) {}
};

// Fills Reserve[1..50] with the reserved words; a word's slot number is its
// category code. Slot 0 is left empty.
void init_Reserve() {
	static const char *const keywords[] = {
		"main", "int", "if", "else", "while",
		"for", "read", "write", "bool", "break",
		"case", "catch", "char", "class", "const",
		"continue", "default", "delete", "do", "double",
		"enum", "false", "true", "float", "friend",
		"goto", "inline", "long", "new", "private",
		"protected", "public", "return", "short", "signed",
		"sizeof", "static", "struct", "switch", "this",
		"try", "typedef", "unsigned", "using", "virtual",
		"void", "include", "iostream", "namespace", "std"
	};
	const int count = sizeof(keywords) / sizeof(keywords[0]);

	// The first keyword goes to slot 1, so shift every position by one.
	for (int k = 0; k < count; ++k)
		Reserve[k + 1] = keywords[k];
}

// Fills Operator[210..220] with the operator spellings; the slot number is
// the operator's category code.
void init_Operator() {
	static const char *const symbols[] = {
		"+", "-", "*", "/", "<", "<=", ">", ">=", "!=", "==", "="
	};
	const int firstCode = 210;
	const int count = sizeof(symbols) / sizeof(symbols[0]);

	for (int k = 0; k < count; ++k)
		Operator[firstCode + k] = symbols[k];
}

// Fills Boundary[121..132] with the delimiter spellings; the slot number is
// the delimiter's category code.
void init_Boundary() {
	static const char *const symbols[] = {
		"(", ")", ",", ";", "{", "}", "#", "'", "\"", "//", "/*", "*/"
	};
	const int firstCode = 121;
	const int count = sizeof(symbols) / sizeof(symbols[0]);

	for (int k = 0; k < count; ++k)
		Boundary[firstCode + k] = symbols[k];
}

// Reports whether the current character is one the scanner skips:
// space, tab, newline, form feed, vertical tab or NUL.
bool isWs() {
	switch (character) {
		case ' ':
		case '\t':
		case '\n':
		case '\f':
		case '\v':
		case '\0':
			return true;
		default:
			return false;
	}
}

// Reports whether the current character is the first character of any
// entry in Operator[210..220].
bool isOperator() {
	int code = 210;
	while (code <= 220) {
		if (character == Operator[code][0])
			return true;
		++code;
	}
	return false;
}

// Reports whether the current character is the first character of any
// entry in Boundary[121..132].
bool isBoundary() {
	int code = 121;
	while (code <= 132) {
		if (character == Boundary[code][0])
			return true;
		++code;
	}
	return false;
}

// Loads the character at the cursor into `character` and advances the
// cursor by one. No bounds check is done here, so the caller must keep
// `index` within the range of `instr`.
void getChar() {
	character = instr[index];
	++index;
}

void getnbc() {
	while (isWs()) {
		getChar();
	}
}

// Appends the current character to the token being assembled.
void concat() {
	token += character;
}

// Reports whether the current character is an ASCII letter (A-Z or a-z).
bool letter() {
	const bool isUpper = character >= 'A' && character <= 'Z';
	const bool isLower = character >= 'a' && character <= 'z';
	return isUpper || isLower;
}

// Reports whether the current character is a hexadecimal digit
// (0-9, A-F or a-f).
bool hex() {
	const bool isUpperHex = character >= 'A' && character <= 'F';
	const bool isLowerHex = character >= 'a' && character <= 'f';
	const bool isDecimal = character >= '0' && character <= '9';
	return isUpperHex || isLowerHex || isDecimal;
}

// Reports whether the current character is a decimal digit (0-9).
bool digit() {
	return '0' <= character && character <= '9';
}

// Un-reads one character: moves the cursor back by one and resets the
// current character to a blank.
void retract() {
	--index;
	character = ' ';
}

// Looks the current token up in the reserved-word table.
// Returns the slot number of the first matching entry, or -1 when the
// token is not in the table.
int reserve() {
	const string *const tableEnd = Reserve + len;
	const string *const hit = std::find(Reserve, tableEnd, token);
	if (hit == tableEnd)
		return -1;
	return static_cast<int>(hit - Reserve);
}

// Looks the current token up among Operator[210..220].
// Returns the matching category code, or -1 when there is no match.
int operator1() {
	const string *const first = Operator + 210;
	const string *const last = Operator + 221;   // one past slot 220
	const string *const hit = std::find(first, last, token);
	if (hit == last)
		return -1;
	return static_cast<int>(hit - Operator);
}

// Looks the current token up among Boundary[121..132].
// Returns the matching category code, or -1 when there is no match.
int boundary() {
	int code = 121;
	while (code <= 132) {
		if (token == Boundary[code])
			return code;
		++code;
	}
	return -1;
}

// Classifies the numeric token: returns 800 when a '.' appears in it
// (before any NUL character), otherwise 400.
int digit1() {
	for (string::size_type pos = 0; pos < token.size() && token[pos] != '\0'; ++pos) {
		if (token[pos] == '.')
			return 800;
	}
	return 400;
}

// Reports whether `str2` occurs inside `str1`.
//
// str1: text to search.
// str2: text to look for (for example a two-character delimiter such as
//       "/*"). An empty `str2` never matches.
// Returns true when `str2` is found, false otherwise.
//
// The search uses string::find rather than building a pair with
// `str1[i] + str1[i + 1]`: adding two chars yields an integer sum, not a
// two-character string, so that comparison could never match a delimiter.
bool findstring(const string &str1, const string &str2) {
	if (str2.empty())
		return false;
	return str1.find(str2) != string::npos;
}

// Reports the current token as invalid on standard output and returns the
// placeholder pair <0, "-">.
Binary error() {
	const Binary invalid(0, "-");
	cout << token << "\t-->\t该单词不合法" << endl;
	return invalid;
}


Binary LexAnalyze() {
	token = "";
	getChar();
	getnbc();
	string val;
	int num = -1;
	switch (character) {

		case'a':
		case'b':
		case'c':
		case'd':
		case'e':
		case'f':
		case'g':
		case'h':
		case'i':
		case'j':
		case'k':
		case'l':
		case'm':
		case'n':
		case'o':
		case'p':
		case'q':
		case'r':
		case's':
		case't':
		case'u':
		case'v':
		case'w':
		case'x':
		case'y':
		case'z':
		case'A':
		case'B':
		case'C':
		case'D':
		case'E':
		case'F':
		case'G':
		case'H':
		case'I':
		case'J':
		case'K':
		case'L':
		case'M':
		case'N':
		case'O':
		case'P':
		case'Q':
		case'R':
		case'S':
		case'T':
		case'U':
		case'V':
		case'W':
		case'X':
		case'Y':
		case'Z':

			concat();
			getChar();
			while (letter() || digit()) {
				concat();
				getChar();
			}
			retract();
			num = reserve();
			if (num != -1) {
				return Binary(num, token);
			} else {
				return Binary(700, token);
			}
			break;
		case'*':
			concat();
			num = operator1();
			return Binary(num, token);
			break;
		case'<':

		case'>':

		case'=':

		case'!':

		case'+':

		case'-':
		case'/':

			concat();
			getChar();
			if (character == '=')
				concat();
			else
				retract();
			num = operator1();

			if (token == "/" ) {
				getChar();
				if (character == '/') {
					concat();
					num = boundary();
					getChar();
					while (character != '\n') {
						concat();
						getChar();
					}
					retract();
				}
			}

			if (token == "/") {
				if (character == '*') {
					bool a = true;
					concat();
					num = boundary();
					getChar();
					string token2 = "*/";
					while (a) {
						while (character != '*' && isWs()) {
							getChar();
							concat();
						}
						getChar();
						if (character == '/') {
							concat();
							a = false;
							break;
						} else
							retract();
					}

				}
			}
			return Binary(num, token);
			break;
		case'(':
		case')':

		case',':

		case';':

		case'{':
		case'}':

		case'#':
			concat();
			if (isBoundary()) {
				num = boundary();
				return Binary(num, token);
			}
			break;
		case '\'':
			concat();
			getChar();
			if (character >= 'a' && character <= 'z' || character >= 'A' && character <= 'Z')
				concat();
			else if (character == '\\') {
				concat();
				getChar();
				if (character == 'n' || character == 't' || character == 'r' || character == '0' || character == '\''
				        || character == '\"'
				        || character == '\\') {
					concat();
				}
			}

			getChar();
			if (character == '\'') {
				concat();
				return Binary(500, token);
			} else
				retract();
			num = boundary();
			return Binary(num, token);
			break;
		case '"':

			concat();
			getChar();
			while (character != '"') {
				concat();
				getChar();
			}
			concat();
			return Binary(600, token);
			break;
		case'0':
				
				concat();
			getChar();
			if (character == 'x') {
				concat();
				getChar();
				while (hex()) {
					concat();
					getChar();
				}
				retract();
				return Binary(160, token);
			} else {
				while (digit()) {
					concat();
					getChar();
				}
				
				if (character == '.') {
					concat();
					getChar();
					while (digit()) {
						concat();
						getChar();
					}
					if (character == 'e') {
						concat();
						getChar();
						while (digit()) {
							concat();
							getChar();
						}
					}
				}
				retract();
				num = digit1();
				return Binary(num, token);
			}
			break;
		case'1':
			case'2':
				case'3':
					case'4':
						case'5':
							case'6':
								case'7':
									case'8':
										case'9':

												
												concat();
			getChar();
			while (digit()) {
				concat();
				getChar();
			}
			
			if (character == '.') {
				concat();
				getChar();
				while (digit()) {
					concat();
					getChar();
				}
				if (character == 'e' || character == 'E') {
					concat();
					getChar();
					while (digit()) {
						concat();
						getChar();
					}
				}
			}
			retract();
			num = digit1();
			return Binary(num, token);
			break;
		case '\\':
			concat();
			getChar();
			if (character == 'n' || character == 't' || character == 'r' || character == '0' || character == '\''
			        || character == '\"'
			        || character == '\\') {
				concat();
				return Binary(500, token);
			} else {
				retract();
				num = boundary();
				return Binary(num, token);
			}
			break;
		default:
			
			concat();
			return error();
	}
}

// Prints every non-empty entry of `table` (slots 0..count-1) followed by
// two tabs and its slot number.
static void printCodeRows(const string *table, int count) {
	for (int code = 0; code < count; ++code) {
		if (table[code].empty())
			continue;
		cout << table[code] << "\t\t" << code << endl;
	}
}

// Prints the reserved-word, delimiter and operator tables together with
// their category codes.
void show_table() {
	cout << "保留字符\t类别编码" << endl;
	for (int code = 0; code < len; ++code) {
		const string &word = Reserve[code];
		if (word.empty())
			continue;
		// Words of eight or more characters already reach the next tab stop,
		// so they get one tab instead of two.
		const char *gap = (word.size() >= 8) ? "\t" : "\t\t";
		cout << word << gap << code << endl;
	}

	cout << "界符\t\t类别编码" << endl;
	printCodeRows(Boundary, 2 * len);

	cout << "运算符\t\t类别编码" << endl;
	printCodeRows(Operator, 3 * len);
}


// Reads the whole file `filename` and returns its contents.
//
// filename: path of the file to read; may be null.
// Returns the file contents, or an empty string when `filename` is null or
// the file cannot be opened. In the latter case a message is written to
// standard error so the failure is not mistaken for an empty file.
string readFileIntoString(const char *filename) {
	if (!filename) {
		cerr << "无法打开文件: (null)" << endl;
		return string();
	}

	ifstream ifile(filename);
	if (!ifile) {
		cerr << "无法打开文件: " << filename << endl;
		return string();
	}

	// Copy the entire stream buffer in one step instead of char by char.
	ostringstream buf;
	buf << ifile.rdbuf();
	return buf.str();
}

int main() {
	init_Reserve();
	init_Boundary();
	init_Operator();
	index = 0;
	character = ' ';
	token = "";
	
	char fn1[] = "C:\\Users\\86198\\Desktop\\21172227a.txt";
	char *fn = fn1;
	
	string str;
	str = readFileIntoString(fn);
	instr = str;
	cout << instr << endl; 
	
	Binary word(0, "-");

	while (index < instr.size()) {
		word = LexAnalyze();
		if (word.category != 0) {
			cout << "识别单词:\t(" << word.category << "," << word.value << ")" << endl;
		}

	}
	
	show_table();

	system("pause");
	return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值