编译原理词法分析+语法分析(LL1分析表实现)

词法分析器可识别如下格式的类C语言程序:

{
int a;
int b;
int i;
a = 0;
b = 1;
for (i=1; i <= 10; i=i+1)
{
a=a+i;
b=b*i;
}
}

语法分析:(注意C语言语法规则,先定义后操作,所有定义必须写在前面)

语法分析阶段会先对词法分析的输出文本做一次格式化(输出到C.txt),便于后续语义分析使用。

语法分析采用LL1分析表完成

LL1分析表如下:

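分析表的列(输入符号)依次为: { } int if read write for while ID NUM ; ( ) else + - * / > < = >= <= == != #
下表按非终结符逐行列出所有非空表项,未列出的表项为空(表示语法错误)。

| 非终结符 | 输入符号 | 产生式 |
|---|---|---|
| <program> S | `{` | S -> { A B } |
| <declaration_list> A | `int` | A -> C |
| <declaration_list> A | `{` `}` `if` `read` `write` `for` `while` `ID` `;` `#` | A -> ε |
| <statement_list> B | `{` `if` `read` `write` `for` `while` `ID` `;` | B -> F |
| <statement_list> B | `}` `#` | B -> ε |
| <declaration_list1> C | `int` | C -> D C |
| <declaration_list1> C | `{` `}` `if` `read` `write` `for` `while` `ID` `;` `#` | C -> ε |
| <declaration_stat> D | `int` | D -> int ID ; |
| <statement_list1> F | `{` `if` `read` `write` `for` `while` `ID` `;` | F -> G F |
| <statement_list1> F | `}` `#` | F -> ε |
| <statement> G | `{` | G -> M |
| <statement> G | `if` | G -> H |
| <statement> G | `read` | G -> K |
| <statement> G | `write` | G -> L |
| <statement> G | `for` | G -> J |
| <statement> G | `while` | G -> I |
| <statement> G | `ID` | G -> R |
| <statement> G | `;` | G -> ; |
| <if_stat> H | `if` | H -> if ( P ) G Z |
| ε 或 else <statement> Z | `else` | Z -> else G |
| ε 或 else <statement> Z | `{` `}` `if` `read` `write` `for` `while` `ID` `;` `#` | Z -> ε |
| <while_stat> I | `while` | I -> while ( P ) G |
| <for_stat> J | `for` | J -> for ( O ; P ; O ) G |
| <read_stat> K | `read` | K -> read ID ; |
| <write_stat> L | `write` | L -> write Q ; |
| <compound_stat> M | `{` | M -> { B } |
| <assignment_expression> O | `ID` | O -> ID = Q |
| <bool_expression> P | `ID` `NUM` `(` | P -> Q T |
| <arithmetic_expression> Q | `ID` `NUM` `(` | Q -> V W |
| <assignment_stat> R | `ID` | R -> O ; |
| <bool_expression1> T | `>` | T -> > Q |
| <bool_expression1> T | `<` | T -> < Q |
| <bool_expression1> T | `=` | T -> = Q |
| <bool_expression1> T | `>=` | T -> >= Q |
| <bool_expression1> T | `<=` | T -> <= Q |
| <bool_expression1> T | `==` | T -> == Q |
| <bool_expression1> T | `!=` | T -> != Q |
| <term> V | `ID` `NUM` `(` | V -> X Y |
| <arithmetic_expression1> W | `+` | W -> + V W |
| <arithmetic_expression1> W | `-` | W -> - V W |
| <arithmetic_expression1> W | `;` `)` `>` `<` `=` `>=` `<=` `==` `!=` `#` | W -> ε |
| <factor> X | `ID` | X -> ID |
| <factor> X | `NUM` | X -> NUM |
| <factor> X | `(` | X -> ( Q ) |
| <term1> Y | `*` | Y -> * X Y |
| <term1> Y | `/` | Y -> / X Y |
| <term1> Y | `;` `)` `+` `-` `>` `<` `=` `>=` `<=` `==` `!=` `#` | Y -> ε |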

注意 if 语句的特殊处理:if 语句有两个产生式(带 else 与不带 else),但分析表中只保留了一个产生式(H -> if ( P ) G Z),因此在代码中对 else 分支做了特判。

 

整个词法+语法分析代码如下(C++):

/**
*	Create By ZZK
**/
#include<iostream>
#include<fstream>
#include<string>
#include<string.h>
#include<vector>
#include<stack>
#include<map>
using namespace std;
#define mkp(a,b) make_pair(a,b)
// NOTE(review): on some platforms <string.h> also declares a function named
// `index`; if this global fails to compile there, it needs renaming — confirm.
int index = 0;				 // Read cursor into the current line held in `buffer`
int len = 0;				 // Length of the current line held in `buffer`
char buffer[1005];			 // Characters of the current input line
ifstream read("A.txt");		 // Lexer input stream; change the path to your own source file
ofstream anss("lex.txt");	 // Lexer output stream; change the path to your own file
int nowline = 0;			 // Number of the line currently being scanned
int firstline = 0;			 // Line on which the current block comment was opened
// Scratch string for the lexeme being built; cleared by analyse() for each token.
string ans = "";
string mKeyWord[7] = { "if","else","while","for","int","read","write" };// Reserved words
char mSprate[6] = { ';',',','{','}','(',')' };				// Delimiters
char mOperater[8] = { '+','-','*','/','>','<','=','!' };     // Operator characters
// Characters skipped between tokens (whitespace).
char mPassword[4] = { ' ','\t','\r','\n' };

// Forward declarations of the lexer's state functions (one per DFA state).
// Meaning of the parameters shared by the functions below:

/*
*	upper  the text accumulated before entering this state
*	st     the character handed over by the previous state
*
*/

void error(string st, int line);				// Error state
void firstAlph(string upper, char st);			// State A
void firstNumber1(string upper, char st);		// State B
void firstNumber0(string upper, char st);		// State B1
void firstSprate(string upper, char st);		// State C
void firstFei(string upper, char st);			// State D1
void secondEqual(string upper, char st);		// State D
void firstE_Mi_Ma(string upper, char st);		// State D2
void firstDiv(char st);							// State E
void secondM(string upper, char st);			// State E1
void ThirdM(string upper, char st);				// State E2
void fourthDiv(string upper, char st);			// State E3

/**
 *	获取下一个字符函数
 *	返回下一个字符
 *  字符指针在文件中的位置用全局变量index来存储
 *
 */
char getnextchar() {
	if (index < len)
		return buffer[index++];
	else {
		if (!read.eof()) {
			nowline++;
			read.getline(buffer, 1000);
			index = 0;
			len = strlen(buffer);
			//每行后面加个空格标注行
			buffer[len] = ' ';
			buffer[len + 1] = '\0';
			len++;
			return buffer[index++];
		}
		else {
			return '\0';
		}
	}
}
// Returns true when `word` is one of the reserved words in mKeyWord.
bool isKeyWord(string word) {
	const int total = sizeof(mKeyWord) / sizeof(mKeyWord[0]);
	int pos = 0;
	// Advance until a match is found or the table is exhausted.
	while (pos < total && !(mKeyWord[pos] == word))
		++pos;
	return pos < total;
}
// Returns true when `ch` is one of the delimiter characters in mSprate.
bool isSprate(char ch) {
	const int total = sizeof(mSprate) / sizeof(mSprate[0]);
	int pos = 0;
	while (pos < total && mSprate[pos] != ch)
		++pos;
	return pos < total;
}
// Returns true when `ch` is one of the operator characters in mOperater.
bool isOperator(char ch) {
	const int total = sizeof(mOperater) / sizeof(mOperater[0]);
	int pos = 0;
	while (pos < total && mOperater[pos] != ch)
		++pos;
	return pos < total;
}
// Returns true when `ch` is one of the skipped (whitespace) characters in mPassword.
bool isPassWord(char ch) {
	const int total = sizeof(mPassword) / sizeof(mPassword[0]);
	int pos = 0;
	while (pos < total && mPassword[pos] != ch)
		++pos;
	return pos < total;
}
// Returns true when `st` is an ASCII letter (a-z or A-Z).
bool isAlph(char st) {
	const bool lower = ('a' <= st) && (st <= 'z');
	const bool upper = ('A' <= st) && (st <= 'Z');
	return lower || upper;
}
// Returns true when `st` is an ASCII decimal digit (0-9).
bool isNumber(char st) {
	return !(st < '0' || st > '9');
}
/**
* Reports a lexical error on standard output.
* st    the offending text
* line  the line number the error is reported for
*/
void error(string st, int line) {
	cout << "第" << line;
	cout << "行错误: " << st;
	cout << endl;
}

// State A: scans an identifier or reserved word that starts with `upper + st`
// and writes it to the lexer output as "DEF" (reserved word) or "ID".
void firstAlph(string upper, char st) {
	string lexeme = upper + st;
	// Consume letters and digits iteratively instead of recursing per character.
	char following = getnextchar();
	while (isAlph(following) || isNumber(following)) {
		lexeme += following;
		following = getnextchar();
	}
	index--;	// push the terminating character back
	const char* category = isKeyWord(lexeme) ? "DEF " : "ID ";
	anss << nowline << " " << category << lexeme << endl;
}

// State B: scans an unsigned integer that starts with `upper + st`
// and writes it to the lexer output as "UNSIGNINT".
void firstNumber1(string upper, char st) {
	string digits = upper + st;
	// Keep consuming while the input continues with digits.
	char following = getnextchar();
	while (isNumber(following)) {
		digits += following;
		following = getnextchar();
	}
	index--;	// push the terminating character back
	anss << nowline << " " << "UNSIGNINT " << digits << endl;
}

// State B1: a number that starts with '0' is emitted immediately as "UNSIGNINT",
// without reading any further characters.
void firstNumber0(string upper, char st) {
	const string lexeme = upper + st;
	anss << nowline << " " << "UNSIGNINT " << lexeme << endl;
}

// State C: writes a delimiter token ("DEM") to the lexer output.
void firstSprate(string upper, char st) {
	const string lexeme = upper + st;
	anss << nowline << " " << "DEM " << lexeme << endl;
}

void firstFei(string upper, char st) {
	char nextch = getnextchar();
	if (nextch != '=') {
		index--;
		error(upper + st, nowline);//出现错误
	}
	else {
		secondEqual(upper + st, nextch);//状态转移
	}
}
// State D: writes the operator `upper + st` to the lexer output as "OP".
// upper is the text accumulated before this state, st the final character.
void secondEqual(string upper, char st) {
	const string op = upper + st;
	anss << nowline << " " << "OP " << op << endl;
}
// State D2: handles '=', '<' and '>'. When the next character is '=', the
// two-character operator is emitted; otherwise the single character is emitted.
void firstE_Mi_Ma(string upper, char st) {
	const char following = getnextchar();
	if (following == '=') {
		// The '=' stays consumed and becomes part of the operator.
		secondEqual(upper + st, following);
		return;
	}
	index--;	// not part of this operator; push it back
	anss << nowline << " " << "OP " << st << endl;
}

// State E: handles '/'. It either opens a block comment (when followed by '*')
// or is emitted as the division operator.
void firstDiv(char st) {
	ans += st;
	// Look one character ahead to tell a comment from a division.
	const char following = getnextchar();
	if (following != '*') {
		index--;	// push the lookahead back
		anss << nowline << " " << "OP " << st << endl;
		return;
	}
	firstline = nowline;	// remember where the comment was opened
	secondM(ans, following);
}
// State E1: inside a block comment, skipping characters until a '*' is found.
//   upper  the text accumulated before this state
//   st     the character handed over by the previous state
// When the input ends before the comment is closed, an error is reported for
// the line on which the comment was opened (firstline) and scanning stops.
// Previously the function kept recursing after that error and never returned.
void secondM(string upper, char st) {
	for (;;) {
		char nextch = getnextchar();
		if (nextch == '\0') {
			// End of input inside the comment: report it and stop.
			error(upper + st, firstline);
			return;
		}
		if (nextch == '*') {
			// Possible end of the comment; the next state decides.
			ThirdM(upper + st, nextch);
			return;
		}
		// Any other character belongs to the comment and is discarded.
	}
}

void ThirdM(string upper, char st) {
	char nextch = getnextchar();
	if (nextch == '*') {
		ThirdM(upper, nextch);//再次进入此状态
	}
	else if (nextch != '*'&&nextch != '/') {
		string temp = "/";
		secondM(temp, '*');				//回退上一个状态
	}
	else if (nextch == '/') {
		upper = upper + st;
		fourthDiv(upper, nextch);//跳转下一个状态
	}
	else {
		error("/*", firstline);//错误就输出
	}
}
// State E3 (comment closed): writes the opening and closing comment markers
// to the lexer output as two "NOTE" records. The parameters are not used.
void fourthDiv(string upper, char st) {
	const char* markers[2] = { "/*", "*/" };
	for (int k = 0; k < 2; ++k) {
		anss << nowline << " " << "NOTE " << markers[k] << endl;
	}
}

void analyse() {
	char st = getnextchar();
	while (st != '\0') {
		ans = "";
		//根据第一个字符进行状态转移
		if (isPassWord(st)) {
			st = getnextchar();
			continue;
		}
		if (isAlph(st)) {
			firstAlph(ans, st);
		}
		else if (st == '0') {
			firstNumber0(ans, st);
		}
		else if (st != '0'&&isNumber(st)) {
			firstNumber1(ans, st);
		}
		else if (isSprate(st)) {
			firstSprate(ans, st);
		}
		else if (st == '!') {
			firstFei(ans, st);
		}
		else if (st == '+' || st == '-' || st == '*') {
			secondEqual(ans, st);
		}
		else if (st == '=' || st == '<' || st == '>') {
			firstE_Mi_Ma(ans, st);
		}
		else if (st == '/') {
			firstDiv(st);
		}
		else {
			error(ans + st, nowline);
		}
		st = getnextchar();
	}
}


/*   下面语法分析  */


int index2 = 0;				 // Index of the next token that getNext() returns from `text`
int len2 = 0;				 // Number of tokens stored in `text`
char buffer2[1005];			 // One line of the lexer output file
ifstream read2("lex.txt");		 // Parser input stream; change the path to your own file
// Parse stack of grammar symbols.
stack<string>mStack;
// Terminal symbols of the grammar. NOTE(review): "+" appears twice in this list.
string Vt[26] = { "{","}","int","if","read","write","for","while","ID","NUM",";","(",")","else","+","-","*","/",">","<","=","+","<=",">=","==","!=" };
// LL(1) table: (nonterminal, terminal) -> production right-hand side.
map<pair<string, string>, string>mp;


void inittext();	// Loads the token list
void error();		// Reports a syntax error
void S_program();	// Runs the syntax analysis
// Fills the LL(1) table `mp`.
void initMp();
// One token as read back from the lexer output.
struct Word
{
	string line;	// Line the token appears on
	string content;	// Token text
	string type;	// Token type
};

Word word;		// Token currently being examined
Word text[1000];// All tokens of the input
Word lastword;	// Previous token, used when reporting errors

/*	Fills the LL(1) parse table `mp`.
 *	Key:   (nonterminal, lookahead terminal).
 *	Value: the right-hand side of the production as space-separated symbols;
 *	       "NULL" stands for the empty production.
 *	The entries were generated from a spreadsheet by a Python script rather
 *	than written by hand.
 *
 *	Fixes over the generated output:
 *	 - the keys for "=" and "==" had a leading blank (" =", " =="), so they
 *	   could never match a token;
 *	 - the entry for (W, "-") expanded to "+ V W" instead of "- V W".
 **/
void initMp() {
	mp[mkp("S", "{")] = "{ A B }";
	mp[mkp("A", "{")] = "NULL";
	mp[mkp("A", "}")] = "NULL";
	mp[mkp("A", "int")] = "C";
	mp[mkp("A", "if")] = "NULL";
	mp[mkp("A", "read")] = "NULL";
	mp[mkp("A", "write")] = "NULL";
	mp[mkp("A", "for")] = "NULL";
	mp[mkp("A", "while")] = "NULL";
	mp[mkp("A", "ID")] = "NULL";
	mp[mkp("A", ";")] = "NULL";
	mp[mkp("A", "#")] = "NULL";
	mp[mkp("B", "{")] = "F";
	mp[mkp("B", "}")] = "NULL";
	mp[mkp("B", "if")] = "F";
	mp[mkp("B", "read")] = "F";
	mp[mkp("B", "write")] = "F";
	mp[mkp("B", "for")] = "F";
	mp[mkp("B", "while")] = "F";
	mp[mkp("B", "ID")] = "F";
	mp[mkp("B", ";")] = "F";
	mp[mkp("B", "#")] = "NULL";
	mp[mkp("C", "{")] = "NULL";
	mp[mkp("C", "}")] = "NULL";
	mp[mkp("C", "int")] = "D C";
	mp[mkp("C", "if")] = "NULL";
	mp[mkp("C", "read")] = "NULL";
	mp[mkp("C", "write")] = "NULL";
	mp[mkp("C", "for")] = "NULL";
	mp[mkp("C", "while")] = "NULL";
	mp[mkp("C", "ID")] = "NULL";
	mp[mkp("C", ";")] = "NULL";
	mp[mkp("C", "#")] = "NULL";
	mp[mkp("D", "int")] = "int ID ;";
	mp[mkp("F", "{")] = "G F";
	mp[mkp("F", "}")] = "NULL";
	mp[mkp("F", "if")] = "G F";
	mp[mkp("F", "read")] = "G F";
	mp[mkp("F", "write")] = "G F";
	mp[mkp("F", "for")] = "G F";
	mp[mkp("F", "while")] = "G F";
	mp[mkp("F", "ID")] = "G F";
	mp[mkp("F", ";")] = "G F";
	mp[mkp("F", "#")] = "NULL";
	mp[mkp("G", "{")] = "M";
	mp[mkp("G", "if")] = "H";
	mp[mkp("G", "read")] = "K";
	mp[mkp("G", "write")] = "L";
	mp[mkp("G", "for")] = "J";
	mp[mkp("G", "while")] = "I";
	mp[mkp("G", "ID")] = "R";
	mp[mkp("G", ";")] = ";";
	mp[mkp("H", "if")] = "if ( P ) G Z";
	mp[mkp("Z", "{")] = "NULL";
	mp[mkp("Z", "}")] = "NULL";
	mp[mkp("Z", "if")] = "NULL";
	mp[mkp("Z", "read")] = "NULL";
	mp[mkp("Z", "write")] = "NULL";
	mp[mkp("Z", "for")] = "NULL";
	mp[mkp("Z", "while")] = "NULL";
	mp[mkp("Z", "ID")] = "NULL";
	mp[mkp("Z", ";")] = "NULL";
	mp[mkp("Z", "else")] = "else G";
	mp[mkp("Z", "#")] = "NULL";
	mp[mkp("I", "while")] = "while ( P ) G";
	mp[mkp("J", "for")] = "for ( O ; P ; O ) G";
	mp[mkp("K", "read")] = "read ID ;";
	mp[mkp("L", "write")] = "write Q ;";
	mp[mkp("M", "{")] = "{ B }";
	mp[mkp("O", "ID")] = "ID = Q";
	mp[mkp("P", "ID")] = "Q T";
	mp[mkp("P", "NUM")] = "Q T";
	mp[mkp("P", "(")] = "Q T";
	mp[mkp("Q", "ID")] = "V W";
	mp[mkp("Q", "NUM")] = "V W";
	mp[mkp("Q", "(")] = "V W";
	mp[mkp("R", "ID")] = "O ;";
	mp[mkp("T", ">")] = "> Q";
	mp[mkp("T", "<")] = "< Q";
	mp[mkp("T", "=")] = "= Q";		// key fixed: was " ="
	mp[mkp("T", ">=")] = ">= Q";
	mp[mkp("T", "<=")] = "<= Q";
	mp[mkp("T", "==")] = "== Q";	// key fixed: was " =="
	mp[mkp("T", "!=")] = "!= Q";
	mp[mkp("V", "ID")] = "X Y";
	mp[mkp("V", "NUM")] = "X Y";
	mp[mkp("V", "(")] = "X Y";
	mp[mkp("W", ";")] = "NULL";
	mp[mkp("W", ")")] = "NULL";
	mp[mkp("W", "+")] = "+ V W";
	mp[mkp("W", "-")] = "- V W";	// production fixed: was "+ V W"
	mp[mkp("W", ">")] = "NULL";
	mp[mkp("W", "<")] = "NULL";
	mp[mkp("W", "=")] = "NULL";		// key fixed: was " ="
	mp[mkp("W", ">=")] = "NULL";
	mp[mkp("W", "<=")] = "NULL";
	mp[mkp("W", "==")] = "NULL";	// key fixed: was " =="
	mp[mkp("W", "!=")] = "NULL";
	mp[mkp("W", "#")] = "NULL";
	mp[mkp("X", "ID")] = "ID";
	mp[mkp("X", "NUM")] = "NUM";
	mp[mkp("X", "(")] = "( Q )";
	mp[mkp("Y", ";")] = "NULL";
	mp[mkp("Y", ")")] = "NULL";
	mp[mkp("Y", "+")] = "NULL";
	mp[mkp("Y", "-")] = "NULL";
	mp[mkp("Y", "*")] = "* X Y";
	mp[mkp("Y", "/")] = "/ X Y";
	mp[mkp("Y", ">")] = "NULL";
	mp[mkp("Y", "<")] = "NULL";
	mp[mkp("Y", "=")] = "NULL";		// key fixed: was " ="
	mp[mkp("Y", ">=")] = "NULL";
	mp[mkp("Y", "<=")] = "NULL";
	mp[mkp("Y", "==")] = "NULL";	// key fixed: was " =="
	mp[mkp("Y", "!=")] = "NULL";
	mp[mkp("Y", "#")] = "NULL";
}

// Returns true when `s` is one of the terminal symbols listed in Vt.
bool isVt(string s) {
	const int total = sizeof(Vt) / sizeof(Vt[0]);
	int pos = 0;
	while (pos < total && !(s == Vt[pos]))
		++pos;
	return pos < total;
}
// Splits `str` at every single space character and returns the pieces in order.
// Consecutive, leading or trailing spaces yield empty pieces, and an empty
// input yields one empty piece.
vector<string> split(string str)
{
	vector<string> pieces;
	str.push_back(' ');	// sentinel so the last piece is terminated as well

	string::size_type start = 0;
	string::size_type stop = str.find(' ', start);
	while (stop != string::npos)
	{
		pieces.push_back(str.substr(start, stop - start));
		start = stop + 1;
		stop = str.find(' ', start);
	}
	return pieces;
}

// Loads the lexer output from `read2` into the token array `text`.
//
// Each non-empty line is expected to hold "<line> <category> <lexeme>".
// The token type used by the parser is "NUM" for UNSIGNINT, "ID" for
// identifiers, and the lexeme itself for everything else. Empty lines and
// lines with fewer than three fields are skipped.
//
// The end marker "#" is always appended, whether or not the file ends with a
// newline. Previously it was only appended when an empty read hit end of
// file, because the fallback branch after the loop could never run.
void inittext() {
	// Keep one slot free for the end marker so `text` cannot overflow.
	const int capacity = static_cast<int>(sizeof(text) / sizeof(text[0])) - 1;

	// Use the read itself as the loop condition: this also stops on an
	// over-long line (failbit) instead of looping forever.
	while (read2.getline(buffer2, 1000)) {
		string tmp = buffer2;
		if (tmp.empty())
			continue;

		vector<string> strlist = split(tmp);
		if (strlist.size() < 3)
			continue;	// malformed record: the three fields are not all present

		if (len2 >= capacity) {
			cout << "单词数量超过上限" << capacity << ",多余的单词被忽略" << endl;
			break;
		}

		word.line = strlist[0];
		word.type = strlist[1];
		word.content = strlist[2];
		if (word.type == "UNSIGNINT")
			word.type = "NUM";
		else if (word.type != "ID")
			word.type = word.content;	// keywords, operators, delimiters
		text[len2++] = word;
	}

	// End-of-input marker expected by the parser.
	word.line = "最后一";
	word.content = "#";
	word.type = "#";
	text[len2++] = word;
}

// Returns the next token from `text` and advances index2. Once every token
// has been handed out, the last stored token is returned and index2 stays put.
Word getNext() {
	const int pos = (index2 < len2) ? index2++ : len2 - 1;
	return text[pos];
}

// Returns true when the type of `wd` is one of the token types listed below,
// i.e. a token that can begin a statement.
bool isStatementFirst(Word wd) {
	static const char* const starters[] = { "if", "read", "for", "while", "{", "ID", "write", ";" };
	const int total = sizeof(starters) / sizeof(starters[0]);
	for (int k = 0; k < total; ++k) {
		if (wd.type == starters[k])
			return true;
	}
	return false;
}

void error() {
	cout << lastword.line << "行错误  " << "错误单词: " << lastword.content << endl;
}

// Table-driven LL(1) parse of the token list.
//
// Expects the caller to have pushed "#" and then the start symbol onto mStack
// and to have filled `mp` and `text`. In each step the top symbol is popped:
//  - a terminal must match the current token, which is then consumed;
//  - "#" must meet the end marker, which ends the parse successfully;
//  - a nonterminal is replaced by the production found in `mp` for the
//    current token type ("NULL" means the empty production).
// A mismatch or a missing table entry is reported through error().
void S_program() {
	word = getNext();
	lastword = word;

	while (!mStack.empty()) {
		const string exp = mStack.top();
		mStack.pop();

		if (isVt(exp)) {
			if (word.type != exp) {
				error();
				return;
			}
			lastword = word;
			word = getNext();
			continue;
		}

		if (exp == "#") {
			if (exp == word.content)
				cout << "语法分析成功" << endl;
			else
				error();
			return;
		}

		// Single lookup instead of find() followed by operator[].
		map<pair<string, string>, string>::iterator entry = mp.find(mkp(exp, word.type));
		if (entry == mp.end()) {
			error();
			return;
		}

		// Special case for "else": peek at the token after it. When that token
		// cannot start a statement, the popped symbol is dropped without being
		// expanded.
		if (word.type == "else") {
			Word tmp = getNext();
			index2--;	// undo the peek
			if (!isStatementFirst(tmp)) {
				continue;
			}
		}

		const string production = entry->second;
		if (production != "NULL") {
			vector<string> symbols = split(production);
			// Push right to left so the leftmost symbol ends up on top.
			// Reverse iterators avoid std::reverse, whose header <algorithm>
			// is not included by this file.
			for (vector<string>::reverse_iterator it = symbols.rbegin(); it != symbols.rend(); ++it) {
				mStack.push(*it);
			}
		}
	}

	// The stack ran empty without meeting "#" (e.g. it was never initialised).
	error();
}

void outputtext() {
	ofstream outt("C.txt");	 //输出流
	for (int i = 0; i < len2; i++) {
		outt << text[i].type << ' ' << text[i].content << endl;
	}
}

// Entry point: runs the lexer, then loads its output and runs the LL(1) parser.
int main() {
	// The lexer cannot run without its input file.
	if (!read.is_open()) {
		cout << "打开文件失败" << endl;
		return 0;
	}

	// Lexical analysis
	analyse();
	cout << "词法分析完成,结果输出到了lex.txt文件" << endl;

	// The parser reads the lexer output back in.
	if (!read2.is_open()) {
		cout << "打开文件失败" << endl;
		return 0;
	}

	inittext();		// load the token list
	outputtext();	// write the formatted token list
	printf("C.txt文件输出成功\n");

	initMp();
	mStack.push("#");
	mStack.push("S");
	S_program();
	return 0;
}

 

python读取excel生成C++初始LL1分析表代码:

import xlrd
import xlwt

# Reads the LL(1) table from an Excel sheet and prints one C++ statement of the
# form `mp[mkp("<nonterminal>","<terminal>")] = "<production>";` per non-empty
# cell. Row 0 holds the terminal symbols; table cells start at column 2.
workbook = xlrd.open_workbook(r'E:\\Python3Project\\first\\data3.xlsx')

print(workbook.sheet_names())
sheet1 = workbook.sheet_by_index(0)  # sheet indices start at 0
# Sheet name, number of rows, number of columns
row = sheet1.nrows
col = sheet1.ncols
print(sheet1.name, sheet1.nrows, sheet1.ncols)
vt = sheet1.row(0)

for i in range(1, row):
    for j in range(2, col):
        ss = sheet1.row(i)[j].value
        # Only text cells that hold a production ("X -> ...") are of interest;
        # empty or numeric cells are skipped.
        if not isinstance(ss, str) or "->" not in ss:
            continue
        # Split at the first arrow instead of relying on fixed character offsets.
        lhs, _, rhs = ss.partition("->")
        lhs = lhs.strip()
        rhs = rhs.strip()
        if rhs == "ε":
            rhs = "NULL"
        # Strip the header cell: stray blanks (e.g. " =" or " ==") would end up
        # in the generated map keys and never match a token.
        terminal = str(vt[j].value).strip()
        stans = "mp[mkp(\"" + lhs + "\",\"" + terminal + "\")] = \"" + rhs + "\";"
        print(stans)

运行此程序可以看到此结果:

是不是很方便,直接生成C++代码,什么?你不懂怎么使用python读取excel,去看我的上一篇python读取excel博客吧

 

最后附上词法分析以及语法所有文件结果图片:

输入文件A.txt

单词分类文件:

 

lex.txt输出文件:

C.txt输出文件:

附上一张运行程序结果:

下一篇语义分析继续。

  • 5
    点赞
  • 37
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值