parser(包含一个我写的自动机)

shift-reduce的矛盾处理方法,定义每个reduce和terminal的优先级关系。

比如reduce E:E+int 的优先级为1,*的优先级为2,E:(E)的优先级为3,那么在遇到shift-reduce 冲突时,如果当前待reduce的表达式为E:E+int(优先级1,低于下一个符号*的优先级2),则会shift;而当待reduce的表达式为E:(E)(优先级3)时,则会reduce。

一个reduce的优先级应该取决于其中每个符号的优先级的最低值(大多数情况下只有一个符号),如果它高于下一个符号的优先级,则reduce,否则shift。

这样一定程度上可以解决shift reduce conflict。

SLR parser的自动机(DFA)的每个状态是一个Valid Item的集合。

看了一下Compiler的课觉得可以实现这个DFA,我就写了一下,后面发现真的挺复杂的,写了几天(不是很连续),中间可能有很多值得优化的点,不过第一次发现stl map用在这个程序上实在太适合了,感觉其他程序里用到map的真的有点卡手。

该程序中首先对从lexical analysis来的数据进行了预处理,将Reduce 表达式用一个Vector表示

而后:

1.计算每个Symbol的first set

2.计算每个Symbol的follow set

3.计算每个Symbol(nonterminal)接受某个Symbol后所到达的ValidItem集合(自动机)

4.计算每个ValidItem 接受某个Symbol后所到达的ValidItem集合(自动机2)

5.计算状态接受某个Symbol后所到达的ValidItem集合(自动机3),并将之标为一个新状态(或者以前存在的),到达终止状态时结束。

程序中ValidItem是连续表示的,比如S:.ab 标识是0,S:a.b则为1,S:ab.则为2.如果有下一条Reduce表达式X:cd,则X:.cd标识为3,其中.为解析位置.

#include <bits/stdc++.h>
using namespace std;
//Y:a.Xb

// Capacity of the arrays indexed by symbol id, by reduce-expression index and of
// DFAState (StateAuto and CoreStack are sized 3 * bign).
const static int bign = 313;
const static int bigm = 200;//valid item maxnum (capacity of xid / yid)
// Number of buckets in hashlist and the modulus applied by hashCode().
const static int mymod = 100001;

// symbolMark value given to nonterminal symbols (terminals carry 0 - 100).
const static int UnterminalSymbol = 101;
// Symbol id inserted into the follow set of the first rule's left-hand side;
// presumably the end-of-input marker.
const static int endSymbol = 300;

int SymbolNum; // not referenced by the code visible in this file
int reduceNum; // number of reduce expressions accepted by init()

vector<int> terminal; // symbol ids registered as terminals by mparse()
vector<int> nonterminal; // symbol ids registered as nonterminals by mparse()
map<string, int> symbolToId; // symbol name -> symbol id
int symbolMark[bign]; // Unterminal : 101 ; terminal 0 - 100 represent diff privilege

vector<vector<int>> ReduceExprList; // one entry per rule: [lhs, rhs symbols...]

// Per-symbol progress marker: 1 = seen by mparse(), 2 = first set handled,
// 3 = symbol transitions handled by CalcSymbolTrans().
int vst[bign];

int privilege[bign]; // privilege of each reduce expression.

int nownum = 1; // next id handed out to a newly seen nonterminal

set<int> firstset[bign]; // first set per symbol id (id 0 stands for epsilon)
set<int> followset[bign]; // follow set per symbol id
vector<int> nterminalitem[bign]; // nterminalitem[s] = indices of the rules whose lhs is s

// Follow-set propagation graph: followset[u] is merged into followset[v] for every
// v in dependency[u]; dushu[v] counts the edges pointing at v that are still pending.
vector<int> dependency[bign];
int dushu[bign];

int mqueue[bign], mfront, mtail; // array-based FIFO used by CalcFollowSet()

vector<int> ValidItem[bign];//ValidItem[i][j] means validItem (reduceExp = i,pos = j)
int xid[bigm]; // rule index of a valid item
int yid[bigm]; // dot position of a valid item
int maxValidItem; // number of valid item ids allocated so far
int ReduceOp; // marker stored in a transition set to denote "reduce"; set to maxValidItem + 1
//ValidItem[xid[xy]][yid[xy]] == xy

map<int, set<int> > SItemAuto[bign]; //SItemAuto[i][j] means nonterminal symbol i accept a symbol j (terminal or noneterminal) get a valid item set
map<int, set<int> > VItemAuto[bign]; // VItemAuto[i][j] means validitem i accept a symbol j(terminal or noneterminal) get a valid item set

/*if we use map<int, set<int>*> we can optimize the merge process
(by swapping setu, setv when setu.size < setv.size),
but i dont want to do it now.
*/
vector<int> hashlist[mymod]; // hashlist[h] = ids of the DFA states whose item set hashes to h
set<int> DFAState[bign];//DFAState[i] means valid item set of state i
int stateNum; // number of DFA states created so far

// StateAuto[s][x]: result of state s on symbol x. A non-negative value is the target
// state id; a negative value -(r + 1) means "reduce by rule r".
map<int, int> StateAuto[3 * bign];
int CoreStack[3*bign]; // stack of state ids used while the automaton is explored
int stackTop; // number of entries currently on CoreStack

// Token ids 258 - 283. init() gives exactly this id range the terminal mark 100
// and maps the grammar name "int" to 276 (INT_CONST).
// NOTE(review): presumably mirrors the token numbering of the lexer that feeds
// this parser - confirm against the lexer.
enum yytokentype {
	CLASS = 258,
	ELSE = 259,
	FI = 260,
	IF = 261,
	IN = 262,
	INHERITS = 263,
	LET = 264,
	LOOP = 265,
	POOL = 266,
	THEN = 267,
	WHILE = 268,
	CASE = 269,
	ESAC = 270,
	OF = 271,
	DARROW = 272,
	NEW = 273,
	ISVOID = 274,
	STR_CONST = 275,
	INT_CONST = 276,
	BOOL_CONST = 277,
	TYPEID = 278,
	OBJECTID = 279,
	ASSIGN = 280,
	NOT = 281,
	LE = 282,
	ERROR = 283,
};

// Placeholder semantic action: only reports on stdout which reduce function
// (identified by itemid) would be invoked.
void Reducefunc(int itemid)
{
	fprintf(stdout, "Call reduce function %d to create a new object\n", itemid);
}

// Polynomial hash of an int sequence: base maxValidItem, reduced modulo mymod
// after every element. An empty sequence hashes to 0.
inline int hashCode(vector<int> &vec)
{
	long long acc = 0;
	for (vector<int>::const_iterator it = vec.begin(); it != vec.end(); ++it)
	{
		// acc stays below mymod in magnitude, so the product fits in 64 bits.
		acc = (acc * maxValidItem + *it) % mymod;
	}
	return static_cast<int>(acc);
}

// Polynomial hash of a set<int>, visiting the elements in ascending order:
// base maxValidItem, reduced modulo mymod after every element.
inline int hashCode(set<int>& vec)
{
	long long acc = 0;
	set<int>::const_iterator cur = vec.begin();
	while (cur != vec.end())
	{
		acc = (acc * maxValidItem + *cur) % mymod;
		++cur;
	}
	return static_cast<int>(acc);
}

// Returns true when both vectors have the same length and equal elements at
// every index.
inline bool sameVec(vector<int>& vec1, vector<int>& vec2)
{
	// vector's operator== compares the sizes first, then the elements pairwise.
	return vec1 == vec2;
}

// Returns true when both sets hold exactly the same elements.
inline bool sameSet(set<int>& set1, set<int>& set2)
{
	// set's operator== compares the sizes first, then the elements in sorted order.
	return set1 == set2;
}

// Strips leading and trailing ' ' characters from tstr in place. Only the space
// character is removed; tabs and other whitespace are kept.
inline void mtrim(string &tstr)
{
	const string::size_type head = tstr.find_first_not_of(' ');
	if (head == string::npos)
	{
		// Empty, or made of spaces only.
		tstr.clear();
		return;
	}
	const string::size_type tail = tstr.find_last_not_of(' ');
	tstr = tstr.substr(head, tail - head + 1);
}

// Returns true when c is one of the ASCII characters '0' .. '9'.
inline bool isdigit(char c)
{
	return '0' <= c && c <= '9';
}

// Splits tstr into tokens separated by ':' or ' ' and appends the non-empty
// tokens to res, in order of appearance.
//
// Every ':' and ' ' is a separator, including one at the very start of the text
// or directly after another separator, so ":E" and "E::T" yield {"E"} and
// {"E", "T"} and no token ever contains a separator character.
//   tstr - text to split (e.g. "E:T + E")
//   res  - output vector; existing content is kept, tokens are appended
inline void msplit(const string& tstr, vector<string>& res)
{
	string token;
	for (char ch : tstr)
	{
		if (ch == ':' || ch == ' ')
		{
			// A separator ends the current token; runs of separators add nothing.
			if (!token.empty())
			{
				res.push_back(token);
				token.clear();
			}
		}
		else
		{
			token.push_back(ch);
		}
	}
	if (!token.empty())
	{
		res.push_back(token);
	}
}
bool mparse(string Expr, vector<int> &rexp,int &v)//construct reduce exp & calculate privilege of reduce exp
{
	vector<string> tstr;
	tstr.clear();
	msplit(Expr, tstr);
	//v = 101;
	for (string u : tstr)
	{
		if (!u.empty() && isdigit(u[0]))
		{
			int tmp = atoi(u.c_str());
			if (!vst[tmp])
			{
				vst[tmp] = 1;
				terminal.push_back(tmp);
			}
			rexp.push_back(tmp);
			v = min(symbolMark[tmp], v);
		}
		else if (symbolToId.count(u) == 0)
		{
			symbolToId[u] = nownum;
			rexp.push_back(nownum);
			symbolMark[nownum] = UnterminalSymbol;

			nonterminal.push_back(nownum);
			vst[nownum] = 1;
			//symbolMark
			do
			{
				nownum++;
			} while (symbolMark[nownum]);
		}
		else
		{
			int tmp = symbolToId[u];
			if (!vst[tmp])
			{
				vst[tmp] = 1;
				terminal.push_back(tmp);
			}
			rexp.push_back(tmp);
			v = min(v, symbolMark[tmp]);
		}
	}
	if (0 == rexp.size() || symbolMark[rexp[0]] < UnterminalSymbol)
	{
		return false;
	}
	if (1 == rexp.size())
	{
		rexp.push_back(0);
	}
	return true;
}
// Fills the symbol tables and loads the built-in test grammar.
//
// - ids 258..283 and the epsilon id 0 get terminal mark 100
// - '+', '-' get mark 2, '*', '/' get mark 3, '(' and ')' get mark 100; each is
//   registered in symbolToId under its one-character name
// - the name "int" is mapped to id 276
// - every rule accepted by mparse() is appended to ReduceExprList, gets one valid
//   item id per position (xid / yid / ValidItem), a privilege entry, and is listed
//   under its left-hand side in nterminalitem
//
// All per-rule tables are indexed by the accepted-rule counter (reduceNum), so
// they stay aligned with ReduceExprList even if a rule is rejected.
void init()
{
	for (int i = 258; i < 284; i++)
	{
		symbolMark[i] = 100;
	}
	symbolMark[0] = 100;//which means epsilon

	symbolMark['+'] = 2;
	symbolToId["+"] = '+';

	symbolMark['-'] = 2;
	symbolToId["-"] = '-';

	symbolMark['*'] = 3;
	symbolToId["*"] = '*';

	symbolMark['/'] = 3;
	symbolToId["/"] = '/';

	symbolMark['('] = 100;
	symbolToId["("] = '(';

	symbolMark[')'] = 100;
	symbolToId[")"] = ')';

	symbolToId["int"] = 276;

	vector<string> tstr{"S:E", 
						"E:T + E",
						"E:T",
						"T:int * T",
						"T:int",
						"T:( E )"};
	
	const int testReduceCase = static_cast<int>(tstr.size());
	reduceNum = 0;

	for (int i = 0; i < testReduceCase; i++)
	{
		int pv = 101;
		vector<int> ReduceExpr;
		if (!mparse(tstr[i], ReduceExpr, pv))
		{
			continue; // rejected rule: consumes no rule index
		}

		// Index of this rule inside ReduceExprList / privilege.
		const int ruleId = reduceNum;
		ReduceExprList.push_back(ReduceExpr);
		for (size_t j = 0; j < ReduceExpr.size(); j++)
		{
			xid[maxValidItem] = ruleId;
			yid[maxValidItem] = static_cast<int>(j);
			ValidItem[ruleId].push_back(maxValidItem++);//maybe maxstate
		}

		privilege[reduceNum++] = pv;
		nterminalitem[ReduceExpr[0]].push_back(ruleId);
	}
	privilege[endSymbol] = 0;
}

// Adds val to u; a value that is already present leaves the set unchanged.
inline void setInsert(set<int>& u, int val)
{
	// set::insert is already a no-op for an existing key.
	u.insert(val);
}

// Makes sure key val exists in u, creating an empty set for it when missing.
// An existing entry keeps its current set.
inline void mapInsert(map<int, set<int> >& u, int val)
{
	// map::insert does not overwrite an existing key.
	u.insert(make_pair(val, set<int>()));
}

// Adds every element of vset to uset; vset itself is left untouched.
void mergeSet(set<int>& uset, set<int>& vset)
{
	// Element-wise insert: stays well-defined when both arguments are the same set,
	// because inserting a value that is already present changes nothing.
	for (set<int>::const_iterator src = vset.begin(); src != vset.end(); ++src)
	{
		uset.insert(*src);
	}
}

// Merges v into u key by key: every key of v is created in u when missing, and
// the set stored under that key in v is added to the one in u.
void mergeMap(map<int, set<int> >& u, map<int, set<int> >& v)
{
	for (map<int, set<int> >::iterator src = v.begin(); src != v.end(); ++src)
	{
		const int key = src->first;
		mapInsert(u, key);
		mergeSet(u[key], src->second);
	}
}

// Computes firstset[u] by a depth-first walk over the rules of u.
//
// - A symbol whose vst mark is already 2 is skipped (done or in progress).
// - A terminal (symbolMark <= 100) has itself as its only first symbol.
// - For a nonterminal, each rule contributes the first symbols of its right-hand
//   side symbols from left to right, stopping at the first symbol that cannot
//   derive epsilon (id 0).
//
// Epsilon is added to firstset[u] only when every right-hand side symbol of some
// rule can derive epsilon. Copying it over from a nullable prefix symbol would
// wrongly mark u as nullable for a rule such as "X: A b" with nullable A.
//
// NOTE(review): this is a single DFS pass, not a fixed-point iteration. A symbol
// reached again while its own computation is still running contributes only what
// has been collected so far.
void CalcFirstSet(int u)
{
	if (2 == vst[u])
		return;
	vst[u] = 2;
	if (symbolMark[u] <= 100)//terminal
	{
		firstset[u].insert(u);
		return;
	}
	for (int item : nterminalitem[u])
	{
		const vector<int>& rule = ReduceExprList[item];
		bool allNullable = true;
		for (size_t i = 1; i < rule.size(); i++)
		{
			const int v = rule[i];
			CalcFirstSet(v);
			for (int sym : firstset[v])
			{
				if (sym != 0)//epsilon is decided per rule, below
					firstset[u].insert(sym);
			}
			if (firstset[v].count(0) == 0)//dont contain epsilon
			{
				allNullable = false;
				break;
			}
		}
		if (allNullable)
			firstset[u].insert(0);
	}
}

// Computes followset[] for the symbols that appear in the reduce expressions.
//
// Pass 1 walks every right-hand side from right to left. nowset holds the first
// symbols collected from the positions to the right of the current one and is
// merged into the follow set of the current symbol. While everything to the right
// can derive epsilon (flag), an edge lhs -> symbol is recorded in dependency /
// dushu so that followset[lhs] is later merged into followset[symbol].
// Pass 2 propagates along those edges with a queue seeded by the symbols that
// have no incoming edge.
//
// Relies on firstset[] being filled already; mfront / mtail are not reset here.
// nowset receives the whole firstset[v], including the epsilon id 0 when present.
// NOTE(review): a symbol is enqueued only once its dushu count reaches 0, so
// symbols on a dependency cycle are never dequeued and never pass their follow
// set on - confirm this cannot occur for the grammars in use.
void CalcFollowSet()
{
	//following is followset calculation
	// The lhs of rule 0 is followed by the end symbol.
	followset[ReduceExprList[0][0]].insert(endSymbol);
	for (int i = 0; i < reduceNum; i++)
	{
		int u = ReduceExprList[i][0];
		set<int> nowset;
		nowset.clear();
		int tm = ReduceExprList[i].size();
		bool flag = true; // true while every symbol to the right can derive epsilon
		for (int j = tm - 1; j >= 1; j--)
		{
			int v = ReduceExprList[i][j];
			mergeSet(followset[v], nowset);
			if (flag && u != v)
			{
				// v may end the rule: followset[u] has to flow into followset[v].
				dushu[v]++;
				dependency[u].push_back(v);
			}
			if (firstset[v].count(0) == 0) // don't include episilon
			{
				flag = false;
				nowset.clear();
			}
			mergeSet(nowset, firstset[v]);
		}
	}

	// Seed the queue with every symbol that has no incoming edge.
	for (int u : nonterminal)
	{
		if (0 == dushu[u])
			mqueue[mtail++] = u;
	}
	for (int u : terminal)
	{
		if (0 == dushu[u])
			mqueue[mtail++] = u;
	}

	while (mfront < mtail)//follow set build according to topo structure
	{
		int u = mqueue[mfront++];
		for (int v : dependency[u])
		{
			mergeSet(followset[v], followset[u]);
			if (--dushu[v] == 0)
				mqueue[mtail++] = v;
		}
	}
}

// Fills SItemAuto[u]: for every symbol x, the valid items recorded for the rules
// of u (and, recursively, of the nonterminals met along the way) under x.
//
// For each rule of u the right-hand side is walked from the left. Position i with
// symbol v records valid item ValidItem[item][i] under v. The walk stops after a
// terminal, or after a nonterminal whose first set lacks epsilon (id 0). For a
// nonterminal v, SItemAuto[v] is computed first and merged in.
//
// Each symbol is processed once (vst mark 3).
// NOTE(review): the mark is set before the map is complete, so a recursive
// reference to a symbol that is still being processed merges only what has been
// collected for it so far.
void CalcSymbolTrans(int u) // calculate SItemAuto
{
	if (3 == vst[u])
		return;
	vst[u] = 3;
	map<int,set<int>> &nowauto = SItemAuto[u];
	for (int item : nterminalitem[u])
	{
		
		//mapInsert(nowauto, 0);//epsilon in this item
		//setInsert(nowauto[0], ValidItem[item][0]);
		for (int i = 1; i < ReduceExprList[item].size(); i++)
		{
			int v = ReduceExprList[item][i];
			mapInsert(nowauto, v);
			setInsert(nowauto[v], ValidItem[item][i]); // the item with the dot after position i
			if (symbolMark[v] <= 100)//terminal
			{
				break;
			}
			else
			{
				CalcSymbolTrans(v);
				mergeMap(nowauto, SItemAuto[v]);
				if (0 == firstset[v].count(0))//epsilon is not the firstset of sym v
					break;
			}
		}
	}
}

// Fills VItemAuto[] for every valid item of every reduce expression.
//
// Non-final items are handled right to left. nowmap accumulates transitions and
// is cleared whenever the symbol after the dot cannot derive epsilon, so an item
// inherits the transitions of the items to its right only across nullable
// symbols. Each item gets the step over its own next symbol v (to the next item
// id) and, when v is a nonterminal, everything in SItemAuto[v].
//
// The final item of a rule (dot at the end) gets the ReduceOp marker under every
// symbol of the lhs follow set that passes the privilege test.
// NOTE(review): privilege[] is filled per reduce-expression index in init() (plus
// privilege[endSymbol]) but is indexed below by a symbol id; terminal priorities
// are stored in symbolMark[] - confirm which table is intended.
void CalcItemTrans()//item trans /last pos dont trans
{
	for (int itemid = 0; itemid < reduceNum; itemid++)
	{
		map<int, set<int>> nowmap;
		nowmap.clear();
		for (int i = (int)ReduceExprList[itemid].size() - 2; i >= 0; i--)
		{
			int v = ReduceExprList[itemid][i + 1]; // symbol right after the dot
			int nowstate = ValidItem[itemid][i];
			//	auto it = VItemAuto[nowstate];
			//	nowmap[v].insert(nowstate + 1);

			if (firstset[v].count(0) == 0)
				nowmap.clear();

		/*	mapInsert(nowmap, 0);
			setInsert(nowmap[0], nowstate);*///insert epsilon trans
			mapInsert(nowmap, v);
			setInsert(nowmap[v], nowstate + 1);//insert symbol v trans

			if (101 == symbolMark[v])
			{
				mergeMap(nowmap, SItemAuto[v]);
			}
			mergeMap(VItemAuto[nowstate], nowmap);
		}
		int reduceState = ValidItem[itemid][(int)ReduceExprList[itemid].size() - 1];

		for (int tterminal:followset[ReduceExprList[itemid][0]])
		{
			if (privilege[itemid] >= privilege[tterminal])
			{
				mapInsert(VItemAuto[reduceState], tterminal);
				setInsert(VItemAuto[reduceState][tterminal], ReduceOp);
			}
		}
	}
}

void StateTrans();
// Resolves the transition of the state on top of CoreStack under symbol x (called
// by StateTrans() with the lhs of the rule being reduced).
//
// Does nothing when StateAuto already has an entry for (state, x). Otherwise the
// target item set is the union of VItemAuto[item][x] over the items of the state.
// If a state with the same item set is already registered in hashlist, only the
// transition is recorded. Otherwise a new state is created and registered, pushed
// on CoreStack, explored by StateTrans() and popped again.
void StateNonTermTrans(int x)
{
	int u = CoreStack[stackTop - 1];
	if (StateAuto[u].count(x) == 0)
	{
		set<int> tmpItemSet;
		for (int vitem1 : DFAState[u])
		{
			if (VItemAuto[vitem1].count(x) == 0)
				continue;
			mergeSet(tmpItemSet, VItemAuto[vitem1][x]);
		}
		int hashid = hashCode(tmpItemSet);
		bool flag = false; // becomes true when an identical state already exists
		for (int stateid : hashlist[hashid])
		{
			if (sameSet(DFAState[stateid], tmpItemSet))
			{
				flag = true;
				StateAuto[u][x] = stateid;
				break;
			}
		}
		if (!flag)
		{
			StateAuto[u][x] = stateNum;
			hashlist[hashid].push_back(stateNum);
			DFAState[stateNum].clear();
			for (int tmp : tmpItemSet)
				DFAState[stateNum].insert(tmp);
			stateNum++;
			CoreStack[stackTop++] = StateAuto[u][x];
			StateTrans();
			stackTop--;
		}
	
	}
}

// Explores the DFA state on top of CoreStack and records its transitions in
// StateAuto. State 1 is treated as the end state and is not expanded.
//
// Step 1 groups the items of the state by the symbols they have a VItemAuto
// transition on (mymap: symbol -> items).
// Step 2 handles each symbol x:
//   - reduce: if an item carries the ReduceOp marker under x, the first such item
//     wins. StateAuto[u][x] is set to -(rule + 1), one stack entry per right-hand
//     side symbol is popped, the transition on the lhs is resolved through
//     StateNonTermTrans(), and the popped entries are pushed back by replaying the
//     recorded transitions.
//   - shift: the target item set is the union of the item transitions on x. It is
//     matched against the registered states through hashlist; a new state is
//     pushed, explored recursively and popped.
//
// NOTE(review): the replay loop reads StateAuto[tu][symbol] with operator[], which
// inserts a 0 entry (state 0) when no transition was recorded - confirm the
// entries always exist at that point.
void StateTrans()//StateTrans
{
	int u = CoreStack[stackTop - 1];
	if (1 == u)//end state
		return;
	//set<int> symbolAcceptable;

	//this structure under may be ugly but most efficient way of coding it which i can imagine
	map<int, vector<int>> mymap;
	for (int vitem : DFAState[u])
	{
		for (pair<int, set<int> > trans : VItemAuto[vitem])
		{
			int x = trans.first;
			if (mymap.count(x) == 0)
				mymap[x] = vector<int>();
			mymap[x].push_back(vitem);
		}
	}

	for (pair<int, vector<int>> sym2item: mymap)
	{
		int x = sym2item.first;
		set<int> tmpItemSet;
		bool reduceFlag = false;
		int reduceitem = -1; // rule index to reduce by, once reduceFlag is set
		for (int vitem1 : sym2item.second)
		{
			if (VItemAuto[vitem1][x].count(ReduceOp) > 0)
			{
				reduceFlag = true;
				reduceitem = xid[vitem1];
				break;
			}
			mergeSet(tmpItemSet, VItemAuto[vitem1][x]);
		}
		if (reduceFlag)
		{
			StateAuto[u][x] = - reduceitem - 1; // we can use minus num to represent reduce op
			// Pop one stack entry per right-hand side symbol of the rule.
			stackTop -= (ReduceExprList[reduceitem].size() - 1);
			StateNonTermTrans(ReduceExprList[reduceitem][0]);
			int tu = CoreStack[stackTop - 1];
			for (int i = 1;i < ReduceExprList[reduceitem].size(); i++)//recover stack state before reduce
			{
				CoreStack[stackTop++] = StateAuto[tu][ReduceExprList[reduceitem][i]];
				tu = CoreStack[stackTop - 1];
			}
			continue;
		}

		int hashid = hashCode(tmpItemSet);
		bool flag = false; // becomes true when an identical state already exists
		for (int stateid : hashlist[hashid])
		{
			if (sameSet(DFAState[stateid], tmpItemSet))
			{
				flag = true;
				StateAuto[u][x] = stateid;
				break;
			}
		}
		if (!flag)
		{
			StateAuto[u][x] = stateNum;
			hashlist[hashid].push_back(stateNum);
			DFAState[stateNum].clear();
			for (int tmp : tmpItemSet)
				DFAState[stateNum].insert(tmp);
			stateNum++;
			CoreStack[stackTop++] = StateAuto[u][x]; //shift
			StateTrans();
			stackTop--;// recover
		}
	}
}

// Runs the whole construction in order: first sets, follow sets, per-symbol and
// per-item transition tables, then the DFA exploration starting from state 0.
// Automata is built on item
void buildStackAuto() //Automata is built on item
{
	// The reduce marker must differ from every valid item id.
	ReduceOp = maxValidItem + 1;

	// Terminals and epsilon (id 0) are their own first set; mark them as done.
	for (size_t k = 0; k < terminal.size(); ++k)
	{
		const int t = terminal[k];
		firstset[t].insert(t);
		vst[t] = 2;
	}
	firstset[0].insert(0);
	vst[0] = 2;

	for (size_t k = 0; k < nonterminal.size(); ++k)
		CalcFirstSet(nonterminal[k]);

	CalcFollowSet();

	for (size_t k = 0; k < nonterminal.size(); ++k)
		CalcSymbolTrans(nonterminal[k]);
	CalcItemTrans();

	// Seed the two predefined states: state 0 = {item 0} is the start state and
	// state 1 = {item 1} is the end state.
	for (int seed = 0; seed <= 1; ++seed)
	{
		setInsert(DFAState[seed], seed);
		const int bucket = hashCode(DFAState[seed]);
		hashlist[bucket].push_back(seed);
		++stateNum;
	}

	CoreStack[stackTop++] = 0;
	StateTrans();
}

// Entry point: loads the built-in grammar, builds the automaton and prints the
// number of DFA states.
int main()
{
	init();
	buildStackAuto();
	std::cout << stateNum << std::endl;
	return 0;
}

这份代码可能有错误,因为只拿了一个测试样例。不过如果没错的话就太棒了。

后面发现了一些错误,现在可能是对的。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值