shift-reduce 冲突的处理方法:为每条 reduce 表达式和每个 terminal 定义优先级。
比如 reduce 表达式 E:E+int 的优先级为1,* 的优先级为2,E:(E) 的优先级为3。那么在遇到 shift-reduce 冲突且下一个符号为 * 时,如果当前可归约的是 E:E+int(优先级1低于 * 的优先级2),则会 shift;而如果可归约的是 E:(E)(优先级3高于2),则会 reduce。
一条 reduce 表达式的优先级取决于其中每个符号的优先级的最低值(大多数情况下只有一个符号);如果它高于下一个符号的优先级,则 reduce,否则 shift。
这样在一定程度上可以解决 shift-reduce conflict。
SLR parser 的自动机(DFA)中,每个状态是一个 Valid Item 的集合。
看了一下Compiler的课觉得可以实现这个DFA,我就写了一下,后面发现真的挺复杂的,写了几天(不是很连续),中间可能有很多值得优化的点,不过第一次发现stl map用在这个程序上实在太适合了,感觉其他程序里用到map的真的有点卡手。
该程序中首先对从lexical analysis来的数据进行了预处理,将Reduce 表达式用一个Vector表示
而后:
1.计算每个Symbol的first set
2.计算每个Symbol的follow set
3.计算每个Symbol(nonterminal)接受某个Symbol后所到达的ValidItem集合(自动机)
4.计算每个ValidItem 接受某个Symbol后所到达的ValidItem集合(自动机2)
5.计算状态接受某个Symbol后所到达的ValidItem集合(自动机3),并将之标为一个新状态(或者以前存在的),到达终止状态时结束。
程序中 ValidItem 是连续编号的:比如 S:.ab 的编号是 0,S:a.b 为 1,S:ab. 为 2;如果下一条 Reduce 表达式是 X:cd,则 X:.cd 的编号为 3。其中 . 表示当前解析位置。
#include <bits/stdc++.h>
using namespace std;
// Global tables shared by every phase of the parser-table generator.
// Item notation used in comments: "Y:a.Xb" is rule Y -> a X b with the dot
// (parse position) in front of X.
//Y:a.Xb
// Capacity of the tables indexed by symbol id, rule index or state id.
const static int bign = 313;
const static int bigm = 200;//valid item maxnum
// Modulus used by hashCode(); also the number of buckets in hashlist.
const static int mymod = 100001;
// symbolMark value that tags a symbol as a nonterminal.
const static int UnterminalSymbol = 101;
// Symbol id inserted into the follow set of the start symbol (end of input).
const static int endSymbol = 300;
// Not referenced anywhere in the visible code.
int SymbolNum;
// Number of grammar rules accepted by mparse() in init().
int reduceNum;
// Symbol ids collected by mparse(): terminals and nonterminals respectively.
vector<int> terminal;
vector<int> nonterminal;
// Symbol name -> symbol id.
map<string, int> symbolToId;
int symbolMark[bign]; // Unterminal : 101 ; terminal 0 - 100 represent diff privilege
// One entry per rule: element 0 is the left-hand side, the rest the right-hand side.
vector<vector<int>> ReduceExprList;
// Per-symbol phase marker: 1 = seen by mparse, 2 = first set started,
// 3 = symbol transitions started.
int vst[bign];
int privilege[bign]; // privilege of each reduce expression.
// Next id handed out by mparse() to a symbol name it has not seen before.
int nownum = 1;
set<int> firstset[bign];
set<int> followset[bign];
// nterminalitem[s] lists the rule indices whose left-hand side is s.
vector<int> nterminalitem[bign];
// dependency[u] lists the symbols whose follow set must receive follow(u).
vector<int> dependency[bign];
// In-degree of each symbol in the dependency graph (used by CalcFollowSet).
int dushu[bign];
// Array-backed FIFO queue for the propagation in CalcFollowSet.
int mqueue[bign], mfront, mtail;
vector<int> ValidItem[bign];//ValidItem[i][j] means validItem (reduceExp = i,pos = j)
// Inverse of ValidItem: rule index (xid) and dot position (yid) of an item id.
int xid[bigm];
int yid[bigm];
// Number of item ids allocated so far.
int maxValidItem;
// Sentinel stored in VItemAuto target sets to mean "reduce";
// set to maxValidItem + 1 in buildStackAuto().
int ReduceOp;
//ValidItem[xid[xy]][yid[xy]] == xy
map<int, set<int> > SItemAuto[bign]; //SItemAuto[i][j]: item set reached when nonterminal i accepts symbol j (terminal or nonterminal)
map<int, set<int> > VItemAuto[bign]; // VItemAuto[i][j]: item set reached when valid item i accepts symbol j (terminal or nonterminal)
/*if we use map<int, set<int>*> we can optimize the merge process
(by swapping setu, setv when setu.size < setv.size),
but i dont want to do it now.
*/
// hashlist[h] lists the state ids whose item set hashes to h.
vector<int> hashlist[mymod];
set<int> DFAState[bign];//DFAState[i] means valid item set of state i
// Number of DFA states created so far.
int stateNum;
// StateAuto[s][x]: target state of state s on symbol x; a negative value
// -(rule + 1) encodes "reduce by rule" (see StateTrans).
map<int, int> StateAuto[3 * bign];
// Stack of state ids used while exploring the automaton, and its size.
int CoreStack[3*bign];
int stackTop;
// Token codes produced by the lexical analysis stage. init() gives every id
// in 258..283 the terminal mark 100 and maps the grammar name "int" to 276
// (INT_CONST). The names look like a bison-generated token table -- TODO confirm
// against the lexer this was taken from.
enum yytokentype {
CLASS = 258,
ELSE = 259,
FI = 260,
IF = 261,
IN = 262,
INHERITS = 263,
LET = 264,
LOOP = 265,
POOL = 266,
THEN = 267,
WHILE = 268,
CASE = 269,
ESAC = 270,
OF = 271,
DARROW = 272,
NEW = 273,
ISVOID = 274,
STR_CONST = 275,
INT_CONST = 276,
BOOL_CONST = 277,
TYPEID = 278,
OBJECTID = 279,
ASSIGN = 280,
NOT = 281,
LE = 282,
ERROR = 283,
};
// Placeholder semantic action: reports on stdout which reduce rule fired.
// itemid is the number printed in the message.
void Reducefunc(int itemid)
{
    fprintf(stdout, "Call reduce function %d to create a new object\n", itemid);
}
// Polynomial hash of an int sequence: base maxValidItem, modulus mymod.
// The accumulator is kept in 64 bits so the multiply cannot overflow.
inline int hashCode(vector<int> &vec)
{
    long long acc = 0;
    for (size_t k = 0; k < vec.size(); ++k)
    {
        acc = (acc * maxValidItem + vec[k]) % mymod;
    }
    return static_cast<int>(acc);
}
// Polynomial hash of a set's elements in ascending order: base maxValidItem,
// modulus mymod. Used to bucket DFA states by their item set.
inline int hashCode(set<int>& vec)
{
    long long acc = 0;
    for (set<int>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    {
        acc = (acc * maxValidItem + *it) % mymod;
    }
    return static_cast<int>(acc);
}
// Returns true when both vectors have the same length and the same elements
// in the same order.
inline bool sameVec(vector<int>& vec1, vector<int>& vec2)
{
    return vec1 == vec2;
}
// Returns true when both sets contain exactly the same elements.
inline bool sameSet(set<int>& set1, set<int>& set2)
{
    // Sizes must match before the element-wise walk, which only bounds set1.
    return set1.size() == set2.size()
        && equal(set1.begin(), set1.end(), set2.begin());
}
// Strips leading and trailing space characters (' ' only) from tstr in place.
// A string made only of spaces becomes empty.
inline void mtrim(string &tstr)
{
    const string::size_type head = tstr.find_first_not_of(" ");
    if (head == string::npos)
    {
        tstr.clear();
        return;
    }
    const string::size_type tail = tstr.find_last_not_of(" ");
    tstr = tstr.substr(head, tail - head + 1);
}
// Returns true when c is one of the ASCII decimal digits '0'..'9'.
inline bool isdigit(char c)
{
    return '0' <= c && c <= '9';
}
// Splits tstr into tokens separated by ':' or ' ' and appends them to res.
// Runs of delimiters are collapsed and empty tokens are never produced, so
// no token can contain a delimiter (the previous version leaked a ':' into
// the token when it directly followed another delimiter, e.g. "a :b").
//   tstr  text of one grammar rule, e.g. "T:int * T"
//   res   output vector; existing content is kept, tokens are appended
inline void msplit(const string& tstr, vector<string>& res)
{
    const auto isDelim = [](char ch) { return ch == ':' || ch == ' '; };
    const size_t len = tstr.length();
    size_t pos = 0;
    while (pos < len)
    {
        // Skip the delimiter run in front of the next token.
        while (pos < len && isDelim(tstr[pos]))
            ++pos;
        const size_t tokenStart = pos;
        // Consume the token itself.
        while (pos < len && !isDelim(tstr[pos]))
            ++pos;
        if (pos > tokenStart)
            res.push_back(tstr.substr(tokenStart, pos - tokenStart));
    }
}
// Parses one grammar rule ("LHS:sym sym ...") into symbol ids and computes
// the rule's privilege.
//   Expr  rule text; tokens are separated by ':' or ' '
//   rexp  output: symbol ids, element 0 being the left-hand side; a rule
//         with no right-hand side gets a single 0 (epsilon) appended
//   v     in/out: lowered to the smallest symbolMark among the numeric
//         tokens and the already-known names in the rule (a name seen for
//         the first time does not affect it)
// A token starting with a digit is taken as a literal symbol id. A name not
// yet in symbolToId is registered as a new nonterminal with the next free id.
// Returns false when the rule is empty, when its first symbol is not a
// nonterminal, or when a symbol id would fall outside the symbol tables.
// Symbols registered before a failure is detected stay registered.
bool mparse(string Expr, vector<int> &rexp,int &v)//construct reduce exp & calculate privilege of reduce exp
{
    // Capacity of the per-symbol tables; every symbol id must stay below it.
    const int capacity = static_cast<int>(sizeof(symbolMark) / sizeof(symbolMark[0]));

    vector<string> tokens;
    msplit(Expr, tokens);
    for (const string& tok : tokens)
    {
        if (!tok.empty() && isdigit(tok[0]))
        {
            const int sym = atoi(tok.c_str());
            if (sym < 0 || sym >= capacity)
                return false; // literal id would index past the tables
            if (!vst[sym])
            {
                vst[sym] = 1;
                terminal.push_back(sym);
            }
            rexp.push_back(sym);
            v = min(symbolMark[sym], v);
            continue;
        }

        const map<string, int>::iterator known = symbolToId.find(tok);
        if (known == symbolToId.end())
        {
            if (nownum >= capacity)
                return false; // no free symbol id left
            symbolToId[tok] = nownum;
            rexp.push_back(nownum);
            symbolMark[nownum] = UnterminalSymbol;
            nonterminal.push_back(nownum);
            vst[nownum] = 1;
            // Advance to the next id that is not already marked as a symbol.
            do
            {
                nownum++;
            } while (nownum < capacity && symbolMark[nownum]);
            continue;
        }

        const int sym = known->second;
        if (!vst[sym])
        {
            vst[sym] = 1;
            terminal.push_back(sym);
        }
        rexp.push_back(sym);
        v = min(v, symbolMark[sym]);
    }

    if (rexp.empty() || symbolMark[rexp[0]] < UnterminalSymbol)
    {
        return false;
    }
    if (1 == rexp.size())
    {
        rexp.push_back(0); // empty right-hand side: record epsilon
    }
    return true;
}
// Sets up the symbol marks and the built-in test grammar, then parses every
// rule into ReduceExprList and numbers its valid items.
// Item ids are consecutive: a rule with k symbols (left-hand side included)
// gets k ids, one per dot position.
void init()
{
    for (int tok = 258; tok < 284; tok++)
    {
        symbolMark[tok] = 100;
    }
    symbolMark[0] = 100;//which means epsilon
    symbolMark['+'] = 2;
    symbolToId["+"] = '+';
    symbolMark['-'] = 2;
    symbolToId["-"] = '-';
    symbolMark['*'] = 3;
    symbolToId["*"] = '*';
    symbolMark['/'] = 3;
    symbolToId["/"] = '/';
    symbolMark['('] = 100;
    symbolToId["("] = '(';
    symbolMark[')'] = 100;
    symbolToId[")"] = ')';
    symbolToId["int"] = 276;

    const vector<string> grammar{"S:E",
        "E:T + E",
        "E:T",
        "T:int * T",
        "T:int",
        "T:( E )"};

    reduceNum = 0;
    for (const string& ruleText : grammar)
    {
        int pv = 101;
        vector<int> ReduceExpr;
        if (!mparse(ruleText, ReduceExpr, pv))
            continue;

        // Key every per-rule table by the rule's index in ReduceExprList,
        // not by its position in the source list: the two differ as soon as
        // one rule is rejected, and later phases index ValidItem,
        // nterminalitem and ReduceExprList with the same number.
        const int ruleId = reduceNum;
        ReduceExprList.push_back(ReduceExpr);
        for (size_t pos = 0; pos < ReduceExpr.size(); pos++)
        {
            xid[maxValidItem] = ruleId;
            yid[maxValidItem] = static_cast<int>(pos);
            ValidItem[ruleId].push_back(maxValidItem++);
        }
        privilege[reduceNum++] = pv;
        nterminalitem[ReduceExpr[0]].push_back(ruleId);
    }
    privilege[endSymbol] = 0;
}
// Adds val to u; does nothing when val is already present.
inline void setInsert(set<int>& u, int val)
{
    u.insert(val);
}
// Makes sure key val exists in u, creating an empty set for it when absent.
// An existing entry is left untouched.
inline void mapInsert(map<int, set<int> >& u, int val)
{
    u.insert(make_pair(val, set<int>()));
}
// Adds every element of vset to uset (set union, in place). vset is not
// modified. Passing the same set for both arguments leaves it unchanged.
void mergeSet(set<int>& uset, set<int>& vset)
{
    for (set<int>::const_iterator it = vset.begin(); it != vset.end(); ++it)
    {
        uset.insert(*it);
    }
}
// Merges v into u key by key: every key of v is created in u if missing and
// the two value sets are united. v is not modified. Passing the same map for
// both arguments leaves it unchanged.
// Entries are visited by const reference; the previous version deep-copied
// each (key, set) pair and looked every key up several times.
void mergeMap(map<int, set<int> >& u, map<int, set<int> >& v)
{
    for (const auto& entry : v)
    {
        set<int>& target = u[entry.first]; // default-inserts an empty set if absent
        for (int item : entry.second)
        {
            target.insert(item);
        }
    }
}
// Computes firstset[u] by depth-first recursion over the rules of u.
// vst[u] == 2 marks a symbol as started, so a symbol reached again while it
// is still being computed contributes only what it has collected so far.
// A terminal's first set is the terminal itself.
void CalcFirstSet(int u)
{
    if (vst[u] == 2)
        return;
    vst[u] = 2;

    const bool isTerminal = symbolMark[u] <= 100;
    if (isTerminal)
    {
        firstset[u].insert(u);
        return;
    }

    for (size_t r = 0; r < nterminalitem[u].size(); ++r)
    {
        const vector<int>& rule = ReduceExprList[nterminalitem[u][r]];
        // Walk the right-hand side while every symbol so far can be empty.
        size_t pos = 1;
        while (pos < rule.size())
        {
            const int sym = rule[pos++];
            CalcFirstSet(sym);
            mergeSet(firstset[u], firstset[sym]);
            const bool canBeEmpty = firstset[sym].count(0) != 0;
            if (!canBeEmpty)
                break;
        }
    }
}
// Computes followset[] for every symbol in two passes.
// Pass 1 scans each rule right to left: a symbol receives the first sets of
// what follows it, and while the tail of the rule can be empty an edge
// "follow(lhs) flows into follow(sym)" is recorded in dependency[].
// Pass 2 pushes follow sets along those edges in topological order, starting
// from the symbols with in-degree 0.
void CalcFollowSet()
{
    followset[ReduceExprList[0][0]].insert(endSymbol);

    for (int r = 0; r < reduceNum; ++r)
    {
        const vector<int>& rule = ReduceExprList[r];
        const int lhs = rule[0];
        set<int> trailing;        // first sets gathered from the symbols to the right
        bool tailCanBeEmpty = true;
        const int last = static_cast<int>(rule.size()) - 1;
        for (int pos = last; pos >= 1; --pos)
        {
            const int sym = rule[pos];
            mergeSet(followset[sym], trailing);
            if (tailCanBeEmpty && lhs != sym)
            {
                ++dushu[sym];
                dependency[lhs].push_back(sym);
            }
            const bool canBeEmpty = firstset[sym].count(0) != 0;
            if (!canBeEmpty) // doesn't include epsilon
            {
                tailCanBeEmpty = false;
                trailing.clear();
            }
            mergeSet(trailing, firstset[sym]);
        }
    }

    // Seed the queue: nonterminals first, then terminals.
    for (size_t k = 0; k < nonterminal.size(); ++k)
    {
        const int sym = nonterminal[k];
        if (dushu[sym] == 0)
            mqueue[mtail++] = sym;
    }
    for (size_t k = 0; k < terminal.size(); ++k)
    {
        const int sym = terminal[k];
        if (dushu[sym] == 0)
            mqueue[mtail++] = sym;
    }

    // Propagate follow sets along the dependency edges.
    while (mfront < mtail)
    {
        const int from = mqueue[mfront++];
        for (size_t k = 0; k < dependency[from].size(); ++k)
        {
            const int to = dependency[from][k];
            mergeSet(followset[to], followset[from]);
            --dushu[to];
            if (dushu[to] == 0)
                mqueue[mtail++] = to;
        }
    }
}
// Fills SItemAuto[u]: for each symbol x, the items reached by accepting x at
// the start of any rule of u, including what is inherited from nonterminals
// that can appear first. vst[u] == 3 marks u as started so recursion ends.
void CalcSymbolTrans(int u) // calculate SItemAuto
{
    if (vst[u] == 3)
        return;
    vst[u] = 3;

    map<int, set<int> >& reach = SItemAuto[u];
    for (int ruleId : nterminalitem[u])
    {
        const vector<int>& rule = ReduceExprList[ruleId];
        for (size_t pos = 1; pos < rule.size(); ++pos)
        {
            const int sym = rule[pos];
            // Accepting sym here moves the dot of this rule to position pos.
            reach[sym].insert(ValidItem[ruleId][pos]);

            if (symbolMark[sym] <= 100) // terminal: nothing further to expand
                break;

            CalcSymbolTrans(sym);
            mergeMap(reach, SItemAuto[sym]);
            // Only continue past sym when sym can derive epsilon.
            if (firstset[sym].count(0) == 0)
                break;
        }
    }
}
// Fills VItemAuto[]: for every valid item, the item set reached on each symbol.
// Each rule is scanned from its last dot position that still has a symbol
// after it down to position 0. The item with the dot at the very end gets
// no symbol transition; it gets the reduce entries added at the bottom.
void CalcItemTrans()//item trans /last pos dont trans
{
for (int itemid = 0; itemid < reduceNum; itemid++)
{
// Transitions accumulated while walking this rule right to left.
map<int, set<int>> nowmap;
nowmap.clear();
for (int i = (int)ReduceExprList[itemid].size() - 2; i >= 0; i--)
{
// v is the symbol right after the dot of item nowstate.
int v = ReduceExprList[itemid][i + 1];
int nowstate = ValidItem[itemid][i];
// auto it = VItemAuto[nowstate];
// nowmap[v].insert(nowstate + 1);
// Transitions gathered from later positions are kept only when v can
// derive epsilon (0 in its first set); otherwise they are dropped.
if (firstset[v].count(0) == 0)
nowmap.clear();
/* mapInsert(nowmap, 0);
setInsert(nowmap[0], nowstate);*///insert epsilon trans
mapInsert(nowmap, v);
setInsert(nowmap[v], nowstate + 1);//insert symbol v trans
// A nonterminal after the dot also contributes its own symbol transitions.
if (101 == symbolMark[v])
{
mergeMap(nowmap, SItemAuto[v]);
}
mergeMap(VItemAuto[nowstate], nowmap);
}
// Item with the dot at the end of the rule: add a reduce entry for every
// symbol in the follow set of the left-hand side that passes the
// privilege test.
int reduceState = ValidItem[itemid][(int)ReduceExprList[itemid].size() - 1];
for (int tterminal:followset[ReduceExprList[itemid][0]])
{
// NOTE(review): privilege[] is indexed by a symbol id here, but the visible
// code only writes it at rule indices (init) and at endSymbol -- confirm
// this comparison is the intended priority rule.
if (privilege[itemid] >= privilege[tterminal])
{
mapInsert(VItemAuto[reduceState], tterminal);
setInsert(VItemAuto[reduceState][tterminal], ReduceOp);
}
}
}
}
// Forward declaration: StateNonTermTrans and StateTrans call each other.
void StateTrans();
// Creates, if not yet present, the transition of the state on top of
// CoreStack for symbol x (called from StateTrans after a reduce, with x
// being the left-hand side of the reduced rule).
// When the target item set is new, a state is allocated for it, pushed,
// explored with StateTrans, and popped again.
void StateNonTermTrans(int x)
{
int u = CoreStack[stackTop - 1];
// Nothing to do when this transition has already been recorded.
if (StateAuto[u].count(x) == 0)
{
// Union of the targets on x over all items of state u.
set<int> tmpItemSet;
for (int vitem1 : DFAState[u])
{
if (VItemAuto[vitem1].count(x) == 0)
continue;
mergeSet(tmpItemSet, VItemAuto[vitem1][x]);
}
// Look for an existing state with the same item set in its hash bucket.
int hashid = hashCode(tmpItemSet);
bool flag = false;
for (int stateid : hashlist[hashid])
{
if (sameSet(DFAState[stateid], tmpItemSet))
{
flag = true;
StateAuto[u][x] = stateid;
break;
}
}
if (!flag)
{
// New item set: register it as state stateNum and explore it.
// NOTE(review): DFAState holds bign entries while StateAuto and CoreStack
// hold 3 * bign; no bound on stateNum is checked here.
StateAuto[u][x] = stateNum;
hashlist[hashid].push_back(stateNum);
DFAState[stateNum].clear();
for (int tmp : tmpItemSet)
DFAState[stateNum].insert(tmp);
stateNum++;
CoreStack[stackTop++] = StateAuto[u][x];
StateTrans();
stackTop--;
}
}
}
// Explores every transition of the state on top of CoreStack and records it
// in StateAuto. State 1 is the end state and is not expanded.
// For each symbol x that some item of the state can accept:
//   - if an item's target set for x contains ReduceOp, the entry becomes a
//     reduce, stored as -(rule index) - 1;
//   - otherwise the entry is a shift to the state holding the merged target
//     item set, which is created and explored recursively when it is new.
void StateTrans()//StateTrans
{
int u = CoreStack[stackTop - 1];
if (1 == u)//end state
return;
//set<int> symbolAcceptable;
//this structure under may be ugly but most efficient way of coding it which i can imagine
// mymap: symbol -> items of state u that have a transition on that symbol.
map<int, vector<int>> mymap;
for (int vitem : DFAState[u])
{
for (pair<int, set<int> > trans : VItemAuto[vitem])
{
int x = trans.first;
if (mymap.count(x) == 0)
mymap[x] = vector<int>();
mymap[x].push_back(vitem);
}
}
for (pair<int, vector<int>> sym2item: mymap)
{
int x = sym2item.first;
set<int> tmpItemSet;
bool reduceFlag = false;
int reduceitem = -1;
for (int vitem1 : sym2item.second)
{
// The first item that asks for a reduce on x decides the entry; the
// remaining items for x are not inspected.
if (VItemAuto[vitem1][x].count(ReduceOp) > 0)
{
reduceFlag = true;
reduceitem = xid[vitem1];
break;
}
mergeSet(tmpItemSet, VItemAuto[vitem1][x]);
}
if (reduceFlag)
{
StateAuto[u][x] = - reduceitem - 1; // we can use minus num to represent reduce op
// Pop one stack entry per right-hand-side symbol, then follow the
// transition on the rule's left-hand side from the exposed state.
stackTop -= (ReduceExprList[reduceitem].size() - 1);
StateNonTermTrans(ReduceExprList[reduceitem][0]);
// Rebuild the popped part of the stack by replaying the right-hand side
// through StateAuto. operator[] is used, so a missing entry reads as 0.
int tu = CoreStack[stackTop - 1];
for (int i = 1;i < ReduceExprList[reduceitem].size(); i++)//recover stack state before reduce
{
CoreStack[stackTop++] = StateAuto[tu][ReduceExprList[reduceitem][i]];
tu = CoreStack[stackTop - 1];
}
continue;
}
// Shift: reuse an existing state with the same item set if there is one.
int hashid = hashCode(tmpItemSet);
bool flag = false;
for (int stateid : hashlist[hashid])
{
if (sameSet(DFAState[stateid], tmpItemSet))
{
flag = true;
StateAuto[u][x] = stateid;
break;
}
}
if (!flag)
{
// New item set: register it as state stateNum and explore it.
StateAuto[u][x] = stateNum;
hashlist[hashid].push_back(stateNum);
DFAState[stateNum].clear();
for (int tmp : tmpItemSet)
DFAState[stateNum].insert(tmp);
stateNum++;
CoreStack[stackTop++] = StateAuto[u][x]; //shift
StateTrans();
stackTop--;// recover
}
}
}
// Runs the whole construction: first sets, follow sets, per-symbol and
// per-item transitions, then the state automaton starting from state 0.
void buildStackAuto() //Automata is built on item
{
    // Sentinel meaning "reduce"; chosen past the highest allocated item id.
    ReduceOp = maxValidItem + 1;

    // A terminal's first set is itself; epsilon (0) is treated the same way.
    for (size_t k = 0; k < terminal.size(); ++k)
    {
        const int sym = terminal[k];
        firstset[sym].insert(sym);
        vst[sym] = 2;
    }
    firstset[0].insert(0);
    vst[0] = 2;

    for (size_t k = 0; k < nonterminal.size(); ++k)
        CalcFirstSet(nonterminal[k]);
    CalcFollowSet();
    for (size_t k = 0; k < nonterminal.size(); ++k)
        CalcSymbolTrans(nonterminal[k]);
    CalcItemTrans();

    // Two states exist up front: state 0 = {item 0} is the start state and
    // state 1 = {item 1} is the end state.
    for (int seed = 0; seed <= 1; ++seed)
    {
        DFAState[seed].insert(seed);
        hashlist[hashCode(DFAState[seed])].push_back(seed);
        stateNum++;
    }

    CoreStack[stackTop++] = 0;
    StateTrans();
}
// Builds the automaton for the built-in grammar and prints the number of
// DFA states that were created.
int main()
{
    init();
    buildStackAuto();
    cout << stateNum << endl;
    return 0;
}
这份代码可能有错误,因为只用了一个测试样例进行验证。不过如果没错的话就太棒了。
后来发现并修正了一些错误,现在可能是对的。