程序功能:
一、LL(1)文法合法性的判断
包括:
1. 非终结符是否可以推出空
2. 表达式是否推出空
3. 非终结符的First集合
4. 表达式的First集合
5. 非终结符的Follow集合
6. Select集合
7. 是否符合LL(1)文法
二、主控程序,判断句子是否符合当前文法
1. 预测分析表
2. 语法分析程序
特别说明:
(1) 求First集合的时候,并没有使用递归,只是不断地重复扫描,直到集合不再增大。
(2) 求Follow集合的时候使用了递归。
#include <iostream>
#include <string>
#include <cstring>
#include <vector>
#include <cstdio>
#include <map>
#include <stack>
#include <set>
#include <algorithm>
using namespace std;
// A non-terminal symbol of the grammar together with its analysis results.
struct NT
{
    char m_name;               // the symbol itself ('\0' until assigned)
    int m_isE;                 // nullable state: 0 = undecided, 1 = derives '&', -1 = does not
    int m_F_num;               // count of productions whose left side is this symbol
    bool m_flag;               // scratch marker (used as the recursion guard by getFollow)
    std::set<char> FirstSet;   // FIRST set, without the empty-string marker
    std::set<char> FollowSet;  // FOLLOW set

    // Both constructors initialise every scalar member, in declaration order,
    // so a default-constructed NT never carries an indeterminate name.
    NT() : m_name('\0'), m_isE(0), m_F_num(0), m_flag(false) {}
    NT(char c) : m_name(c), m_isE(0), m_F_num(0), m_flag(false) {}
};
// A terminal symbol of the grammar.
struct T
{
    char m_name;  // the symbol itself ('\0' until assigned)
    int m_isE;    // initialised to 0 by both constructors
    bool m_flag;  // initialised to false by both constructors

    // The default constructor also clears m_name so that no member is left
    // with an indeterminate value.
    T() : m_name('\0'), m_isE(0), m_flag(false) {}
    T(char c) : m_name(c), m_isE(0), m_flag(false) {}
};
// One production "m_L -> m_R" together with its analysis results.
struct FORMULA
{
    char m_L;                  // left-hand non-terminal ('\0' until assigned)
    std::string m_R;           // right-hand side; the single character "&" denotes the empty string
    int m_isE;                 // nullable state of the right side: 0 = undecided, 1 = yes, -1 = no
    bool m_flag;               // scratch "already processed" marker
    std::set<char> SelectSet;  // SELECT set of the production
    std::set<char> FirstSet;   // FIRST set of the right side, without the empty-string marker

    // m_L is cleared as well: default-constructed productions are used as
    // empty parsing-table cells and are copied by value, so no member may be
    // left indeterminate.
    FORMULA() : m_L('\0'), m_isE(0), m_flag(false) {}
};
// Global state shared by every routine in this file.
// Number of productions to read; set in main() and used as the loop bound in readFormula().
int FormulaNumber;
// Non-terminal symbols, appended by readNT() in input order.
vector<NT> NTs;
// Terminal symbols, appended by readT(); DoGetAnalysisTable() later appends the end marker '#'.
vector<T> Ts;
// Productions, appended by readFormula().
vector<FORMULA> Fs;
// Predictive parsing table, allocated with new[] in DoGetAnalysisTable(). A cell is addressed as
// Ts.size() * (row taken from GetId) + (column taken from GetId2). Nothing in this file releases it.
FORMULA *AnalysisTable;
map<char,int> GetId; // non-terminal character -> index into NTs
// Terminal character -> index into Ts (also holds '#' once DoGetAnalysisTable() has run).
map<char,int> GetId2;
// Start symbol of the grammar, read in main().
char Start;
// Result of freopen() when main() redirects stdin to a grammar file.
FILE* FP;
// Reads one whitespace-delimited token from standard input and registers each
// of its characters as a non-terminal: the symbol is appended to NTs and its
// position is recorded in GetId.
//
// A symbol that was already registered is skipped. Previously a repeated
// character was appended to NTs again while GetId kept its first index, so the
// indices stored for every later symbol no longer matched their NTs position.
void readNT()
{
    string symbols;
    cin >> symbols;
    for (string::size_type pos = 0; pos < symbols.size(); ++pos)
    {
        const char symbol = symbols[pos];
        if (GetId.count(symbol) != 0)
        {
            continue;
        }
        // The index is taken from NTs itself so map and vector cannot drift apart.
        GetId.insert(map<char,int>::value_type(symbol, static_cast<int>(NTs.size())));
        NTs.push_back(NT(symbol));
    }
}
void readT()
{
string s;
int i;
cin>>s;
for (i=0;i<s.size();++i)
{
Ts.push_back(T(s[i]));
//建立终结符下标映射
GetId2.insert(map<char,int>::value_type(s[i],GetId2.size()));
}
}
void readFormula()
{
int i;
string s;
char c;
FORMULA f;
for (i=0;i<FormulaNumber;++i)
{
cin>>f.m_L>>c>>c;
cin>>f.m_R;
NTs[GetId[f.m_L]].m_F_num++;
Fs.push_back(f);
}
}
bool isNT(char c)
{
int i;
for (i=0;i<NTs.size();++i)
{
if (NTs[i].m_name == c)
{
return true;
}
}
return false;
}
bool isT(char c)
{
int i;
for (i=0;i<Ts.size();++i)
{
if (Ts[i].m_name == c)
{
return true;
}
}
return false;
}
void printFormula()
{
int i;
cout<<"Formula:"<<endl;
for (i=0;i<Fs.size();++i)
{
cout<<Fs[i].m_L<<"->"<<Fs[i].m_R<<endl;
}
}
//是否推出空
void DoIfIsE()
{
int i;
int done=0;
while (done<NTs.size())
{
for (i=0;i<Fs.size();++i)
{
//处理过,或者已经推导出结果 跳过
if(Fs[i].m_flag || NTs[GetId[Fs[i].m_L]].m_isE!=0)
{
continue;
}
//推出空
if (Fs[i].m_R=="&")
{
NTs[GetId[Fs[i].m_L]].m_isE = 1;
//标记处理过
Fs[i].m_flag = true;
done++;
}
//推出终结符 T
else if (isT(Fs[i].m_R[0]))
{
//标记处理过
Fs[i].m_flag = true;
//NTs[i]的待处理 产生式-1
if ((--NTs[GetId[Fs[i].m_L]].m_F_num)==0)
{
//所有产生式被删除
NTs[GetId[Fs[i].m_L]].m_isE = -1;
done++;
}
}
//推出终结符 NT
else
{
int k;
bool flag=true;
bool allempty=true;
for (k=0;k<Fs[i].m_R.size();++k)
{
if (NTs[GetId[Fs[i].m_R[k]]].m_isE==0)
{
flag=false;
break;
}
if (NTs[GetId[Fs[i].m_R[k]]].m_isE==-1)
{
allempty=false;
}
}
if (!flag)
{
continue;
}
if (allempty)
{
Fs[i].m_flag =true;
NTs[GetId[Fs[i].m_L]].m_isE = 1;
done++;
}else
{
Fs[i].m_flag =true;
NTs[GetId[Fs[i].m_L]].m_isE = -1;
done++;
}
}
}
}
cout<<"推空:"<<endl;
for (i=0;i<NTs.size();++i)
{
cout<<NTs[i].m_name<<"\t "<<NTs[i].m_isE<<endl;
}
}
//表达式右侧是否空
void DoFormulaisE()
{
int i,j;
bool flag;
for (i=0;i<Fs.size();++i)
{
if (Fs[i].m_R[0]=='&')
{
Fs[i].m_isE=1;
continue;
}
flag=true;
for (j=0;j<Fs[i].m_R.size();++j)
{
//if (Fs[i].m_R[j]<='z' && Fs[i].m_R[j]>='a')
if (isT(Fs[i].m_R[j]))
{
flag=false;
break;
}else if(NTs[GetId[Fs[i].m_R[j]]].m_isE==-1)
{
flag=false;
break;
}
}
if (flag)
{
Fs[i].m_isE=1;
}else
Fs[i].m_isE=-1;
}
for (i=0;i<Fs.size();++i)
{
cout<<Fs[i].m_L<<"->"<<Fs[i].m_R<<"\t "<<Fs[i].m_isE<<endl;
}
}
// Returns the current FIRST set of the non-terminal NTchar as a string holding
// the set's characters in the set's iteration order.
string getFirst(char NTchar)
{
    const set<char> &first = NTs[GetId[NTchar]].FirstSet;
    return string(first.begin(), first.end());
}
void printFirstSet()
{
int i;
cout<<"FirstSet:"<<endl;
for(i=0;i<NTs.size();++i)
{
cout<<NTs[i].m_name<<"\t: ";
if (NTs[i].m_isE==1) cout<<'&';
cout<<getFirst(NTs[i].m_name)<<endl;
}
cout<<endl;
}
void DoGetFirstSet()
{
int i;
for (i=0;i<Fs.size();++i)
{
Fs[i].m_flag=0;
}
vector<FORMULA> FF(Fs.begin(),Fs.end());
while (true)
{
//f表示是否 未更新
bool f=true;
//遍历每个 表达式
for (i=0;i<FF.size();++i)
{
//可以跳过的
if (FF[i].m_flag) continue;
//如果表达式右边第一个是非终结符
if(FF[i].m_R[0]=='&')
{
FF[i].m_flag=true;
continue;
}
//if (FF[i].m_R[0]<='z' && FF[i].m_R[0]>='a' )
if (isT(FF[i].m_R[0]) )
{
FF[i].m_flag=true;
NTs[GetId[FF[i].m_L]].FirstSet.insert(FF[i].m_R[0]);
f=false;
continue;
}
int k;//遍历 FF[i] 右侧所有符号
for (k=0;k<FF[i].m_R.size();++k)
{
//如果是终结符 结束
//if (FF[i].m_R[k]<='z' && FF[i].m_R[k]>='a')
if (isT(FF[i].m_R[k]))
{
//FF[i].m_flag=true;
if(NTs[GetId[FF[i].m_L]].FirstSet.insert(FF[i].m_R[k]).second)
f=false;
break;
}
//如果是非终结符,获取此非终结符的 fitsrt
string r=getFirst(FF[i].m_R[k]);
int j;
for (j=0;j<r.size();++j)
{
if (NTs[GetId[FF[i].m_L]].FirstSet.insert(r[j]).second)
f=false;
}
//如果此非终结符 不能推出空 则结束右侧的查找
if (NTs[GetId[FF[i].m_R[k]]].m_isE==-1)
break;
}
}
//printFirstSet();
//集合未更新 退出
if (f)
{
break;
}
}
printFirstSet();
}
// Merges every element of b into a (a = a ∪ b); b is left unchanged.
//
// b is taken by const reference so that temporaries can be passed, e.g.
// unionSet(R, getFollow(...)); binding a temporary to the former non-const
// reference parameter is ill-formed in standard C++.
void unionSet(std::set<char> &a, const std::set<char> &b)
{
    // Range-inserting a container into itself is not permitted, and a ∪ a = a.
    if (&a == &b)
    {
        return;
    }
    a.insert(b.begin(), b.end());
}
set<char> getFollow(char NTchar)
{
int i,j,k;
set<char> R=NTs[GetId[NTchar]].FollowSet;
//遇到递归标记返回
if (NTs[GetId[NTchar]].m_flag)
{
return R;
}
//递归标记
NTs[GetId[NTchar]].m_flag=true;
//搜索所有表达式
for (i=0;i<Fs.size();++i)
{
bool flag=true;
/* if (NTs[GetId[Fs[i].m_L]].m_flag)
{
continue;
}*/
//搜索表达式中的非终结符
for (j=0;j<Fs[i].m_R.size();++j)
{
//如果找到,搜索 后面的非终结符
if (Fs[i].m_R[j]==NTchar)
{
for (k=j+1;k<Fs[i].m_R.size();++k)
{
//if (Fs[i].m_R[k]<='z'&&Fs[i].m_R[k]>='a')
if (isT(Fs[i].m_R[k]) )
{
R.insert(Fs[i].m_R[k]);
flag=false;
break;
}
unionSet(R,NTs[GetId[Fs[i].m_R[k]]].FirstSet);
if (NTs[GetId[Fs[i].m_R[k]]].m_isE==-1)
{
flag=false;
break;
}
}
if (flag)
{
unionSet(R,getFollow(Fs[i].m_L));
}
break;
}
}
}
NTs[GetId[NTchar]].m_flag=false;
return R;
}
void printFollowSet()
{
int i,j;
cout<<"FollowSet:"<<endl;
for (i=0;i<NTs.size();++i)
{
set<char>::iterator it;
it=NTs[i].FollowSet.begin();
cout<<NTs[i].m_name<<"\t: ";
while (it!=NTs[i].FollowSet.end())
{
cout<<*it;
++it;
}
cout<<endl;
}
}
void DoGetFollowSet()
{
//初始化 非终结符的 递归标记
int p;
for (p=0;p<NTs.size();++p)
{
NTs[p].m_flag=false;
}
NTs[GetId[Start]].FollowSet.insert('#');
while (true)
{
bool f=true;
//cout<<"done one time"<<endl;
int i;
for (i=0;i<NTs.size();++i)
{
set<char> R=getFollow(NTs[i].m_name);
set<char>::iterator it=R.begin();
while (it!=R.end())
{
if (NTs[i].FollowSet.insert(*it).second) //更新成功
{
f=false;
}
++it;
}
}
if (f)
{
break;
}
}
printFollowSet();
}
void DoGetFormulaFirstSet()
{
int i,j;
//遍历所有表达式
for (i=0;i<Fs.size();++i)
{
//跳过空
if (Fs[i].m_R[0]=='&')
{
continue;
}
//遍历表达式右边
for (j=0;j<Fs[i].m_R.size();++j)
{
//如果是终结符,加入first 结束
//if (Fs[i].m_R[j]<='z'&& Fs[i].m_R[j]>='a')
if (isT(Fs[i].m_R[j]) )
{
Fs[i].FirstSet.insert(Fs[i].m_R[j]);
break;
}else
{
unionSet(Fs[i].FirstSet,NTs[GetId[Fs[i].m_R[j]]].FirstSet);
}
//不能推出空 结束
if (NTs[GetId[Fs[i].m_R[j]]].m_isE == -1)
{
break;
}
}
}
//输出。
cout<<"Formula FirstSet:"<<endl;
for (i=0;i<Fs.size();++i)
{
set<char>::iterator it=Fs[i].FirstSet.begin();
cout<<Fs[i].m_L<<"->"<<Fs[i].m_R<<" \t: ";
if (Fs[i].m_isE==1)
{
cout<<'&';
}
while (it!=Fs[i].FirstSet.end())
{
cout<<*it;
++it;
}
cout<<endl;
}
}
// Ordering predicate for productions: a comes before b when a's left-hand
// symbol compares less than b's.
bool cmp(const FORMULA &a,const FORMULA &b)
{
    const char leftOfA = a.m_L;
    const char leftOfB = b.m_L;
    return leftOfA < leftOfB;
}
bool IsLL1()
{
bool charHash[256]={0};
char preChar=0;
//拷贝一份 表达式集
vector<FORMULA> FF(Fs.begin(),Fs.end());
int i;
//按照左部字典序排序
sort(FF.begin(),FF.end(),cmp);
for (i=0;i<FF.size();++i)
{
//左部不同 清空 hash数组
if (FF[i].m_L!=preChar)
{
memset(charHash,0,sizeof(charHash));
}
preChar=FF[i].m_L;
//开始比较
set<char>::iterator it=FF[i].SelectSet.begin();
while (it!=FF[i].SelectSet.end())
{
if (charHash[*it])
{
return false;
}else
charHash[*it]=1;
it++;
}
}
return true;
}
void DoGetSelectSet()
{
int i;
for (i=0;i<Fs.size();++i)
{
unionSet(Fs[i].SelectSet,Fs[i].FirstSet);
if (Fs[i].m_isE==1)
{
unionSet(Fs[i].SelectSet,NTs[GetId[Fs[i].m_L]].FollowSet);
}
}
cout<<"SelectSet:"<<endl;
for (i=0;i<Fs.size();++i)
{
set<char>::iterator it=Fs[i].SelectSet.begin();
cout<<Fs[i].m_L<<"->"<<Fs[i].m_R<<" \t: ";
while (it!=Fs[i].SelectSet.end())
{
cout<<*it;
++it;
}
cout<<endl;
}
}
void DoGetAnalysisTable()
{
int i;
Ts.push_back(T('#'));
GetId2.insert(map<char,int>::value_type('#',GetId2.size()));
AnalysisTable = new FORMULA[NTs.size()*(Ts.size()+1)];
for (i=0;i<Fs.size();++i)
{
set<char>::iterator it=Fs[i].SelectSet.begin();
while (it!=Fs[i].SelectSet.end())
{
AnalysisTable[Ts.size()*GetId[Fs[i].m_L]+GetId2[*it]]=Fs[i];
++it;
}
}
}
// Returns a copy of the parsing-table cell for non-terminal NTchar and input
// symbol Tchar. A cell that holds no production has an empty m_R.
//
// When either symbol has no row/column, an empty production (m_L == '\0',
// empty m_R) is returned. The previous operator[] lookups inserted the unknown
// symbol with index 0 and silently returned the cell of row/column 0 instead.
FORMULA AnalysisTableAt(char NTchar,char Tchar)
{
    map<char,int>::const_iterator row = GetId.find(NTchar);
    map<char,int>::const_iterator col = GetId2.find(Tchar);
    if (row == GetId.end() || col == GetId2.end())
    {
        FORMULA none;
        none.m_L = '\0';
        return none;
    }
    return AnalysisTable[Ts.size() * row->second + col->second];
}
// Returns a copy of the parsing-table cell at row i, column j, where the row
// stride is the current Ts.size().
FORMULA AnalysisTableAt(int i,int j)
{
    const size_t offset = Ts.size() * i + j;
    return AnalysisTable[offset];
}
void printAnalysisTable()
{
int i,j;
for (i=0;i<(Ts.size()+1)*9;++i) cout<<"=";
cout<<endl<<"| |";
for (i=0;i<Ts.size();++i) cout<<" "<<Ts[i].m_name<<" |";
cout<<endl;
for (i=0;i<NTs.size();++i)
{
cout<<"| "<<NTs[i].m_name<<" |";
for (j=0;j<Ts.size();++j)
{
printf("%5s |",AnalysisTableAt(i,j).m_R.c_str());
}
cout<<endl;
}
for (i=0;i<(Ts.size()+1)*9;++i) cout<<"=";
cout<<endl;
}
void DoMainControl()
{
char S[1001];
string ss;
vector<char> ST;
int it=0,n=1,i;
cout<<"请输入分析串:(长度小于1000)"<<endl;
//fflush(stdin);
cin>>ss;
cout<<ss<<endl;
ss+='#';
//cout<<ss<<endl;
strcpy(S,ss.c_str());
for (i=0;i<(Ts.size()+1)*9;++i) cout<<"=";
cout<<endl;
ST.push_back('#');
ST.push_back(Start);
while (true)
{
if (*ST.rbegin()=='#' && S[it]=='#')
{
cout<<n++<<"\t";
for (i=0;i<ST.size();++i)cout<<ST[i];
cout<<"\t\t"<<S+it<<"\t\t"<<"分析成功"<<endl;
break;
}
if (isT(*ST.rbegin()))
{
if (*ST.rbegin()==S[it])
{
cout<<n++<<"\t";
for (i=0;i<ST.size();++i)cout<<ST[i];
cout<<"\t\t"<<S+it<<"\t\t"<<S[it]<<"匹配"<<endl;
it++;ST.pop_back();
continue;
}else
{
cout<<"不匹配"<<endl;
break;
}
}
FORMULA f=AnalysisTableAt(*ST.rbegin(),S[it]);
if (f.m_R=="&")
{
cout<<n++<<"\t";
for (i=0;i<ST.size();++i)cout<<ST[i];
cout<<"\t\t"<<S+it<<"\t\t"<<f.m_L<<"->"<<f.m_R<<endl;
ST.pop_back();
it++;
continue;
}
else
{
cout<<n++<<"\t";
for (i=0;i<ST.size();++i)cout<<ST[i];
printf("\t\t%s",S+it);
cout<<"\t\t"<<f.m_L<<"->"<<f.m_R<<endl;
ST.pop_back();
for (i=f.m_R.size()-1;i>=0;--i) ST.push_back(f.m_R[i]);
}
}
}
int main()
{
string s;
int i;
cout<<"1.文件输入"<<endl;
cout<<"2.手动输入"<<endl;
cin>>i;
if (i==1)
{
cout<<"请输入文件名"<<endl;
cin>>s;
FP=freopen(s.c_str(),"r",stdin);
readNT();
readT();
cin>>Start;
cin>>FormulaNumber;
readFormula();
}
else
{
cout<<"请输入非终结符:";
readNT();
cout<<"请输入非终结符:";
readT();
cout<<"请输入开始符号:";
cin>>Start;
cout<<"请输入产生式数:";
cin>>FormulaNumber;
readFormula();
}
printFormula();
DoIfIsE();
DoFormulaisE();
DoGetFirstSet();
DoGetFormulaFirstSet();
DoGetFollowSet();
DoGetSelectSet();
if (IsLL1())
{
cout<<"=======此文法符合LL(1)文法========"<<endl;
DoGetAnalysisTable();
printAnalysisTable();
DoMainControl();
}else
{
cout<<"=======此文法不符合LL(1)文法========"<<endl;
DoGetAnalysisTable();
printAnalysisTable();
}
}
第一个测试数据
第一个测试数据