一、实验目的
理解LR(1)语法分析方法的原理,掌握LR(1)分析表的构造,设计相关数据结构和程序结构,加深对自下而上语法分析方法的理解。
二、实验内容
需要实现的功能:
1)使用LR(1)分析方法构造识别活前缀的DFA;
2)构造文法的分析表(Action表和Goto表);
3)输入文法:文法描述存储在文本文件中,文件名作为命令行参数输入;
4)输出文法的项目集簇(标准输出设备);
5)输出识别活前缀的DFA(标准输出设备);
6)输出文法的Action表和Goto表(输出到创建的指定LR分析表文件,文件名与文法描述文件同名,扩展名为lrtbl);
7)输出文法是否是LR(1)文法的判断结果(标准输出设备)。
三、实验要求
1)文法描述文件和LR分析表文件的格式参见文档《实验用文件结构.doc》;
2)使用《LR0分析表的构造》实验的结果。
附件:样例
描述:
文法G =(VN,VT,P,S)其中:
VN = { S', S, L, R }
VT = { =, *, i }
P = { S' -> S
S -> L=R
S -> R
L -> *R
L -> i
R -> L
}
S = S'
输入(文法描述文件):
4
S' S L R
3
= * i
6
S' -> S
S -> L = R
S -> R
L -> * R
L -> i
R -> L
S'
输出(标准输出设备):
CFG=(VN,VT,P,S)
VN: S' S L R
VT: = * i
Production:
0: S' -> S
1: S -> L = R
2: S -> R
3: L -> * R
4: L -> i
5: R -> L
StartSymbol: S'
[LR(1) item set cluster]
I0 :
S' -> . S, #
S -> . L = R, #
S -> . R, #
L -> . * R, #
L -> . * R, =
L -> . i, #
L -> . i, =
R -> . L, #
I1 :
S' -> S ., #
I2 :
S -> L . = R, #
R -> L ., #
I3 :
S -> R ., #
I4 :
L -> . * R, #
L -> . * R, =
L -> * . R, #
L -> * . R, =
L -> . i, #
L -> . i, =
R -> . L, #
R -> . L, =
I5 :
L -> i ., #
L -> i ., =
I6 :
L -> * R ., #
L -> * R ., =
I7 :
R -> L ., #
R -> L ., =
I8 :
S -> L = . R, #
L -> . * R, #
L -> . i, #
R -> . L, #
I9 :
S -> L = R ., #
I10:
L -> . * R, #
L -> * . R, #
L -> . i, #
R -> . L, #
I11:
L -> i ., #
I12:
R -> L ., #
I13:
L -> * R ., #
[LR(1) state tran function]
0 , * -> 4
0 , i -> 5
0 , S -> 1
0 , L -> 2
0 , R -> 3
2 , = -> 8
4 , * -> 4
4 , i -> 5
4 , L -> 7
4 , R -> 6
8 , * -> 10
8 , i -> 11
8 , L -> 12
8 , R -> 9
10, * -> 10
10, i -> 11
10, L -> 12
10, R -> 13
文法是 LR(1) 文法!
输出(分析表文件.lrtbl):
22
0 2 s4
0 3 s5
1 0 acc
2 0 r5
2 1 s8
3 0 r2
4 2 s4
4 3 s5
5 0 r4
5 1 r4
6 0 r3
6 1 r3
7 0 r5
7 1 r5
8 2 s10
8 3 s11
9 0 r1
10 2 s10
10 3 s11
11 0 r4
12 0 r5
13 1 r3
9
0 1 1
0 2 2
0 3 3
4 2 7
4 3 6
8 2 12
8 3 9
10 2 12
10 3 13
四、代码
#include <fstream>
#include <sstream>
#include <algorithm>
#include "iostream"
#include "set"
#include "string"
#include "map"
#include "vector"
using namespace std;
#ifndef BIANYI_WENFA_H
#define BIANYI_WENFA_H
/// Non-terminal symbol set of the grammar: the declared count plus the symbols.
class VN {
private:
    int n = 0;                  // declared number of non-terminals; 0 until setN() is called
    std::set<std::string> vn;   // the non-terminal symbols (kept sorted by std::set)
public:
    /// Returns the declared number of non-terminals.
    int getN() const {
        return n;
    }
    /// Returns the stored non-terminal symbols.
    const std::set<std::string> &getVn() const {
        return vn;
    }
    /// Replaces the stored non-terminal symbols with a copy of `vn`.
    void setVn(const std::set<std::string> &vn) {
        this->vn = vn;
    }
    /// Stores the declared number of non-terminals.
    void setN(int n) {
        this->n = n;
    }
};
/// Terminal symbol set of the grammar: the declared count plus the symbols.
class VT {
private:
    int n = 0;                  // declared number of terminals; 0 until setN() is called
    std::set<std::string> vt;   // the terminal symbols (kept sorted by std::set)
public:
    /// Returns the declared number of terminals.
    int getN() const {
        return n;
    }
    /// Stores the declared number of terminals.
    void setN(int n) {
        this->n = n;
    }
    /// Returns the stored terminal symbols.
    const std::set<std::string> &getVt() const {
        return vt;
    }
    /// Replaces the stored terminal symbols with a copy of `vt`.
    void setVt(const std::set<std::string> &vt) {
        this->vt = vt;
    }
};
/// Production list of the grammar. Production i has left-hand side p1[i] and
/// right-hand side p2[i] (one string per grammar symbol).
class P {
private:
    int n = 0;                                   // declared number of productions; 0 until setN()
    std::vector<std::string> p1;                 // left-hand sides
    std::vector<std::vector<std::string>> p2;    // right-hand sides, parallel to p1
public:
    /// Returns the right-hand sides of all productions.
    const std::vector<std::vector<std::string>> &getP2() const {
        return p2;
    }
    /// Replaces the right-hand sides with a copy of `p2`.
    void setP2(const std::vector<std::vector<std::string>> &p2) {
        this->p2 = p2;
    }
    /// Returns the left-hand sides of all productions.
    const std::vector<std::string> &getP1() const {
        return p1;
    }
    /// Replaces the left-hand sides with a copy of `p1`.
    void setP1(const std::vector<std::string> &p1) {
        this->p1 = p1;
    }
    /// Returns the declared number of productions.
    int getN() const {
        return n;
    }
    /// Stores the declared number of productions.
    void setN(int n) {
        this->n = n;
    }
};
/// Holder for the grammar's start symbol.
class S {
public:
    /// Stores a copy of `s` as the start symbol.
    void setS(const std::string &s) {
        this->s = s;
    }
    /// Returns the stored start symbol (empty until setS() is called).
    const std::string &getS() const {
        return s;
    }
private:
    std::string s;   // the start symbol
};
/// One LR(1) item: a production, a dot position inside its right-hand side,
/// and a single lookahead symbol.
class LR1program {
public:
    int pnum = 0;              // index of the production this item is built from
    int dotpos = -1;           // index of the last symbol before the dot; -1 = dot in front of everything
    std::string subsequence;   // lookahead symbol

    /// Two items are equal when production, dot position and lookahead all match.
    bool operator==(const LR1program &a) const {
        return pnum == a.pnum && dotpos == a.dotpos && subsequence == a.subsequence;
    }
};
class CFG{
private:
string filename;
VN vn;
VT vt;
P p;
S s;
map<int,vector<LR1program>> pro; //储存项目集簇
string lr1table; //分析表文件
public:
void setFilename(const string &filename) {
CFG::filename = filename;
}
void setLr1Table(const string &lr1Table) {
lr1table = lr1Table;
}
void fileop() { //读文件
string fileline;
string str;
set<string> middle;
vector<string> p1;
vector<string> mid;
vector<vector<string>> p2;
ifstream fout(filename);
for (int i = 1; getline(fout, fileline); i++) {
if (i == 1) {
vn.setN(atoi(fileline.c_str()));//非终结符个数
} else if (i == 2) { //非终结符集合
istringstream is(fileline);
while (is >> str) {
middle.insert(str);
}
vn.setVn(middle);
middle.clear();
} else if (i == 3) { //终结符个数
vt.setN(atoi(fileline.c_str()));
} else if (i == 4) { //终结符集合
istringstream is(fileline);
while (is >> str) {
middle.insert(str);
}
vt.setVt(middle);
middle.clear();
} else if (i == 5) { //规则个数
p.setN(atoi(fileline.c_str()));
} else if (i == p.getN() + 6) { //指定开始符
s.setS(fileline.c_str());
} else{
istringstream is(fileline);
for(int j=0;is>>str;j++){
if(j==0){
p1.push_back(str);
} else if(j==1);
else {
mid.push_back(str);
}
}
p2.push_back(mid);
mid.clear();
}
}
p.setP1(p1);
p.setP2(p2);
}
void outputCFG(){ //输出CFG
cout<<"CFG=(VN,VT,P,S)"<<endl;
cout<<"VN: ";
for(set<string>::iterator it=vn.getVn().begin();it!=vn.getVn().end();it++){
cout<<*it<<" ";
}
cout<<endl<<"VT: ";
for(set<string>::iterator it=vt.getVt().begin();it!=vt.getVt().end();it++){
cout<<*it<<" ";
}
cout<<endl<<"Production:"<<endl;
for(int i=0;i<p.getP1().size();i++){
cout<<" "<<i<<":"<<p.getP1()[i]<<" -> ";
for(int j=0;j<p.getP2()[i].size();j++){
cout<<p.getP2()[i][j];
}
cout<<endl;
}
cout<<"StartSymbol: ";
cout<<s.getS()<<endl;
}
set<string> First1(string str){ //求单个非终结符First集
set<string> first;
set<string> second; //中间集合
if(vt.getVt().count(str)==1){ //终结符本身即为其First集
first.insert(str);
return first;
}
for(int i=0;i<p.getP1().size();i++){
if(str==p.getP1()[i]){ //在左部集找到该非终结符
if(vt.getVt().count(p.getP2()[i][0])==1 || p.getP2()[i][0]=="ε"){ //如果产生式右部第一个字符为终结符或ε,则直接加入其First集
first.insert(p.getP2()[i][0]);
}
else if(vn.getVn().count(p.getP2()[i][0])==1){ //右部第一个为非终结符
for(int j=0;j<p.getP2()[i].size();j++){ //产生式右部遍历
if(vt.getVt().count(p.getP2()[i][j])==1){ //如果是终结符,加入first集,结束遍历
first.insert(p.getP2()[i][j]);
break;
}else if(vn.getVn().count(p.getP2()[i][j])==1 && j!=p.getP2()[i].size()-1){ //如果是非终结符
second =First1(p.getP2()[i][j]); //递归调用
if(second.count("ε")==0){
first.insert(second.begin(),second.end());
second.clear();
break;
}
else{
second.erase("ε");
first.insert(second.begin(),second.end());
second.clear();
}
} else if(vn.getVn().count(p.getP2()[i][j])==1 && j==p.getP2()[i].size()-1){
second =First1(p.getP2()[i][j]);
first.insert(second.begin(),second.end());
second.clear();
}
}
}
}
}
return first;
}
set<string> FIRSTstr(vector<string> str,int pos){ //求字符串的FIRST集
set<string> first;
set<string> midfirst;
for(int i=pos;i<str.size();i++){
if(vt.getVt().count(str[i])==1){
first.insert(str[i]);
break;
} else if(vn.getVn().count(str[i])==1){
midfirst=First1(str[i]);
first.insert(midfirst.begin(),midfirst.end());
if(midfirst.count("ε")==0){
break;
}
midfirst.clear();
}
}
first.insert("#"); //将结束符加入
first.erase("ε"); //去掉空串
return first;
}
bool isinI(vector<LR1program> I,LR1program pro){ //判断项目是否在项目集中
for(int i=0;i<I.size();i++){
if(pro.pnum==I[i].pnum && pro.dotpos==I[i].dotpos && pro.subsequence==I[i].subsequence){
return true;
}
}
return false;
}
vector<LR1program> Clourseone(vector<LR1program> pro){ //项目集的闭包算法
LR1program onepro;
vector<string> midv;
vector<LR1program> I; //储存一个项目集
set<string> midfirst;
for(int i=0;i<pro.size();i++){
I.push_back(pro[i]); //本身加入项目集
}
for(int k=0;k<I.size();k++){ //直到I不再增大
if(I[k].dotpos!=p.getP2()[I[k].pnum].size()-1 && vn.getVn().count(p.getP2()[I[k].pnum][I[k].dotpos+1])==1){
for(int j=I[k].dotpos+2;j<p.getP2()[I[k].pnum].size();j++){
midv.push_back(p.getP2()[I[k].pnum][j]);
}
midv.push_back(I[k].subsequence);
midfirst= FIRSTstr(midv,0);
midv.clear();
for(int i=0;i<p.getP1().size();i++){
if(p.getP2()[I[k].pnum][I[k].dotpos+1]==p.getP1()[i]){
onepro.pnum=i;
onepro.dotpos=-1;
for(set<string>::iterator it=midfirst.begin();it!=midfirst.end();it++){
onepro.subsequence=*it;
if(!isinI(I,onepro)){ //不在项目集中则加入
I.push_back(onepro);
}
}
}
}
midfirst.clear();
}
}
return I;
}
vector<LR1program> GO(vector<LR1program> I,string X){
vector<LR1program> nextI; //要转换到的项目集
for(int i=0;i<I.size();i++){
if(I[i].dotpos!=p.getP2()[I[i].pnum].size()-1 ){
if(p.getP2()[I[i].pnum][I[i].dotpos+1]==X){
I[i].dotpos++; //活前缀后移
nextI.push_back(I[i]);
}
}
}
nextI=Clourseone(nextI);
return nextI;
}
bool isinmap(map<int,vector<LR1program>> pro,vector<LR1program> midI){ //判断项目集是否在项目集簇中
for(int i=0;i<pro.size();i++){
if(midI==pro[i]){
return true;
}
}
return false;
}
int indexmap(map<int,vector<LR1program>> pro,vector<LR1program> midI){ //找到项目在项目集簇中的序号
for(int i=0;i<pro.size();i++){
if(midI==pro[i]){
return i;
}
}
return -1;
}
void program(){ //构造项目集簇
map<int,vector<LR1program>> pro; //储存项目集簇
vector<LR1program> midI;
LR1program begain; //初始项目
begain.pnum=0;
begain.dotpos=-1;
begain.subsequence="#";
midI.push_back(begain);
pro.emplace(0,Clourseone(midI)); //I0
midI.clear();
vector<string> X; //非终结符与终结符集合
for(set<string>::iterator it=vn.getVn().begin();it!=vn.getVn().end();it++){
if(*it!=s.getS())X.push_back(*it);
}
for(set<string>::iterator it=vt.getVt().begin();it!=vt.getVt().end();it++){
X.push_back(*it);
}
int k=1;
for(int i=0;i<pro.size();i++){ //直到pro不再增大
for(int j=0;j<X.size();j++){
midI=GO(pro[i],X[j]); //状态转换
if(!midI.empty()&& !isinmap(pro,midI)){
pro.emplace(k,midI);
k++;
}
midI.clear();
}
}
this->pro=pro;
cout<<"[LR(1) item set cluster]"<<endl; //输出项目集簇
for(int k=0;k<pro.size();k++){
cout<<"I"<<k<<":"<<endl;
for(int i=0;i<pro[k].size();i++){
cout<<p.getP1()[pro[k][i].pnum]<<"->";
for(int j=0;j<p.getP2()[pro[k][i].pnum].size();j++){
if(pro[k][i].dotpos==j-1){
cout<<".";
}
cout<<p.getP2()[pro[k][i].pnum][j];
}
if(pro[k][i].dotpos==p.getP2()[pro[k][i].pnum].size()-1){
cout<<".";
}
cout<<","<<pro[k][i].subsequence<<endl;
}
}
}
bool isLR1(){ //判断是否是LR1文法
set<string> check;
string str;
map<int,vector<LR1program>> pro; //储存项目集簇
pro= this->pro;
for(int i=0;i<pro.size();i++){
int x=0; //移进项目个数
int y=0; //归约项目个数
for(int j=0;j<pro[i].size();j++){
if(pro[i][j].dotpos==p.getP2()[pro[i][j].pnum].size()-1){
y++;
} else{
if(vt.getVt().count(p.getP2()[pro[i][j].pnum][pro[i][j].dotpos+1])==1){
x++;
}
}
}
if(y>1){
for(int j=0;j<pro[i].size();j++){
if(pro[i][j].dotpos==p.getP2()[pro[i][j].pnum].size()-1){
check.insert(pro[i][j].subsequence);
}
}
if(check.size()!=y)return false;
check.clear();
}
if((y>0&&x>0)){
for(int j=0;j<pro[i].size();j++){
if(pro[i][j].dotpos==p.getP2()[pro[i][j].pnum].size()-1){
str=pro[i][j].subsequence;
}
}
for(int j=0;j<pro[i].size();j++){
if(pro[i][j].dotpos!=p.getP2()[pro[i][j].pnum].size()-1){
if(p.getP2()[pro[i][j].pnum][pro[i][j].dotpos+1]==str){
return false;
}
}
}
}
}
return true;
}
void DFA(){
map<pair<int,string>,int> dfa;
pair<int,string> midp;
vector<LR1program> midv;
map<int,vector<LR1program>> pro; //储存项目集簇
vector<string> X; //非终结符与终结符集合
for(set<string>::iterator it=vn.getVn().begin();it!=vn.getVn().end();it++){
if(*it!=s.getS())X.push_back(*it);
}
for(set<string>::iterator it=vt.getVt().begin();it!=vt.getVt().end();it++){
X.push_back(*it);
}
pro= this->pro;
for(int i=0;i<pro.size();i++){
midp.first=i;
for(int j=0;j<X.size();j++){
midp.second=X[j];
midv=GO(pro[i],X[j]);
if(isinmap(pro,midv)){
int k=indexmap(pro,midv);
if(k!=-1){
dfa.emplace(midp,k);
}
}
}
}
cout<<"[LR(1) state tran function]"<<endl; //输出DFA
for(map<pair<int,string>,int>::iterator it=dfa.begin();it!=dfa.end();it++){
cout<<it->first.first<<","<<it->first.second<<"->"<<it->second<<endl;
}
if(isLR1()){
cout<<"文法是 LR(1) 文法!"<<endl;
} else{
cout<<"文法不是 LR(1) 文法!"<<endl;
}
}
bool issequence(vector<LR1program> midv,string X){ //判断后继符号
for(int i=0;i<midv.size();i++){
if(midv[i].dotpos==p.getP2()[midv[i].pnum].size()-1 && midv[i].subsequence==X)return true;
}
return false;
}
int rindex(vector<LR1program> midv,string X){
for(int i=0;i<midv.size();i++){
if(midv[i].dotpos==p.getP2()[midv[i].pnum].size()-1 && midv[i].subsequence==X)return midv[i].pnum;
}
}
void ActionGoto(){
map<int,vector<LR1program>> pro; //储存项目集簇
vector<LR1program> midv;
pro= this->pro;
vector<string> X; //终结符集合
vector<string> Y; //非终结符集合
map<pair<int,int>,pair<string,int>> action;
map<pair<int,int>,int> Goto;
pair<int,int> midp1;
pair<string,int> midp2;
for(set<string>::iterator it=vt.getVt().begin();it!=vt.getVt().end();it++){
X.push_back(*it);
}
X.push_back("#");
for(set<string>::iterator it=vn.getVn().begin();it!=vn.getVn().end();it++){
if(*it!=s.getS())Y.push_back(*it);
}
for(int i=0;i<pro.size();i++){
midp1.first=i;
for(int j=0;j<X.size();j++){
midp1.second=j;
midv=GO(pro[i],X[j]);
if(isinmap(pro,midv)){
int k=indexmap(pro,midv);
midp2.first="s"; //移进
midp2.second=k;
action.emplace(midp1,midp2);
}else{
if(issequence(pro[i],X[j])){
midp2.first="r";//归约
midp2.second= rindex(pro[i],X[j]);
if(midp2.second==0){
midp2.first="acc";//结束
}
action.emplace(midp1,midp2);
}
}
midv.clear();
}
for(int j=0;j<Y.size();j++){
midp1.second=j;
midv=GO(pro[i],Y[j]);
if(isinmap(pro,midv)){
int k=indexmap(pro,midv);
Goto.emplace(midp1,k);
}
midv.clear();
}
}
ofstream fout(this->lr1table);//输出分析表文件
fout<<action.size()<<endl;
for( map<pair<int,int>,pair<string,int>>::iterator it=action.begin();it!=action.end();it++){
if(it->second.first=="acc"){
fout<<it->first.first<<" "<<it->first.second<<" "<<it->second.first<<endl;
} else{
fout<<it->first.first<<" "<<it->first.second<<" "<<it->second.first<<it->second.second<<endl;
}
}
fout<<Goto.size()<<endl;
for(map<pair<int,int>,int>::iterator it=Goto.begin();it!=Goto.end();it++){
fout<<it->first.first<<" "<<it->first.second<<" "<<it->second<<endl;
}
fout.close();
}
};
#endif //BIANYI_WENFA_H
/// Entry point.
/// Usage: program <grammar-file> [<table-file>]
/// With a grammar file argument, the LR table is written next to it under the same name with
/// the extension "lrtbl" unless a second argument names the table file explicitly.
/// Without arguments both paths are asked for interactively.
int main(int argc, char *argv[]) {
    string grammarFile;
    string tableFile;
    if (argc >= 2) {
        grammarFile = argv[1];
        if (argc >= 3) {
            tableFile = argv[2];
        } else {
            // Replace the extension only when the last '.' belongs to the file name, not a directory.
            const string::size_type slash = grammarFile.find_last_of("/\\");
            const string::size_type dot = grammarFile.find_last_of('.');
            const bool hasExtension =
                    dot != string::npos && (slash == string::npos || dot > slash);
            tableFile = (hasExtension ? grammarFile.substr(0, dot) : grammarFile) + ".lrtbl";
        }
    } else {
        cout << "Please input file(absolute path; eg: D://filename.txt):" << endl;
        cin >> grammarFile;
        cout << "Please input LR table output file(absolute path; eg: D://filename.lrtbl):" << endl;
        cin >> tableFile;
    }

    CFG cfg;
    cfg.setFilename(grammarFile);
    cfg.setLr1Table(tableFile);
    cfg.fileop();       // load the grammar
    cfg.outputCFG();    // echo it
    cfg.program();      // item sets
    cfg.DFA();          // transitions + LR(1) verdict
    cfg.ActionGoto();   // table file
    return 0;
}