接上一篇词法分析器第二部分

//接上篇

// 定义DFA的构造类
class DFA
{
public:
 DFA();
 ~DFA();
 void GetRegExp();
 void InsertCatNode();
 void RegExpToPost();
 void GetEdgeNumber();
 void ThompsonConstruction();
 void SubsetConstruction();
 void FindMatchingPatternInFile();
private:
 char *exp;
 char *post;
 char *edge;
 int edgeNumber;
 int **DStates;
 int **Dtran;
 int *AcceptStates;
 int DStatesNumber;
 int DtranNumber;
 int NFAStatesNumber;
 int DFAStatesNumber;
 AdjacentTable *NFATable;
 TransitionTable *DFATable;
 int Precedence(char symbol);
 int CompArray(int *t1, int *t2);
 int MinimizeDFAStates(int **Dtran, int *AcceptStates, int DtranNumber, int edgeNumber);
 void RemoveFirstSymbol(char *buf, int &len);
};
// 构造函数
DFA::DFA()
{
 exp = new char[128];
 post = new char[128];
 edge = new char[128];
    edgeNumber = 0;
 NFAStatesNumber = 0;
 DFAStatesNumber = 0;
 DStatesNumber = 0;
 DtranNumber = 0;
 NFATable = new AdjacentTable();
}
// 析构函数
DFA::~DFA()
{
 delete [] exp;
 delete [] post;
 delete [] edge;
 delete [] AcceptStates;
 NFATable->Clear();
 DFATable->Clear();
}
// 获取输入
void DFA::GetRegExp()
{
 cout << "在下面输入正则表达式" << endl;
 cin >> exp;
 for (int i=0; exp[i]!='/0'; i++)
 {
  if (exp[i] == '~')
  {
   cout << "/n字符'~'已经被程序禁用!" << endl;
   getch();
   exit(1);
  }
 }
 cout << "/n------------------------" << endl;
}
// 加入cat-node作为连结点标志
void DFA::InsertCatNode()
{
 int i = 0, j, len = strlen(exp);
 while (exp[i+1] != '/0')
 {
  if (((exp[i] != '(' && exp[i] != '.' && exp[i] != '|') 
   || exp[i] == ')' 
   || exp[i] == '*')
   && (exp[i+1] != ')' && exp[i+1] != '.' && exp[i+1] != '|' && exp[i+1] != '*'))
  {
   for (j=len; j>i+1; j--)
   {
    exp[j] = exp[j-1];
   }
   exp[i+1] = '.';
   len++;
   exp[len] = '/0';
   i++;
  }
  i++;
 }
 cout << "/n第一步: 加入连结点/n"
   << exp << "/n"
   << "字符串长度: " << len 
   << "/n/n------------------------" << endl;
}
// 定义运算符的优先级
int DFA::Precedence(char symbol)
{
 int priority;
 switch (symbol)
 {
 case '|': priority = 1; break;
 case '.': priority = 2; break;
 case '*': priority = 3; break;
 default:  priority = 0; break;
 }
 return priority;
}
// 将正则式转为逆波兰式
void DFA::RegExpToPost()
{
 int i = 0, j = 0;
 char ch, cl;
 strcpy(post, "/0");
 LStack<char> *ls = new LStack<char>();
 ls->Clear();
 ls->Push('#');
 ch = exp[i];
 while (ch != '/0')
 {
  if (ch == '(')
  {
   ls->Push(ch);
   ch = exp[++i];
  }
  else if (ch == ')')
  {
   while (ls->GetTop() != '(')
   {
    post[j++] = ls->Pop();
   }
            ls->Pop();
   ch = exp[++i];
  }
  else if ((ch == '|') || (ch == '*') || (ch == '.'))
  {
      cl = ls->GetTop();
      while (Precedence(cl) >= Precedence(ch))
   {
       post[j++] = cl;
       ls->Pop();
       cl = ls->GetTop();
   }
   ls->Push(ch);
   ch = exp[++i];
  }
  else
  {
   post[j++] = ch;
   ch = exp[++i];
  }
 }
 ch = ls->Pop();
 while ((ch == '|') || (ch == '*') || (ch == '.'))
 {
  post[j++] = ch;
  ch = ls->Pop();
 }
 post[j] = '/0';
 ls->Clear();
 cout << "/n第二步: 转为后缀式/n"
   << post << "/n"
   << "字符串长度: " << strlen(post) 
   << "/n/n------------------------" << endl;
}
// 扫描逆波兰式中除运算符以外的字符的数目
void DFA::GetEdgeNumber()
{
 int i = 0, j;
 edgeNumber = 0;
 while (post[i] != '/0')
 {
  if (post[i] == '.' || post[i] == '|' || post[i] == '*')
  {
   i++;
   continue;
  }
  for (j=0; j<edgeNumber; j++)
  {
   if (post[i] == edge[j])
   {
    break;
   }
  }
  if (j == edgeNumber)
  {
   edge[edgeNumber] = post[i];
   edgeNumber++;
  }
  i++;
 }
 edge[edgeNumber] = '/0';
 cout << "/n第三步: 获取字符集/n";
 for (i=0; i<edgeNumber; i++)
 {
  cout << edge[i] << ' ';
 }
 cout << "/n字符个数: " << edgeNumber 
   << "/n/n------------------------" << endl;
}
// 用Thompson构造法构造NFA
void DFA::ThompsonConstruction()
{
 int i, j;
 char ch;
 int s1, s2;
 LStack<int> *states = new LStack<int>();
 states->Clear();
 if (strlen(post) < 1)
 {
  cout << "No Valid Regular Expression Found!" << endl;
  getch();
  exit(1);
 }
 NFATable->SetValue(0, 0);
 i = 1;
 j = 0;
 ch = post[j];
    while (ch != '/0')
 {
  if (ch == '.')
  {
   s2 = states->Pop();
   int temp1 = states->Pop();
   int temp2 = states->Pop();
   s1 = states->Pop();
   NFATable->InsertEdgeByValue(temp2, temp1, '~');
   states->Push(s1);
   states->Push(s2);
  }
  else if (ch == '|')
  {
   s2 = states->Pop();
   int temp1 = states->Pop();
   int temp2 = states->Pop();
   s1 = states->Pop();
   NFATable->InsertVertex(i);
   NFATable->InsertVertex(i+1);
   NFATable->InsertEdgeByValue(i, s1, '~');
   NFATable->InsertEdgeByValue(i, temp1, '~');
   NFATable->InsertEdgeByValue(temp2, i+1, '~');
   NFATable->InsertEdgeByValue(s2, i+1, '~');
   s1 = i;
   s2 = i+1;
   states->Push(s1);
   states->Push(s2);
   i += 2;
  }
  else if (ch == '*')
  {
   s2 = states->Pop();
   s1 = states->Pop();
   NFATable->InsertVertex(i);
   NFATable->InsertVertex(i+1);
   NFATable->InsertEdgeByValue(i, i+1, '~');
   NFATable->InsertEdgeByValue(s2, s1, '~');
   NFATable->InsertEdgeByValue(i, s1, '~');
   NFATable->InsertEdgeByValue(s2, i+1, '~');
   s1 = i;
   s2 = i+1;
   states->Push(s1);
   states->Push(s2);
   i += 2;
  }
  else
  {
   NFATable->InsertVertex(i);
   NFATable->InsertVertex(i+1);
   NFATable->InsertEdgeByValue(i, i+1, ch);
   s1 = i;
   s2 = i+1;
   states->Push(s1);
   states->Push(s2);
   i += 2;
  }
  j++;
  ch = post[j];
 }
 s2 = states->Pop();
 s1 = states->Pop();
 NFATable->InsertEdgeByValue(0, s1, '~');
 if (! states->IsEmpty())
 {
  cout << "Some error in your input string!" << endl;
  getch();
  exit(1);
 }
 NFAStatesNumber = s2 + 1;
}
// 比较两个一维数组是否含有完全相同的元素
int DFA::CompArray(int *t1, int *t2)
{
 int i = 0, j = 0, len1, len2;
 while (t1[i] != -1)
 {
  i++;
 }
 len1 = i;
 while (t2[j] != -1)
 {
  j++;
 }
 len2 = j;
 if (len1 != len2)
 {
  return 0;
 }
 for (i=0; i<len1; i++)
 {
  for (j=0; j<len2; j++)
  {
   if (t1[i] == t2[j])
   {
    break;
   }
  }
  if (j == len2)
  {
   return 0;
  }
 }
 return 1;
}
// 最小化Dtran表
int DFA::MinimizeDFAStates(int **Dtran, int *AcceptStates, int DtranNumber, int edgeNumber)
{
 int h, i, j, k, l;
 for (i=0; i<DtranNumber-1; i++)
 {
  for (j=i+1; j<DtranNumber; j++)
  {
   if (AcceptStates[i] == AcceptStates[j])
   {
    for (k=0; k<edgeNumber; k++)
    {
     if (Dtran[i][k] != Dtran[j][k])
     {
      break;
     }
    }
    if (k == edgeNumber)
    {
     for (l=j; l<DtranNumber-1; l++)
     {
      for (k=0; k<edgeNumber; k++)
      {
       Dtran[l][k] = Dtran[l+1][k];
      }
      AcceptStates[l] = AcceptStates[l+1];
     }
     for (l=0; l<DtranNumber-1; l++)
     {
      for (k=0; k<edgeNumber; k++)
      {
       if (Dtran[l][k] == j)
       {
        Dtran[l][k] = i;
       }
      }
     }
     for (h=j; h<DtranNumber; h++)
     {
      for (l=0; l<DtranNumber-1; l++)
      {
       for (k=0; k<edgeNumber; k++)
       {
        if (Dtran[l][k] == h+1)
        {
         Dtran[l][k] = h;
        }
       }
      }
     }
     DtranNumber--;
     j--;
    }
   }
  }
 }
 return DtranNumber;
}
// 用子集构造法构造DFA
void DFA::SubsetConstruction()
{
 int i, j, k;
 DStatesNumber = 0;
 DtranNumber = 0;
 // 输出NFA状态表
 cout << "/n第四步: 构造NFA/n/n";
    NFATable->OutputNFA();
 cout << "/n------------------------" << endl;
 // 初始化DStates和Dtran两个构造表及AcceptStates数组
 DStates = (int**)(new int*[NFAStatesNumber+1]);
 for (i=0; i<NFAStatesNumber+1; i++)
 {
  DStates[i] = new int[NFAStatesNumber+1];
 }
 Dtran = (int**)(new int*[NFAStatesNumber+1]);
 for (i=0; i<NFAStatesNumber+1; i++)
 {
  Dtran[i] = new int[edgeNumber+1];
 }
 for (i=0; i<NFAStatesNumber+1; i++)
 {
  for (j=0; j<edgeNumber+1; j++)
  {
   Dtran[i][j] = -1;
  }
 }
 AcceptStates = new int[NFAStatesNumber+1];
 for (i=0; i<NFAStatesNumber+1; i++)
 {
  AcceptStates[i] = 0;
 }
 // 调用闭包函数和移动函数构造DStates和Dtran两个表及AcceptStates数组
    int *T = new int[NFAStatesNumber+1];
 int *temp = new int[NFAStatesNumber+1];
 T[0] = 0;
 T[1] = -1;
 T = NFATable->Closure(T);
 DStates[DStatesNumber] = T;
 DStatesNumber++;
 k = 0;
 while (k < DStatesNumber)
 {
  for (i=0; edge[i]!='/0'; i++)
  {
   temp = NFATable->Closure(NFATable->Move(T, edge[i]));
   if (temp[0] != -1)
   {
    for (j=0; j<DStatesNumber; j++)
    {
     if (CompArray(temp, DStates[j]) == 1)
     {
      Dtran[k][i] = j;
      break;
     }
    }
    if (j == DStatesNumber)
    {
     DStates[DStatesNumber] = temp;
     Dtran[k][i] = DStatesNumber;
     DStatesNumber++;
    }
   }
  }
  k++;
  T = DStates[k];
 }
 DtranNumber = k;
 for (i=0; i<DStatesNumber; i++)
 {
  for (j=0; DStates[i][j]!= -1; j++)
  {
   if (DStates[i][j] == NFAStatesNumber - 1)
   {
    AcceptStates[i] = 1;
    break;
   }
  }
 }
 // 输出DStates表项
 cout << "/n第五步: 构造DStates表/n/n";
 for (i=0; i<DStatesNumber; i++)
 {
  cout << "项目" << i <<":  ";
  j = 0;
  while (DStates[i][j] != -1)
  {
   cout << DStates[i][j] << " ";
   j++;
  }
  cout << endl;
 }
 cout << "/n------------------------" << endl;
 // 输出Dtran表项
 cout << "/n第六步: 构造Dtran表/n/n状态 ";
 for (j=0; j<edgeNumber; j++)
 {
  cout << "    " << edge[j];
 }
 cout << "    是否接收" << endl;
 for (i=0; i<DtranNumber; i++)
 {
  if (i < 10)  cout << "   " << i << " ";
  else if (i < 100)  cout << "  " << i << " ";
  else if (i < 1000)  cout << " " << i << " ";
  else  cout << i << " ";
  for (j=0; j<edgeNumber; j++)
  {
   if (Dtran[i][j] < 0)  cout << "     ";
   else if (Dtran[i][j] < 10)  cout << "    " << Dtran[i][j];
   else if (Dtran[i][j] < 100)  cout << "   " << Dtran[i][j];
   else if (Dtran[i][j] < 1000)  cout << "  " << Dtran[i][j];
   else  cout << " " << Dtran[i][j];
  }
  if (AcceptStates[i] == 1)
  {
   cout << "    Acc";
  }
  cout << endl;
 }
 cout << "/n------------------------" << endl;
 // 反复检测Dtran表中是否能化简,能则继续化简
 int DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
 while (DtranNumberAfterMinimization != DtranNumber)
 {
  DtranNumber = DtranNumberAfterMinimization;
  DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
 }
 // 将Dtran表的内容拷贝到DFA表
 DFATable = new TransitionTable(DtranNumber, edgeNumber);
 for (i=0; i<DtranNumber; i++)
 {
  for (j=0; j<edgeNumber; j++)
  {
   DFATable->SetValue(i, j, Dtran[i][j]);
  }
 }
 // 输出DFA状态表
 cout << "/n第七步: 构造最小DFA/n/n状态 ";
 for (j=0; j<edgeNumber; j++)
 {
  cout << "    " << edge[j];
 }
 cout << "    是否接收" << endl;
 for (i=0; i<DtranNumber; i++)
 {
  if (i < 10)  cout << "   " << i << " ";
  else if (i < 100)  cout << "  " << i << " ";
  else if (i < 1000)  cout << " " << i << " ";
  else  cout << i << " ";
  for (j=0; j<edgeNumber; j++)
  {
   if (DFATable->GetValue(i, j) < 0)  cout << "     ";
   else if (DFATable->GetValue(i, j) < 10)  cout << "    " << DFATable->GetValue(i, j);
   else if (DFATable->GetValue(i, j) < 100)  cout << "   " << DFATable->GetValue(i, j);
   else if (DFATable->GetValue(i, j) < 1000)  cout << "  " << DFATable->GetValue(i, j);
   else  cout << " " << DFATable->GetValue(i, j);
  }
  if (AcceptStates[i] == 1)
  {
   cout << "    Acc";
  }
  cout << endl;
 }
 cout << "/n------------------------" << endl;
 // 析构DStates和Dtran表以及AcceptStates数组
 for (i=0; i<NFAStatesNumber+1; i++)
 {
  delete [] DStates[i];
  delete [] Dtran[i];
 }
 delete [] DStates;
 delete [] Dtran;
}
// 从字符串中删除第一个字符
void DFA::RemoveFirstSymbol(char *buf, int &len)
{
 for (int i=0; i<len-1; i++)
 {
  buf[i] = buf[i+1];
 }
 len--;
 buf[len] = '/0';
}
// 从ASCII文件中找出所有的匹配式
void DFA::FindMatchingPatternInFile()
{
 cout << "/n第八步: 查找匹配字符串/n/n";
 // 打开一个只读文件和一个写文件
 char filePath[128];
 cout << "请输入要打开的文件" << endl;
 cin >> filePath;
 fstream infile;
 infile.open(filePath, ios::in|ios::nocreate);
 if (! infile)
 {
  cout << "/n打开文件" << filePath << "失败!" << endl;
  getch();
  exit(1);
 }
 cout << "请输入保存记录的文件" << endl;
 cin >> filePath;
 fstream outfile;
 outfile.open(filePath, ios::out|ios::trunc);
 if (! outfile)
 {
  cout << "/n建立文件" << filePath << "失败!" << endl;
  getch();
  exit(1);
 }
 // 开始验证正则表达式的匹配串
 int ln = 1, col = 0, bufHead = 0;
 int state = 0;
 int count = 0;
 char buf[256];
 int curr = 0;
 int len = 0;
 char ch;
 while (infile.get(ch))
 {
  if ((ch == '/n') || (ch == ' '))
  { 
   while (curr < len)
   {
    state = 0;
    while ((state != -1) && (curr < len))
    {
     state = DFATable->Transit(state, buf[curr], edge);
        if (AcceptStates[state] == 1)
     {
      outfile << "/nLn " << ln << ", Col " << bufHead <<": ";
      outfile.write(buf, curr+1);
         count++;
     }
     curr++;
    }
    RemoveFirstSymbol(buf, len);
    bufHead++;
    curr = 0;
   }
   if (ch == '/n')
   {
    ln++;
    col = 0;
   }
   if (ch == ' ')
   {
    col++;
   }
  }
  else
  {
   col++;
   if (len == 0)
   {
    bufHead = col;
   }
   buf[len++] = ch;
   if (len >= 256)
   {
    cout << "读取字符串的长度超过最大限额!" << endl;
    getch();
    exit(1);
   }
  }
 }
 while (curr < len)
 {
  state = 0;
    curr = 0;
  while ((state != -1) && (curr < len))
  {
   state = DFATable->Transit(state, buf[curr], edge);
      if (AcceptStates[state] == 1)
   {
    outfile << "/nLn " << ln << ", Col " << bufHead <<": ";
    outfile.write(buf, curr+1);
       count++;
   }
   curr++;
  }
  RemoveFirstSymbol(buf, len);
  bufHead++;
 }
 if (count > 0)
 {
  outfile << "/n/n一共找到" << count << "匹配字符串";
 }
 else
 {
  outfile << "/n/n没有找到任何匹配字符串";
 }
 infile.close();
 outfile.close();
 cout << "/n查找结果已保存在" << filePath << "中/n"
      << "/n------------------------" << endl;
}

//=====================================================

// 主函数
void main()
{
 DFA dfa;
 dfa.GetRegExp();
 dfa.InsertCatNode();
 dfa.RegExpToPost();
 dfa.GetEdgeNumber();
 dfa.ThompsonConstruction();
 dfa.SubsetConstruction();
 dfa.FindMatchingPatternInFile();
}

//=====================================================

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值