一个将NFA转换为正则表达式(RE)的简单Java实现:
package hwk2;
//import com.alibaba.fastjson.JSON;
/**
 * Converts an NFA into a regular expression by state elimination.
 *
 * <p>The NFA is described by the static fields {@link #states}, {@link #tokens} and
 * {@link #thegma}. {@link #initrelation()} turns that description into a matrix of edge
 * labels ({@link #relation2}), and {@link #convert()} removes the inner states one by one,
 * re-routing every path through the removed state, until only the edge from the first
 * state to the last state is left. Union is written as {@code U}, Kleene star as
 * {@code *}, and concatenation as plain juxtaposition.
 *
 * <p>All state is static and mutable, so the class is not safe for concurrent use.
 */
public class Nfa2Re {
    // Step 1: the NFA is wrapped with an extra start state "S" and an extra accept state "A".

    /** State names. Index 0 and the last index are the two states {@link #convert()} keeps. */
    public static String[] states = new String[] {"S", "q1", "q2", "q3", "q4", "A"};

    /**
     * Input symbols; column {@code j} of every row of {@link #thegma} belongs to
     * {@code tokens[j]}. The symbol "e" appears to stand for epsilon (it labels the moves
     * out of "S" and into "A"); the code treats it like any other symbol.
     */
    public static String[] tokens = new String[] {"0", "1", "e"};

    /**
     * Transition table: {@code thegma[i][j]} lists the names of the states reachable from
     * {@code states[i]} on {@code tokens[j]}.
     */
    public static String[][][] thegma = new String[][][] {
        {{}, {}, {"q1"}},
        {{"q1"}, {"q1", "q2"}, {}},
        {{"q3"}, {}, {"q3"}},
        {{}, {"q4"}, {}},
        {{"q4"}, {"q4"}, {"A"}},
        {{}, {}, {}}
    };

    /** Name of the original start state. Not read by any method of this class. */
    public static String q1 = "q1";

    /** Names of the original accept states. Not read by any method of this class. */
    public static String[] accept = new String[] {"q4"};

    /**
     * Matrix R of the GNFA: {@code relation2[i][j]} is the expression labelling the edge
     * from {@code states[i]} to {@code states[j]}, or {@code null} when there is no edge.
     */
    public static String[][] relation2 = null;

    /** Prints the regular expression of the built-in NFA, then of the one from {@link #testdata()}. */
    public static void main(String[] args) {
        initrelation(); // build matrix R for the built-in NFA
        System.out.println(convert());
        testdata(); // switch to the second sample NFA
        initrelation(); // R must be rebuilt, convert() consumed the previous one
        System.out.println(convert());
    }

    /**
     * Builds {@link #relation2} from {@link #states}, {@link #tokens} and {@link #thegma}.
     * Several symbols leading to the same target state are merged into a parenthesised
     * union such as {@code (0U1)}.
     *
     * @throws IllegalStateException if the transition table has fewer rows than there are
     *     states, a row has fewer columns than there are tokens, or a transition names a
     *     state that is not listed in {@link #states}
     */
    public static void initrelation() {
        int stateCount = states.length;
        if (thegma.length < stateCount) {
            throw new IllegalStateException(
                "transition table has " + thegma.length + " rows but there are "
                    + stateCount + " states");
        }
        String[][] matrix = new String[stateCount][stateCount];
        for (int from = 0; from < stateCount; from++) {
            if (thegma[from].length < tokens.length) {
                throw new IllegalStateException(
                    "transition row of state " + states[from] + " has "
                        + thegma[from].length + " columns but there are "
                        + tokens.length + " tokens");
            }
            for (int t = 0; t < tokens.length; t++) {
                String symbol = tokens[t];
                for (String target : thegma[from][t]) {
                    int to = indexOf(states, target);
                    if (to < 0) {
                        throw new IllegalStateException(
                            "transition from " + states[from] + " on " + symbol
                                + " targets unknown state " + target);
                    }
                    String existing = matrix[from][to];
                    matrix[from][to] =
                        existing == null ? symbol : "(" + existing + "U" + symbol + ")";
                }
            }
        }
        // Publish only a fully built matrix, so a failed build never leaves a partial one.
        relation2 = matrix;
    }

    /**
     * Reduces the GNFA stored in {@link #relation2} to a single expression by removing
     * every state except the first and the last one, in index order.
     *
     * <p>The matrix is modified in place: after the call every row and column of a removed
     * state is {@code null}. If the matrix has not been built yet, or its size no longer
     * matches {@link #states}, it is rebuilt with {@link #initrelation()} first.
     *
     * @return the label of the edge from the first state to the last state, or
     *     {@code null} if no such edge exists
     */
    public static String convert() {
        if (relation2 == null || relation2.length != states.length) {
            initrelation();
        }
        int stateCount = states.length;
        for (int removed = 1; removed < stateCount - 1; removed++) {
            // A self-loop on the removed state becomes a starred factor.
            String loop = relation2[removed][removed];
            if (loop != null) {
                loop = parenthesize(loop) + "*";
                relation2[removed][removed] = loop;
            }
            // Re-route every path pred -> removed -> succ as a direct edge pred -> succ.
            for (int pred = 0; pred < stateCount; pred++) {
                for (int succ = 0; succ < stateCount; succ++) {
                    if (pred == removed || succ == removed) {
                        continue;
                    }
                    String in = relation2[pred][removed];
                    String out = relation2[removed][succ];
                    if (in == null || out == null) {
                        continue;
                    }
                    String path = loop == null ? in + out : in + loop + out;
                    String existing = relation2[pred][succ];
                    if (existing == null || existing.isEmpty()) {
                        relation2[pred][succ] = path;
                    } else {
                        // Both alternatives are wrapped so the union stays unambiguous.
                        relation2[pred][succ] =
                            "(" + parenthesize(existing) + "U" + parenthesize(path) + ")";
                    }
                }
            }
            // The removed state no longer has any edges.
            for (int other = 0; other < stateCount; other++) {
                relation2[removed][other] = null;
                relation2[other][removed] = null;
            }
        }
        return relation2[0][stateCount - 1];
    }

    /** Returns {@code expr} unchanged if it is already closed, otherwise wrapped in "()". */
    private static String parenthesize(String expr) {
        return isClosed(expr) ? expr : "(" + expr + ")";
    }

    /**
     * Finds the first position of an element in an array.
     *
     * @param a the array to search; {@code null} elements are allowed
     * @param b the element to look for, compared with {@code equals}
     * @return the index of the first match, or -1 if there is none
     */
    public static int indexOf(Object[] a, Object b) {
        for (int i = 0; i < a.length; i++) {
            boolean same = a[i] == null ? b == null : a[i].equals(b);
            if (same) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether an expression can be used as a single unit without adding "()".
     *
     * @param str the expression to inspect
     * @return {@code true} if {@code str} is one of {@link #tokens}, or if its first
     *     character is "(" and the matching ")" is its last character
     */
    public static boolean isClosed(String str) {
        if (indexOf(tokens, str) >= 0) {
            return true;
        }
        int last = str.length() - 1;
        if (str.isEmpty() || str.charAt(0) != '(' || str.charAt(last) != ')') {
            return false;
        }
        int depth = 0;
        for (int pos = 0; pos <= last; pos++) {
            char c = str.charAt(pos);
            if (c == '(') {
                depth++;
            } else if (c == ')') {
                depth--;
            }
            if (depth == 0) {
                // The opening parenthesis closes here; closed only if this is the end.
                return pos == last;
            }
        }
        return false; // unbalanced: the opening parenthesis is never closed
    }

    /**
     * Tells whether an operand of a concatenation has a bare union form such as
     * {@code aUb} and therefore needs parentheses. No method of this class calls it.
     *
     * @param str the expression to inspect
     * @return {@code true} if {@code str} contains "U" and is not closed
     */
    public static boolean needCup(String str) {
        return str.indexOf('U') >= 0 && !isClosed(str);
    }

    /**
     * Replaces {@link #states}, {@link #tokens} and {@link #thegma} with a second sample
     * NFA. {@link #relation2} is left untouched.
     */
    public static void testdata() {
        states = new String[] {"S", "q0", "q1", "q2", "q3", "A"};
        tokens = new String[] {"a", "b", "e"};
        thegma = new String[][][] {
            {{}, {}, {"q0"}},
            {{"q1"}, {}, {}},
            {{"q3"}, {"q2"}, {}},
            {{"q3"}, {"q0"}, {}},
            {{}, {}, {"A"}},
            {{}, {}, {}}
        };
    }
}
思路为:以NFA的形式化描述作为程序输入,以状态间的邻接矩阵(存放两点间的转移字符串)为迭代基础,按照先将NFA转化为GNFA、再由GNFA逐个删去状态得到正则表达式的过程进行状态删除,同时更新被删除节点(状态)的前驱与后继之间的字符串。更新时依次考虑以下内容:
- 被删节点若有环(自己到达自己的字符串不为空),则取出环,改写为 (str)*
- 两两拼接被删节点前后的字符串,如果有环,则应把环拼接到两串之间
- 拼接好的串使用U并入前后两点之间的邻接矩阵的对应位置
运行效果如下(程序依次打印两个NFA转换得到的正则表达式,其中 e 表示 $\varepsilon$):
e(0U1)*1(0Ue)1(0U1)*e
((eaa)U(eab(bab)*(aU(baa))))e
有待改进的地方为:
- 邻接矩阵是稀疏矩阵,可以优化存储方式,以降低存储
- 加括号可以优化,以减少重复的括号
- 表达式中的 $\varepsilon$ 可以设法消去,以简化结果
- 得到的正则表达式也许还可以进一步简化
- 程序更完善的输入输出