从网上搜了一篇正则表达式转换NFA的代码实现,链接:
https://blog.51cto.com/siwanghu/1705664
使用了 (ab|c) * abb这个正则表达式验证了一下,使用McMaughton-Yamada-Thompson算法进行转换,结果如下
但代码跑出来结果如下:
将其转换为状态转换图:
发现代码的结果无法表示 正则表达式 – abb ,也就是说代码对于闭包*这处理有问题,没有考虑闭包可以为 ε 的情况,仔细看了下代码:
开始想得比较简单,直接将
Edge edge1 = new Edge(endNode, begNode, "epsilon");
改为了
Edge edge1 = new Edge(begNode, endNode, "epsilon");
以为是nfa闭包的两个节点反了,跑起来发现结果如下:
但是发现这样少一个4–>3的空(ε)驱动.
最后对比了一下McMaughton-Yamada-Thompson算法中的归纳规则:
发现少了一条边,这条边对应到代码里边,就是少了这样一条边:
Edge edge4 = new Edge(graph.getEnd(), graph.getStart(), "epsilon");
(注意这里也要相应加上)
这样一改之后运行结果就对了!
化成状态转换图为:
代码如下(相较于原版,加了注释,并改了上述内容):
Node类:
package regextoNFA;
/**
* 状态结点类
*/
public class Node {
private int id;
private static int ID=0;
public Node() {
this.id = ID++;
}
public int getId() {
return id;
}
public static void reset(){
ID=0;
}
@Override
public String toString() {
return id+"";
}
}
Edge类:
package regextoNFA;
/**
* 连接两状态结点的边
* 标签label 即 驱动字符 其中 epsilon 即 ε
*/
public class Edge {
private Node begin;
private Node end;
private String label;
private Edge edge;
public Edge() {
super();
}
public Edge(Node begin, Node end) {
super();
this.begin = begin;
this.end = end;
}
public Edge(Node begin, Node end, String label) {
super();
this.begin = begin;
this.end = end;
this.label = label;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public Edge getEdge() {
return edge;
}
public void setEdge(Edge edge) {
this.edge = edge;
}
public Node getBegin() {
return begin;
}
public void setBegin(Node begin) {
this.begin = begin;
}
public Node getEnd() {
return end;
}
public void setEnd(Node end) {
this.end = end;
}
@Override
public String toString() {
return "Edge [begin="+begin+", end="+end+", label="+label+"]";
}
}
Regex类:
package regextoNFA;
import java.util.Stack;
public class Regex {
private String regex = "";
private Stack operatorStack;
private Stack operandStack;
private int[][] priority = {
{ 1, 1, 1, -1, 1, 1 }, // *&|()#
{ -1, 1, 1, -1, 1, 1 }, { -1, -1, 1, -1, 1, 1 },
{ -1, -1, -1, -1, 0, 2 }, { 1, 1, 1, 1, 1, 1 },
{ -1, -1, -1, -1, -1, -1 } };
public Regex() {
regex = "";
operatorStack = new Stack();
operandStack = new Stack();
}
public Regex(String _regex) {
regex = "";
operatorStack = new Stack();
operandStack = new Stack();
prepareString(_regex);
}
/**
* 核心转换代码
* stack 记录 NFA 片段
* 依次读取表达式的每个字符 ch
* 如果 ch 是运算符,从 stack 出栈所需数目的 NFA 片段,构建新的 NFA 片段后入栈 stack
* 如果 ch 是普通字符,创建新的状态,并构建只包含此状态的 NFA 片段入栈 stack
* 返回 stack 栈顶的 NFA 片段,即最终结果
*/
public Graph transformNFA() {
if (regex.length() == 0)
return null;
else {
int i = 0;
operatorStack.push('#');
char[] _regex = (regex + "#").toCharArray();
while (_regex[i] != '#'
|| (Character) (operatorStack.peek()) != '#') {
if (!isOperator(_regex[i])) {
operandStack.push((Character)_regex[i]);
i++;
} else {
int value=priorityOperator((Character)(operatorStack.peek()), _regex[i]);
switch (value) {
case 1:
Character character=(Character)operatorStack.pop();
switch (character) {
case '*':
Object obj=operandStack.pop();
Graph graph1=new Graph();
graph1.Star(obj);
operandStack.push(graph1);
break;
case '&':
Object obj2=operandStack.pop();
Object obj1=operandStack.pop();
Graph graph2=new Graph();
graph2.Concat(obj1, obj2);
operandStack.push(graph2);
break;
case '|':
Object obj4=operandStack.pop();
Object obj3=operandStack.pop();
Graph graph3=new Graph();
graph3.Union(obj3, obj4);
operandStack.push(graph3);
break;
default:
break;
}
break;
case 0:
operatorStack.pop();
i++;
break;
case -1:
operatorStack.push(_regex[i]);
i++;
break;
default:
break;
}
}
}
return (Graph) operandStack.pop();
}
}
public void reset(){
Node.reset();
operandStack.clear();
operatorStack.clear();
}
public String getRegex() {
return regex;
}
public void setRegex(String _regex) {
prepareString(_regex);
}
private boolean isOperator(Character character) {
String operatorString = "*&|()#";
if (operatorString.contains(character.toString())) {
return true;
} else {
return false;
}
}
private int priorityOperator(Character character1, Character character2) {
String priorityString = "*&|()#";
return this.priority[priorityString.indexOf(character1.toString())][priorityString
.indexOf(character2.toString())];
}
/**
* 在构建 NFA 之前,需要对正则表达式进行处理,以 (a|b)*abb 为例,在正则表达式里是没有连接符号的,这时就需要添加连接符
* 对当前字符类型进行判断,并对前一个字符进行判断,最终得到添加连接符之后的字符串
* 如 (a|b)*abb 添加完则为 (a|b)*&a&b&b
*/
private void prepareString(String _regex) {
char[] regexs = _regex.replaceAll(" ", "").toCharArray();
for (int i = 0; i < regexs.length; i++) {
if (i == 0)
regex += regexs[i];
else {
if (regexs[i] == '|' || regexs[i] == '*' || regexs[i] == ')') {
regex += regexs[i];
} else {
if (regexs[i - 1] == '(' || regexs[i - 1] == '|')
regex += regexs[i];
else
regex += ("&" + regexs[i]);
}
}
}
}
}
Graph类:
package regextoNFA;
import java.util.ArrayList;
import java.util.List;
public class Graph {
private List<Edge> edges;
private Node start;
private Node end;
public Graph() {
edges = new ArrayList<Edge>();
}
public List<Edge> getEdges() {
return edges;
}
public Node getStart() {
return start;
}
public void setStart(Node start) {
this.start = start;
}
public Node getEnd() {
return end;
}
public void setEnd(Node end) {
this.end = end;
}
public void reset() {
Node.reset();
}
/**
* 根据操作符和操作对象来进行连接、联合、闭包处理
* 由参数来判断调用构建NFA的函数
* 处理a*时会调用处理单字符闭包的函数,对应的也有处理NFA闭包的函数
*/
public void Star(Object obj) {
if (obj.getClass().getName().equals(Graph.class.getName())) {
addStar((Graph) obj);
return;
}
if (obj.getClass().getName().equals(Character.class.getName())) {
addStar((Character) obj);
return;
} else {
throw new RuntimeException("You have an error in your Regex syntax");
}
}
public void Union(Object obj1, Object obj2) {
if (obj1.getClass().getName().equals(Character.class.getName())) {
if (obj2.getClass().getName().equals(Graph.class.getName())) {
addUnion((Character) obj1, (Graph) obj2);
return;
}
if (obj2.getClass().getName().equals(Character.class.getName())) {
addUnion((Character) obj1, (Character) obj2);
return;
}
}
if (obj1.getClass().getName().equals(Graph.class.getName())) {
if (obj2.getClass().getName().equals(Graph.class.getName())) {
addUnion((Graph) obj1, (Graph) obj2);
return;
}
if (obj2.getClass().getName().equals(Character.class.getName())) {
addUnion((Graph) obj1, (Character) obj2);
return;
}
} else {
throw new RuntimeException("You have an error in your Regex syntax");
}
}
public void Concat(Object obj1, Object obj2) {
if (obj1.getClass().getName().equals(Character.class.getName())) {
if (obj2.getClass().getName().equals(Graph.class.getName())) {
addConcat((Character) obj1, (Graph) obj2);
return;
}
if (obj2.getClass().getName().equals(Character.class.getName())) {
addConcat((Character) obj1, (Character) obj2);
return;
}
}
if (obj1.getClass().getName().equals(Graph.class.getName())) {
if (obj2.getClass().getName().equals(Graph.class.getName())) {
addConcat((Graph) obj1, (Graph) obj2);
return;
}
if (obj2.getClass().getName().equals(Character.class.getName())) {
addConcat((Graph) obj1, (Character) obj2);
return;
}
} else {
throw new RuntimeException("You have an error in your Regex syntax");
}
}
/**
* 处理NFA闭包
*/
public void addStar(Graph graph) {
Node begNode = new Node();
Node endNode = new Node();
Edge edge1 = new Edge(begNode, endNode, "epsilon");
Edge edge2 = new Edge(begNode, graph.getStart(), "epsilon");
Edge edge3 = new Edge(graph.getEnd(), endNode, "epsilon");
/**
* 改动的地方
*/
Edge edge4 = new Edge(graph.getEnd(), graph.getStart(), "epsilon");
for (int i = 0; i < graph.getEdges().size(); i++) {
this.edges.add(graph.getEdges().get(i));
}
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
/**
* 改动的地方
*/
this.edges.add(edge4);
this.start = begNode;
this.end = endNode;
}
/**
* 处理单字符闭包
*/
public void addStar(Character character) {
Node nodeCenter = new Node();
Node nodebeg = new Node();
Node nodeend = new Node();
Edge edgeLink = new Edge(nodeCenter, nodeCenter, character.toString());
Edge edgeEpsilonBeg = new Edge(nodebeg, nodeCenter, "epsilon");
Edge edgeepsilonEnd = new Edge(nodeCenter, nodeend, "epsilon");
this.edges.add(edgeLink);
this.edges.add(edgeEpsilonBeg);
this.edges.add(edgeepsilonEnd);
this.start = nodebeg;
this.end = nodeend;
}
public void addUnion(Character character, Graph graph) {
Node begNode = new Node();
Node endNode = new Node();
Edge edge1 = new Edge(begNode, graph.getStart(), "epsilon");
Edge edge2 = new Edge(graph.getEnd(), endNode, "epsilon");
Edge edge3 = new Edge(begNode, endNode, character.toString());
for (int i = 0; i < graph.getEdges().size(); i++) {
this.edges.add(graph.getEdges().get(i));
}
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
this.start = begNode;
this.end = endNode;
}
public void addUnion(Graph graph, Character character) {
Node begNode = new Node();
Node endNode = new Node();
Edge edge1 = new Edge(begNode, graph.getStart(), "epsilon");
Edge edge2 = new Edge(graph.getEnd(), endNode, "epsilon");
Edge edge3 = new Edge(begNode, endNode, character.toString());
for (int i = 0; i < graph.getEdges().size(); i++) {
this.edges.add(graph.getEdges().get(i));
}
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
this.start = begNode;
this.end = endNode;
}
public void addUnion(Graph graph1, Graph graph2) {
Node begNode = new Node();
Node endNode = new Node();
Edge edge1 = new Edge(begNode, graph1.getStart(), "epsilon");
Edge edge2 = new Edge(begNode, graph2.getStart(), "epsilon");
Edge edge3 = new Edge(graph1.getEnd(), endNode, "epsilon");
Edge edge4 = new Edge(graph2.getEnd(), endNode, "epsilon");
this.start = begNode;
this.end = endNode;
for (int i = 0; i < graph1.getEdges().size(); i++) {
this.edges.add(graph1.getEdges().get(i));
}
for (int i = 0; i < graph2.getEdges().size(); i++) {
this.edges.add(graph2.getEdges().get(i));
}
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
this.edges.add(edge4);
}
public void addUnion(Character characterOne, Character characterTwo) {
Node nodebeg = new Node();
Node nodeend = new Node();
Edge edgeOne = new Edge(nodebeg, nodeend, characterOne.toString());
Edge edgeTwo = new Edge(nodebeg, nodeend, characterTwo.toString());
edges.add(edgeOne);
edges.add(edgeTwo);
start = nodebeg;
end = nodeend;
}
public void addConcat(Character character, Graph graph) {
Node begNode = new Node();
Edge edge = new Edge(begNode, graph.getStart(), character.toString());
for (int i = 0; i < graph.getEdges().size(); i++) {
this.edges.add(graph.getEdges().get(i));
}
this.edges.add(edge);
this.start = begNode;
this.end = graph.getEnd();
}
public void addConcat(Graph graph, Character character) {
Node endNode = new Node();
Edge edge = new Edge(graph.getEnd(), endNode, character.toString());
for (int i = 0; i < graph.getEdges().size(); i++) {
this.edges.add(graph.getEdges().get(i));
}
this.edges.add(edge);
this.start = graph.getStart();
this.end = endNode;
}
public void addConcat(Graph graph1, Graph graph2) {
Edge edge = new Edge(graph1.getEnd(), graph2.getStart(), "epsilon");
this.start = graph1.getStart();
this.end = graph2.getEnd();
for (int i = 0; i < graph1.getEdges().size(); i++) {
this.edges.add(graph1.getEdges().get(i));
}
for (int i = 0; i < graph2.getEdges().size(); i++) {
this.edges.add(graph2.getEdges().get(i));
}
this.edges.add(edge);
}
public void addConcat(Character characterOne, Character characterTwo) {
Node begNode = new Node();
Node midNode = new Node();
Node endNode = new Node();
Edge edge1 = new Edge(begNode, midNode, characterOne.toString());
Edge edge2 = new Edge(midNode, endNode, characterTwo.toString());
this.start = begNode;
this.end = endNode;
this.edges.add(edge1);
this.edges.add(edge2);
}
@Override
public String toString() {
String printString = "Start=" + this.start + " End=" + this.end + "\n";
for (int i = 0; i < edges.size(); i++) {
printString += edges.get(i) + "\n";
}
return printString;
}
}
运行/修改想转换的正则表达式的类:
package regextoNFA;
public class RegexToNfa {
public static void main(String[] args) {
try {
String regexString1 = "(ab|c)*abb";
Regex regex1 = new Regex(regexString1);
System.out.println(regex1);
System.out.println(regex1.getRegex());
System.out.println(regex1.transformNFA());
regex1.reset();
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
}
最后感谢原作大佬写出来的大佬!!!挽救可怜的孩子于水火之中_(: * 」∠)_