正则表达式的基本运算
正则表达式有三种基本的运算:
- 连接(Concatenation), 例如 abc, 由a, b, c组成
- 联合(Union), 例如 a|b|c, 表示a或者b或者c
- Kleene闭包(Kleene *), 例如 (ab)*, 表示ab串不出现,或者出现1次或一次以上
其它的运算如+, {}等都可以用以上三种基本运算或者运算的组合来表示。
正则表达式转换成NFA
1)(a)R=AB (b)R=A|B (c)R=A*
r=s|t
r=st
r=s*
2)例题3.26:为正则表达式r=(a|b)*abb构造一个NFA
①对于(a|b)中的a构造NFA
②对于(a|b)中的b构造NFA
③而(a|b)构造NFA,将上面两个图合并
④(a|b)*构造NFA
⑤对于abb中的a构造NFA,对于abb中的两个b构造NFA同④类似
⑥将④⑤合并之后
⑦最终的NFA是
NFA是捏着正则式去比文本,吃掉一个字符,就把它跟正则式比较,匹配就记下来:“某年某月某日在某处匹配上了!”,然后接着往下干。一旦不匹配,就把刚吃的这个字符吐出来,一个个的吐,直到回到上一次匹配的地方。
正则表达式转换成NFA代码
1.正则表达式后缀化《逆波兰表达式&后缀表达式》,并将其中操作符进行修改
public static enum Operator {
DIVIDE('|', 1),
ADD('&', 2),MULTIPLY('*', 2),
LEFT_BRACKET('(', 3), RIGHT_BRACKET(')', 3);
char value;
int priority;
//...
}
2.构造NFA源码
public class NfaAlgorithm {
private static String prepareString(String _regex) {
String regex = "";
char[] regexs = _regex.replaceAll(" ", "").toCharArray();
for (int i = 0; i < regexs.length; i++) {
if (i == 0)
regex += regexs[i];
else {
if (regexs[i] == '|' || regexs[i] == '*' || regexs[i] == ')') {
regex += regexs[i];
} else {
if (regexs[i - 1] == '(' || regexs[i - 1] == '|')
regex += regexs[i];
else
regex += ("&" + regexs[i]);
}
}
}
return regex;
}
private static String E = "epsllon";
public static Graph transformNFA(String suffixRegex){
char[] rs = suffixRegex.toCharArray();
Stack<Object> operandStack = new Stack();
for (int i =0; i<rs.length; i++){
char o = rs[i];
if(!Operator.isOperator(o)){
operandStack.push(String.valueOf(o));
}else{
Operator operator = Operator.findOperator(o);
Graph graph = null;
switch (operator){
case ADD://'&'
Object av1 = operandStack.pop();
Object av2 = operandStack.pop();
graph = Graph.parse(av2);
graph.add(av1);
break;
case MULTIPLY://'*'
Object mv = operandStack.pop();
graph = Graph.parse(mv);
graph.multiply();
break;
case DIVIDE://'|'
Object dv1 = operandStack.pop();
Object dv2 = operandStack.pop();
graph = Graph.parse(dv2);
graph.divide(dv1);
break;
}
operandStack.push(graph);
}
}
return Graph.parse(operandStack.pop());
}
public static void main(String[] args) {
String exp = prepareString("(A*B|AC)D");
//String exp = prepareString("AB");
System.out.println(exp);
String regex = SuffixAlgorithm.rpn(exp);
System.out.println(regex);
Graph graph = transformNFA(regex);
System.out.println(graph);
}
static class Graph {
private List<Edge> edges;
private Node start;
private Node end;
private Graph(String label){
this.start = new Node();
this.end = new Node();
this.edges = new ArrayList<>();
Edge edge = new Edge(start,end,label);
this.edges.add(edge);
}
public static Graph parse(Object obj){
if(obj instanceof Graph){
return (Graph) obj;
}
if(obj instanceof String)
return new Graph((String)obj);
throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
}
public void add(Object obj){
if(obj instanceof String){
add((String)obj);
return;
}
if(obj instanceof Graph) {
add((Graph) obj);
return;
}
throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
}
public void divide(Object obj){
if(obj instanceof String){
divide(parse(obj));
return;
}
if(obj instanceof Graph) {
divide((Graph) obj);
return;
}
throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
}
public void multiply(){
Node nodeStart = this.start;
Node nodeEnd = this.end;
this.start = new Node();
this.end = new Node();
Edge edge1 = new Edge(nodeEnd,nodeStart,E);
Edge edge2 = new Edge(this.start,nodeStart,E);
Edge edge3 = new Edge(nodeEnd,this.end,E);
Edge edge4 = new Edge(this.start,this.end,E);
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
this.edges.add(edge4);
}
private void divide(Graph graph){
Node nodeStart = this.start;
Node nodeEnd = this.end;
this.start = new Node();
this.end = new Node();
Edge edge1 = new Edge(this.start,graph.start,E);
Edge edge2 = new Edge(this.start,nodeStart,E);
Edge edge3 = new Edge(nodeEnd,this.end,E);
Edge edge4 = new Edge(graph.end,this.end,E);
this.edges.add(edge1);
this.edges.add(edge2);
this.edges.add(edge3);
this.edges.add(edge4);
this.edges.addAll(graph.edges);
}
private void add(String label){
Node mid = end;
this.end = new Node();
Edge edge = new Edge(mid,end,label);
this.edges.add(edge);
}
private void add(Graph graph){
Node node = this.end;
this.end = graph.end;
edges.stream().filter(edge -> edge.end == node)
.forEach(edge -> {
edge.end = graph.start;
});
this.edges.addAll(graph.edges);
}
@Override
public String toString() {
String printString = "Start=" + this.start + " End=" + this.end + "\n";
for (int i = 0; i < edges.size(); i++) {
printString += edges.get(i) + "\n";
}
return printString;
}
}
//边
static class Edge {
private Node begin;
private Node end;
private String label;
public Edge(Node begin, Node end, String label) {
this.begin = begin;
this.end = end;
this.label = label;
}
@Override
public String toString() {
return "Edge [begin=" + begin + ", end=" + end + ", label=" + label+ "]";
}
}
//节点
static class Node {
private int id;
private static int ID=0;
public Node(){
this.id=ID++;
}
public int getId() {
return id;
}
public static void reset(){
ID=0;
}
@Override
public String toString() {
return id+"";
}
}
}
参考文章: