一道Google的面试题 - Simplified Regular Expression Parser

一、题目如下:
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.

It has only two meta characters: '.' and '*'.

'.' -- exact one arbitrary character match.

'*' -- zero or more arbitrary character match.
--------------------------

具个例子,表达式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字符串), 'abd'。

二、解法:

解析给定的表达式
生成对应的 NFA(Nondeterministic Finite Automation)
NFA转化为DFA(Deterministic Finite Automation)
利用DFA判断输入字符串

不懂正则表达式? 参考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 参考http://planning.cs.uiuc.edu/node558.html
不懂DFA?参考http://lambda.uta.edu/cse5317/notes/node8.html

三、相关代码:
FiniteAutomata.java 和State.java,随手写写,请选择性参考。

FiniteAutomata.java

package parser;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;

public class FiniteAutomata {

private State start;

private final static char[] inputs;

static {
inputs = new char[26];
for (char i = 0; i < 26; i++) {
inputs[i] = (char) ('a' + i);
}
}

private final char episilon = 0;

private char stateId;

public void compile(String pattern) {
this.stateId = 'A';
State NFA = toNFA(pattern);
this.start = convertToDFA(NFA);
}

private State convertToDFA(State NFA) {
Map<Set<State>, State> cache = new HashMap<Set<State>, State>();
Queue<Set<State>> queue = new LinkedList<Set<State>>();

// start state of DFA
Set<State> start = episilon(NFA);
State starter = nextState();
cache.put(start, starter);
queue.offer(start);
while (!queue.isEmpty()) {
Set<State> currentEpisilon = queue.poll();
State current = cache.get(currentEpisilon);
// create new State
for (char input : inputs) {
Set<State> nextEpisilon = move(currentEpisilon, input);
if (nextEpisilon.isEmpty()) {
continue;
}
State next;
if (!cache.containsKey(nextEpisilon)) {
next = nextState();
cache.put(nextEpisilon, next);
queue.offer(nextEpisilon);
} else {
next = cache.get(nextEpisilon);
}
boolean isAccept = containsAcceptState(nextEpisilon);
next.setAccept(isAccept);
current.setTransition(input, next);
}
}
return starter;
}

private Set<State> move(Set<State> currentEpisilon, char input) {
Set<State> result = new HashSet<State>();
for (State s : currentEpisilon) {
State next = s.transit(input);
if (next != null) {
result.add(next);
}
}
return episilon(result);
}

private boolean containsAcceptState(Set<State> nextEpisilon) {
for (State state : nextEpisilon) {
if (state.isAccept()) {
return true;
}
}
return false;
}

private Set<State> episilon(State s) {
Set<State> cache = new HashSet<State>();
cache.add(s);
return episilon(s, cache);
}

private Set<State> episilon(Set<State> states) {
Set<State> cache = new HashSet<State>();
for (State s : states) {
cache.add(s);
cache.addAll(episilon(s, cache));
}
return cache;
}

private Set<State> episilon(State s, Set<State> cache) {
State next = s.transit(episilon);
if (next != null && !cache.contains(next)) {
cache.add(next);
cache.addAll(episilon(s, cache));
}
return cache;
}

private State toNFA(String pattern) {
char[] expr = pattern.toCharArray();
State currentState = nextState();
State starter = currentState;
for (char c : expr) {
if (c == '.') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState = nextState;
} else if (c == '*') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState.setTransition(episilon, nextState);
nextState.setTransition(episilon, currentState);
currentState = nextState;
} else if (c >= 'a' && c <= 'z') {
State nextState = nextState();
currentState.setTransition(c, nextState);
currentState = nextState;
} else {
throw new RuntimeException("Unrecognized expression");
}
}
currentState.setAccept(true);
return starter;
}

private State nextState() {
return new State(stateId++);
}

public void constructDFA(Map<State, Map<Character, State>> mapping) {
Iterator<Map.Entry<State, Map<Character, State>>> it = mapping
.entrySet().iterator();
while (it.hasNext()) {
Entry<State, Map<Character, State>> entry = it.next();
State state = entry.getKey();
Map<Character, State> transition = entry.getValue();
state.setTransition(transition);
}
}

public boolean match(String c) {
char[] candidate = c.toCharArray();
for (char d : candidate) {
start = start.transit(d);
if (start == null) {
return false;
}
}
return start.isAccept();
}

public static String[] patterns = { "abc", "*", "*abc", "*abc", "a*bc",
"a*bc", "a*", "a*", "a*", "a*", "*abc*", "*****", "...", ".*",
".bc*", ".b*c*a", "*", "abc", "*a", "a", ".a*c", "a.*b", "..", "",
"" };

public static String[] candidates = { "abc", "abc", "abc", "aaabbbabc",
"aaabbbabc", "abc", "abc", "a", "aa", "abcdef", "abc", "abc",
"abc", "abc", "abc", "abca", "", "abcd", "abcd", "", "abc", "abc",
"abc", "", "abc" };

public static void main(String[] args) {
FiniteAutomata fa = new FiniteAutomata();
for (int i = 0; i < patterns.length; i++) {
fa.compile(patterns[i]);
System.out.println(fa.match(candidates[i]));
}
}

}


State.java

package parser;

import java.util.HashMap;
import java.util.Map;

public class State {

private boolean accept = false;

private char id;

private Map<Character, State> mapping = new HashMap<Character, State>();

public State(char c) {
this.id = c;
}

public State(char c, boolean isAccept) {
this.id = c;
this.accept = isAccept;
}

public State transit(char input) {
return mapping.get(input);
}

public void setTransition(char input, State next) {
this.mapping.put(input, next);
}

public void setTransition(Map<Character, State> mapping) {
this.mapping = mapping;
}

public boolean isAccept() {
return this.accept;
}

public void setAccept(boolean b) {
this.accept = b;
}

@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + id;
return result;
}

@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
State other = (State) obj;
if (id != other.id)
return false;
return true;
}

@Override
public String toString() {
return "State [id=" + id + "]";
}

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值