原题网址:https://leetcode.com/problems/regular-expression-matching/
Implement regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
The function prototype should be:
bool isMatch(const char *s, const char *p)
Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true
方法一:有限状态自动机。
/**
 * Regular-expression matcher supporting '.' (any single character) and '*'
 * (zero or more of the preceding element), implemented by building an
 * epsilon-NFA from the pattern and simulating it over the input.
 *
 * <p>States are numbered 0..accept along the pattern. An element {@code x*}
 * becomes an epsilon edge into a new state that carries a self-loop on
 * {@code x}; a plain element {@code x} becomes a single edge on {@code x}.
 *
 * <p>Epsilon edges are encoded with the NUL character, so a literal NUL in
 * the pattern or the input is not distinguishable from epsilon.
 */
public class Solution {
    /** Character value used to mark an epsilon (input-free) transition. */
    private static final char EPSILON = (char) 0;

    /** Transition table: state -> outgoing transitions. Rebuilt on every isMatch call. */
    Map<Integer, Set<Transfer>> nfa = new HashMap<>();

    /** Number of the accepting state of the most recently built automaton. */
    int accept = 0;

    /**
     * Returns the (mutable) set of outgoing transitions of a state, creating
     * and registering an empty set when the state has none yet.
     */
    private Set<Transfer> getTransfers(int state) {
        Set<Transfer> transfers = nfa.get(state);
        if (transfers == null) {
            transfers = new HashSet<Transfer>();
            nfa.put(state, transfers);
        }
        return transfers;
    }

    /** Returns the states reachable from {@code state} through exactly one epsilon edge. */
    private Set<Integer> getEpsilonStates(int state) {
        Set<Integer> states = new HashSet<>();
        for (Transfer transfer : getTransfers(state)) {
            if (transfer.ch == EPSILON) {
                states.add(transfer.next);
            }
        }
        return states;
    }

    /**
     * Returns the states reachable from {@code state} by consuming {@code ch}:
     * edges labelled with that exact character plus edges labelled '.'.
     */
    private Set<Integer> getNextStates(int state, char ch) {
        Set<Integer> states = new HashSet<>();
        for (Transfer transfer : getTransfers(state)) {
            if (transfer.ch == ch || transfer.ch == '.') {
                states.add(transfer.next);
            }
        }
        return states;
    }

    /** Adds one outgoing transition to {@code state}. */
    private void addTransfer(int state, Transfer transfer) {
        getTransfers(state).add(transfer);
    }

    /**
     * Builds the automaton for the pattern {@code re} and records its
     * accepting state in {@link #accept}.
     *
     * @param re pattern made of literals, '.' and '*'
     */
    private void buildNFA(String re) {
        // Start from an empty table: without this, transitions left over from
        // a previous isMatch call on the same instance would be merged into
        // the new automaton and produce false positives.
        nfa.clear();
        accept = 0;

        int state = 0;
        for (int i = 0; i < re.length(); i++) {
            int next = state + 1;
            boolean starred = i < re.length() - 1 && re.charAt(i + 1) == '*';
            if (starred) {
                // "x*": enter the loop state for free, then loop on x.
                addTransfer(state, new Transfer(EPSILON, next));
                addTransfer(next, new Transfer(re.charAt(i), next));
                i++; // skip the '*'
            } else {
                addTransfer(state, new Transfer(re.charAt(i), next));
            }
            state = next;
        }
        accept = state;
    }

    /** Debug helper: dumps the transition table to standard output. */
    private void printNFA() {
        System.out.println(this.nfa);
    }

    /**
     * Computes the epsilon closure of a set of states: the states themselves
     * plus everything reachable from them through epsilon edges only.
     */
    private Set<Integer> getClosure(Set<Integer> states) {
        Set<Integer> closure = new HashSet<>(states);
        Set<Integer> frontier = new HashSet<>(states);
        while (!frontier.isEmpty()) {
            Set<Integer> discovered = new HashSet<>();
            for (int state : frontier) {
                for (int target : getEpsilonStates(state)) {
                    // Set.add returns true only for states not seen before.
                    if (closure.add(target)) {
                        discovered.add(target);
                    }
                }
            }
            frontier = discovered;
        }
        return closure;
    }

    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        buildNFA(p);
        Set<Integer> states = new HashSet<>();
        states.add(0);
        for (int i = 0; i < s.length(); i++) {
            Set<Integer> nexts = new HashSet<>();
            for (int state : getClosure(states)) {
                nexts.addAll(getNextStates(state, s.charAt(i)));
            }
            states = nexts;
        }
        return getClosure(states).contains(accept);
    }
}

/** One labelled edge of the automaton: on character {@code ch}, go to state {@code next}. */
class Transfer {
    /** Edge label; the NUL character marks an epsilon edge. */
    char ch;
    /** Target state number. */
    int next;

    Transfer(char ch, int next) {
        this.ch = ch;
        this.next = next;
    }

    @Override
    public String toString() {
        return "(" + ch + ", " + next + ")";
    }
}
另一种实现:
/**
 * Regular-expression matcher ('.' and '*') that compiles the pattern into an
 * {@link NFA} and simulates it over the input.
 */
public class Solution {
    /*
     * Test cases worth keeping around (text, pattern):
     * "aa" / "a"
     * "aa" / "a*"
     * "aa" / "aaa"
     * "aa" / ".*"
     * "a"  / "ab*"
     * "a"  / ".*..a*"
     */

    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        NFA automaton = new NFA();
        Integer tail = automaton.addState(); // the start state, numbered 0

        int idx = 0;
        while (idx < p.length()) {
            Integer fresh = automaton.addState();
            boolean starred = idx + 1 < p.length() && p.charAt(idx + 1) == '*';
            if (starred) {
                // "x*": free move into the new state, which then loops on x.
                automaton.addNext(tail, null, fresh);
                automaton.addNext(fresh, p.charAt(idx), fresh);
                idx += 2;
            } else {
                automaton.addNext(tail, p.charAt(idx), fresh);
                idx += 1;
            }
            tail = fresh;
        }
        Integer accept = tail;

        Set<Integer> active = new HashSet<>();
        active.add(0);
        active = automaton.getClosure(active);
        for (char symbol : s.toCharArray()) {
            active = automaton.getClosure(automaton.getNexts(active, symbol));
        }
        return active.contains(accept);
    }
}

/**
 * Minimal nondeterministic automaton. Transitions are stored as
 * state -> label -> target states, where a {@code null} label denotes an
 * epsilon edge and the label '.' matches any consumed character.
 */
class NFA {
    private Map<Integer, Map<Character, Set<Integer>>> transfers = new HashMap<>();

    /** Debug helper: prints every stored transition to standard output. */
    void print() {
        for (Map.Entry<Integer, Map<Character, Set<Integer>>> perState : transfers.entrySet()) {
            for (Map.Entry<Character, Set<Integer>> perLabel : perState.getValue().entrySet()) {
                System.out.printf("[%s]--%s-->%s\n",
                        perState.getKey(), perLabel.getKey(), perLabel.getValue());
            }
        }
    }

    /** Number of states allocated so far; also the number of the next state. */
    int states = 0;

    /** Allocates a new state and returns its number. */
    Integer addState() {
        int allocated = states;
        states = allocated + 1;
        return allocated;
    }

    /**
     * Adds the edge {@code state --ch--> next}.
     *
     * @param ch edge label, or {@code null} for an epsilon edge
     */
    void addNext(Integer state, Character ch, Integer next) {
        Map<Character, Set<Integer>> outgoing = transfers.get(state);
        if (outgoing == null) {
            outgoing = new HashMap<Character, Set<Integer>>();
            transfers.put(state, outgoing);
        }
        Set<Integer> targets = outgoing.get(ch);
        if (targets == null) {
            targets = new HashSet<>();
            outgoing.put(ch, targets);
        }
        targets.add(next);
    }

    /**
     * Returns the given states plus every state reachable from them through
     * epsilon edges only.
     */
    Set<Integer> getClosure(Set<Integer> states) {
        Set<Integer> closure = new HashSet<>(states);
        Set<Integer> frontier = states;
        boolean grew;
        do {
            frontier = getNexts(frontier, null);
            // addAll reports whether any previously unseen state was added.
            grew = closure.addAll(frontier);
        } while (grew);
        return closure;
    }

    /**
     * Returns the states reachable from {@code states} in one step.
     *
     * @param ch consumed character; {@code null} follows epsilon edges only,
     *           otherwise edges labelled {@code ch} and edges labelled '.'
     */
    Set<Integer> getNexts(Set<Integer> states, Character ch) {
        Set<Integer> results = new HashSet<>();
        for (Integer state : states) {
            Map<Character, Set<Integer>> outgoing = transfers.get(state);
            if (outgoing == null) {
                continue;
            }
            // A null key is a valid HashMap key, so this covers epsilon too.
            Set<Integer> exact = outgoing.get(ch);
            if (exact != null) {
                results.addAll(exact);
            }
            if (ch != null) {
                Set<Integer> wildcard = outgoing.get('.');
                if (wildcard != null) {
                    results.addAll(wildcard);
                }
            }
        }
        return results;
    }
}
方法二:简化的状态机,广度优先搜索,以p为外层循环。
/**
 * Regular-expression matcher ('.' and '*') that walks the pattern element by
 * element while tracking the set of text positions reachable so far.
 */
public class Solution {
    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        final int textLen = s.length();
        final int patLen = p.length();

        // Positions in s that the pattern prefix processed so far can end at.
        Set<Integer> frontier = new HashSet<>();
        frontier.add(0);

        int pos = 0;
        while (pos < patLen) {
            char token = p.charAt(pos);
            if (token == '*') {
                pos++;
                continue;
            }
            boolean repeat = pos + 1 < patLen && p.charAt(pos + 1) == '*';
            boolean wildcard = token == '.';

            Set<Integer> reached = new HashSet<>();
            for (int from : frontier) {
                if (!repeat) {
                    // Single element: '.' always advances, a literal only on a match.
                    if (wildcard || (from < textLen && s.charAt(from) == token)) {
                        reached.add(from + 1);
                    }
                    continue;
                }
                // Starred element: zero occurrences, then extend while it keeps matching.
                int j = from;
                while (j <= textLen) {
                    reached.add(j);
                    if (!wildcard && j < textLen && s.charAt(j) != token) {
                        break;
                    }
                    j++;
                }
            }

            if (reached.isEmpty()) {
                return false;
            }
            frontier = reached;
            pos += repeat ? 2 : 1;
        }
        return frontier.contains(textLen);
    }
}
另一种实现,这种实现最简单:
/**
 * Regular-expression matcher ('.' and '*'): for each pattern element, the set
 * of reachable text positions is advanced to the next set.
 */
public class Solution {
    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        char[] text = s.toCharArray();
        char[] pattern = p.toCharArray();

        Set<Integer> current = new HashSet<>();
        current.add(0);

        for (int i = 0; i < pattern.length; i++) {
            char token = pattern[i];
            if (token == '*') {
                // Already handled together with the element in front of it.
                continue;
            }
            boolean repeat = i + 1 < pattern.length && pattern[i + 1] == '*';

            Set<Integer> following = new HashSet<>();
            for (int pos : current) {
                if (repeat) {
                    // Zero occurrences, then one more position per matching character.
                    following.add(pos);
                    int j = pos;
                    while (j < text.length && (token == '.' || token == text[j])) {
                        j++;
                        following.add(j);
                    }
                } else if (pos < text.length && (token == '.' || token == text[pos])) {
                    following.add(pos + 1);
                }
            }
            current = following;
        }
        return current.contains(text.length);
    }
}
另一种实现:
/**
 * Regular-expression matcher ('.' and '*') using the same position-set walk
 * over the pattern, but with plain arrays instead of hash sets.
 */
public class Solution {
    /*
     * These cases all matter; plenty of bugs hide here (text, pattern):
     * ""   / ""
     * "aa" / "a"
     * "aa" / "a*"
     * "aa" / "aaa"
     * "aa" / ".*"
     * "a"  / "ab*"
     */

    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        final int n = s.length();
        final int m = p.length();

        int[] frontier = new int[n + 1]; // reachable positions, first `count` entries valid
        int[] scratch = new int[n + 1];  // buffer for the next frontier
        boolean[] seen = new boolean[n + 1]; // membership flags for the frontier being built
        seen[0] = true;
        frontier[0] = 0;
        int count = 1;

        int i = 0;
        while (i < m) {
            char token = p.charAt(i);
            if (token == '*') {
                i++;
                continue;
            }
            boolean repeat = i + 1 < m && p.charAt(i + 1) == '*';
            boolean any = token == '.';

            Arrays.fill(seen, false);
            int produced = 0;
            for (int k = 0; k < count; k++) {
                int from = frontier[k];
                if (repeat) {
                    // Zero occurrences, then extend while the element keeps matching.
                    for (int j = from; j <= n; j++) {
                        if (!seen[j]) {
                            seen[j] = true;
                            scratch[produced++] = j;
                        }
                        if (!any && j < n && s.charAt(j) != token) {
                            break;
                        }
                    }
                } else if (from < n && (any || s.charAt(from) == token) && !seen[from + 1]) {
                    seen[from + 1] = true;
                    scratch[produced++] = from + 1;
                }
            }

            if (produced == 0) {
                return false;
            }
            // Swap buffers so the freshly built frontier becomes the current one.
            int[] swap = frontier;
            frontier = scratch;
            scratch = swap;
            count = produced;
            i += repeat ? 2 : 1;
        }
        return seen[n];
    }
}
方法三:广度优先搜索,简化的自动机,外层循环是s
/**
 * Regular-expression matcher ('.' and '*') that walks the text character by
 * character while tracking the set of pattern indices that may be active.
 */
public class Solution {
    /**
     * Expands a set of pattern indices with every index reachable by skipping
     * starred elements (an element followed by '*' may match zero characters).
     */
    private Set<Integer> closure(Set<Integer> states, char[] pa) {
        Set<Integer> expanded = new HashSet<>();
        for (int seed : states) {
            int at = seed;
            expanded.add(at);
            // Hop over each consecutive "x*" pair starting at this index.
            while (at + 1 < pa.length && pa[at + 1] == '*') {
                at += 2;
                expanded.add(at);
            }
        }
        return expanded;
    }

    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        char[] text = s.toCharArray();
        char[] pattern = p.toCharArray();

        Set<Integer> active = new HashSet<>();
        active.add(0);
        active = closure(active, pattern);

        for (char symbol : text) {
            Set<Integer> advanced = new HashSet<>();
            for (int at : active) {
                if (at >= pattern.length) {
                    continue;
                }
                if (pattern[at] != '.' && pattern[at] != symbol) {
                    continue;
                }
                // A starred element stays in place after consuming; a plain one moves on.
                boolean repeat = at + 1 < pattern.length && pattern[at + 1] == '*';
                advanced.add(repeat ? at : at + 1);
            }
            active = closure(advanced, pattern);
        }
        return active.contains(pattern.length);
    }
}
方法四:动态规划,以p为外层循环。
/**
 * Regular-expression matcher ('.' and '*') using dynamic programming with the
 * pattern as the outer loop and a rolling row over text prefixes.
 */
public class Solution {
    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        char[] text = s.toCharArray();
        char[] pattern = p.toCharArray();
        final int n = text.length;

        // prev[j]: the pattern elements processed so far match the first j characters.
        boolean[] prev = new boolean[n + 1];
        prev[0] = true;

        int k = 0; // index of the pattern element being processed
        while (k < pattern.length) {
            char token = pattern[k];
            boolean repeat = k + 1 < pattern.length && pattern[k + 1] == '*';
            boolean[] cur = new boolean[n + 1];

            if (repeat) {
                // Zero occurrences (prev[j]) or one more occurrence on top of cur[j - 1].
                cur[0] = prev[0];
                for (int j = 1; j <= n; j++) {
                    cur[j] = prev[j]
                            || (cur[j - 1] && (token == '.' || token == text[j - 1]));
                }
                k += 2;
            } else {
                for (int j = 1; j <= n; j++) {
                    cur[j] = prev[j - 1] && (token == '.' || token == text[j - 1]);
                }
                k += 1;
            }
            prev = cur;
        }
        return prev[n];
    }
}
方法五:动态规划,以s为外层循环。
/**
 * Regular-expression matcher ('.' and '*') using dynamic programming with the
 * text as the outer loop and a rolling row over pattern prefixes.
 */
public class Solution {
    /**
     * Tells whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text
     * @param p pattern made of literals, '.' and '*'
     * @return {@code true} when the entire input is matched by the pattern
     */
    public boolean isMatch(String s, String p) {
        char[] text = s.toCharArray();
        char[] pattern = p.toCharArray();
        final int m = pattern.length;

        // row[k]: the text consumed so far is matched by the first k pattern characters.
        boolean[] row = new boolean[m + 1];
        row[0] = true;
        // Empty text: a prefix matches only when it ends in a starred element
        // and the prefix before that element matches too.
        for (int k = 0; k < m; k++) {
            if (k + 1 < m && pattern[k + 1] == '*') {
                row[k + 2] = row[k];
                k++;
            }
        }

        for (char symbol : text) {
            boolean[] cur = new boolean[m + 1];
            int k = 0;
            while (k < m) {
                boolean hit = pattern[k] == '.' || pattern[k] == symbol;
                if (k + 1 < m && pattern[k + 1] == '*') {
                    // Zero occurrences (cur[k]) or one more occurrence after row[k + 2].
                    cur[k + 2] = cur[k] || (row[k + 2] && hit);
                    k += 2;
                } else {
                    cur[k + 1] = row[k] && hit;
                    k += 1;
                }
            }
            row = cur;
        }
        return row[m];
    }
}