【字符串】题型总结：min window, word ladder II, valid number

最新推荐文章于 2021-02-25 13:50:45 发布

StevenIsSnail

最新推荐文章于 2021-02-25 13:50:45 发布

阅读量965

点赞数

分类专栏： leetcode 字符串

本文链接：https://blog.csdn.net/brandohero/article/details/41548269

版权

leetcode 同时被 2 个专栏收录

32 篇文章 0 订阅

订阅专栏

字符串

12 篇文章 0 订阅

订阅专栏

本文探讨了字符串处理中的几个经典问题，包括最小窗口问题、Word Break系列（1和2）以及有效数字验证。针对每个问题，文章详细介绍了解题思路，如维护字母计数、动态规划剪枝以及最短路径约束，并提供了错误和正确做法的对比，帮助读者深入理解算法应用。

摘要由CSDN通过智能技术生成

Title

Add Date

Acceptance

Difficulty

Reverse Words in a String 2014-03-05 14.2% Medium
Word Break II 2013-10-05 16.7% Hard
Word Break 2013-10-04 21.4% Medium
Word Ladder II 2013-02-10 11.6% Hard
Word Ladder 2013-02-10 18.4% Medium
Word Search 2012-04-18 19.6% Medium
Length of Last Word 2012-03-27 28.9% Easy
Substring with Concatenation of All Words 2012-02-23 18.1% Hard

Title	Add Date	Acceptance	Difficulty
Reverse Words in a String	2014-03-05	14.2%	Medium
Word Break II	2013-10-05	16.7%	Hard
Word Break	2013-10-04	21.4%	Medium
Word Ladder II	2013-02-10	11.6%	Hard
Word Ladder	2013-02-10	18.4%	Medium
Word Search	2012-04-18	19.6%	Medium
Length of Last Word	2012-03-27	28.9%	Easy
Substring with Concatenation of All Words	2012-02-23	18.1%	Hard

1 min window

本题和数据结构联系紧密

维护一个map<char, int> t,保持每个字母的数量。

维护一个queue，保持当前的有效的字母的下标。

维护一个map<char, int> cnt,保持当前队列queue中的字母和数量。

每次从S遍历一个char，如果是有效的字母：

修改cnt[char],加1。
当计数值刚好等于t[char]的时候，说明有一个新的字母就位了， m++。当m==n的时候，说明所有的字母都就位了。
然后从队首取得一个char，查看该char是否是多余的（cnt[char] 超过了t[char],如果是，丢出队列并更新cnt[char]）
如果m==n，即所有的字母都就位，则用当前的下表和队首的下标构成的区间去更新结果（最短长度和下标志）。

  string minWindow(string S, string T) {
    // int cnt[26];
        map<char, int> cnt;
        map<char, int> t;
        for(int i=0; i<T.length(); i++)
            t[T[i]]++;
        int n = t.size();
        if(!n) return "";
        
        queue<int> que;
        int m = 0, mm = -1, mi = -1;
        for(int i=0; i<S.length(); i++){
            char c = S[i];
            if(t.count(c)){
                cnt[c]=cnt[c]+1;
                if(cnt[c] == t[c])  m++;
                que.push(i);
                
                while(!que.empty()){
                    int j = que.front();
                    char d = S[j];
                    if(cnt[d] > t[d]) {
                        cnt[d] --;
                        que.pop();
                    }
                    else break;
                }
            
                if(m==n){
                    assert(!que.empty());
                    int j = que.front();
                    if(mm<0 || mm>i-j+1){
                        mi = j, mm = i-j+1;
                    }
                }
            }
        }
        if(mm<0) return "";
        return S.substr(mi, mm);
    }

2 Word Break 1

DP: res[i] = E(res[0<=j<i] && isW[j][i-1] )

class Solution {
public:
    bool wordBreak(string s, unordered_set<string> &dict) {
        int n = s.length();
        if(!n) return dict.empty();
        vector<vector<bool> > w(n, vector<bool>(n, false));
        for(int i =0; i<n; i++){
            string t = "";
            for(int j = i; j<n; j++){
                t+= s[j];
                w[i][j] = dict.count(t);
            }
        }
        
        vector<bool> t(n+1, false);
        t[0] = true;
        for(int i =1; i<=n; i++){
            for(int j =0; j<i; j++){
                if(t[j] && w[j][i-1]) {
                    t[i] = true;
                    break;
                }
            }
        }
        return t[n];
    }
};

3 Word Break II

cut - tree............................

这道题关键是要剪枝，剪枝方法为

res[i]保存i~n-1区间是否有效（存在被正确地划分的方案），
每次dp到i-1的时候，首先判断i~n-1是否有效，如果无效则剪枝。

class Solution {
public:
    vector<string> wordBreak(string s, unordered_set<string> &dict) {
        int n = s.length();
        if(!n)  return vector<string>();
        
        vector<vector<bool> > isW(n, vector<bool>(n, false));
        for(int i = 0; i<n; i++){
            string tmp = "";
            for(int j = i; j<n; j++){
                tmp+= s[j];
                if(dict.count(tmp)) isW[i][j] = true;
            }
        }
        
        vector<bool> res(n+1, false);
        res[n] = true;
        for(int i = n-1; i>=0; i--){
            for(int j = i+1; j<=n; j++){
                if(res[j] && isW[i][j-1]){
                    res[i] = true;
                    break;
                }
            }
        }

        vector<vector<string> > t(n+1);
        t[0].push_back("");
        for(int i =1; i<=n; i++){
            if(!res[i]) continue; //cut ...
            for(int j = 0; j<i; j++){//0~j-1, j~i-1
                if(t[j].size() && isW[j][i-1]){
                    for(int k = 0; k<t[j].size(); k++){
                        string tmp = t[j][k]; 
                        if(j>0) //[j, i-1] is not the first word
                            tmp+=" "+s.substr(j, i-j);
                        else // [j, i-1] is the first word
                            tmp+=s.substr(j, i-j);
                        t[i].push_back(tmp);
                    }
                }
            }
        }
        return t[n];
    }
};

或者

class Solution {
public:
    vector<string> wordBreak(string s, unordered_set<string> &dict) {
        int n = s.length();
        if(!n)  return vector<string>();
        
        vector<vector<bool> > isW(n, vector<bool>(n, false));
        for(int i = 0; i<n; i++){
            string tmp = "";
            for(int j = i; j<n; j++){
                tmp+= s[j];
                if(dict.count(tmp)) isW[i][j] = true;
            }
        }
        
        vector<bool> res(n+1, false);
        res[n] = true;
        for(int i = n-1; i>=0; i--){
            for(int j = i+1; j<=n; j++){
                if(res[j] && isW[i][j-1]){
                    res[i] = true;
                    break;
                }
            }
        }

        vector<vector<vector<int> > > t(n+1);
        t[0].push_back(vector<int>(1, 0));
        for(int i =1; i<=n; i++){
            if(!res[i]) continue; //cut ...
            for(int j = 0; j<i; j++){//0~j-1, j~i-1
                if(t[j].size() && isW[j][i-1]){
                    for(int k = 0; k<t[j].size(); k++){
                        vector<int> tmp = t[j][k]; 
                        tmp.push_back(i-j);
                        t[i].push_back(tmp);
                    }
                }
            }
        }
        
        vector<string> ss;
        for(int i = 0; i<t[n].size(); i++){
            string tmp="";
            int l = 0, k = 0;
            for(int j=0; j<t[n][i].size(); j++){
                k = t[n][i][j];
                if(!k) continue;
                tmp+=s.substr(l, k);
                if(j<t[n][i].size()-1) tmp+=" ";
                l+= k;
            }
            ss.push_back(tmp);
        }
        return ss;
    }
};

3 Word Ladder II

这道题要求找最短的s-》t的变换，如果把每个字符串看成一个节点，就相当于找s-》t节点的最短路径。

一般做法是采用“最短路径约束”

最短路径约束

首先求出图中到所有点的单源(s)最短路径d[node]
然后再dfs搜索路径，搜索过程中只遍历满足d[j] == d[i]+1的边<i, j> ，得到的s->t的路径就是最短的（最短路径约束）

对图中所有点做单源最短路径，很容易想到对源点s做bfs，然后记录每个顶点到s的距离d[i]。

那么在做单源最短路径之前，是不是需要建立起图来？

如果先建图，那么最容易想到任取两点，查看二者是否可达。

错误做法 n*n+bfs + graph simplify + dfs

这个题先试了一下用图表示每个字符串，建立起5000规模的图。遗憾的是超时了。

代码如下：

class Solution {
public:
    inline bool isNb(string &a, string &b){//判断相邻，时间代价O（n^2)太长了~~
        int cnt = 0;
        for(int i = 0; i<a.length(); i++){
            if(a[i]!=b[i]) cnt++;
            if(cnt>1) return false;
        }
        return cnt == 1;
    }
    vector<vector<string>> findLadders(string start, string end, unordered_set<string> &dict) {
        map<string, int> num; vector<string> strs;
        strs.push_back(start), num[start] = strs.size() - 1;
        
        typedef unordered_set<string>::iterator IT;
        for(IT it = dict.begin(); it!=dict.end(); it++){
            if(!num.count(*it)){
                strs.push_back(*it), num[*it] = strs.size() - 1;
            }
        }
        if(!num.count(end)){
            strs.push_back(end), num[end] = strs.size() - 1;
        }
        
        int n = strs.size();
        vector<vector<int> > graph(n);
        int s = 0, t = num[end];
        for(int i = 0; i<strs.size(); i++){
            for(int j = i; j<strs.size(); j++){
                string &a = strs[i], &b = strs[j];
                if(isNb(a, b)) graph[i].push_back(j), graph[j].push_back(i);
            }
        }
        
        // BFS to find d[i]
        vector<int> d(n, -1);
        queue<int> que;
        d[0] = 1, que.push(0);
        while(!que.empty()){
            int i = que.front(); que.pop();
            if(i == t) break;
            for(int j =0; j<graph[i].size(); j++){
                int k = graph[i][j];
                if(d[k]<0) {
                    que.push(k);
                    d[k] = d[i]+1;
                }
            }
        }
        
        // simplify the graph using d[i]
        vector<vector<string> > res;
        vector<string> tmp;
        for(int i =0; i<n; i++){
            vector<int> tmp;
            for(int j =0; j<graph[i].size(); j++){
                int k = graph[i][j];
                if(d[k] == d[i]+1) tmp.push_back(k);
            }
            graph[i] = tmp;
        }
        
        tmp.push_back(start);
        dfs(strs, d, graph, res, tmp, 0, t);
        return res;
    }
    void dfs(const vector<string> &strs, const vector<int> &d, const vector<vector<int> > &graph, vector<vector<string> > &res, vector<string> &cur, int idx, int t){
        if(cur.size() == d[t]){
            if(idx == t){
                res.push_back(cur);
            }
            return;
        }
        
        for(int i =0; i<graph[idx].size(); i++){
            int j = graph[idx][i];
            cur.push_back(strs[j]);
            dfs(strs, d, graph, res, cur, j, t);
            cur.pop_back();
        }
    }
};

上述代码会超时，原因是建图调用isNb（str, str) 了N^2次。而且graph[i][j]中包括太多实际上不会在单源（s）最短路径中出现的点。

后来考虑变换str，查找邻居节点。这样建图是O（n）的

但是建完图，bfs的时候还要记录d[i]，然后得到的图里有些边<i, j>是不满足d[j] = d[i] +1的，如果留下，会对后续dfs搜索路径造成复杂度。所以还需要bfs的时候化简图。而这个思路也超时。

正确的做法

先不建图，bfs的时候再搜索每个节点i的邻居j，判断<i,j>是否满足最短路径约束，满足则把<i, j>加入到图中。
然后对这个最简的图做dfs

    vector<vector<string>> findLadders(string start, string end, unordered_set<string> &dict) {
        map<string, int> num; vector<string> strs;
        strs.push_back(start), num[start] = strs.size() - 1;
        typedef unordered_set<string>::iterator IT;
        for(IT it = dict.begin(); it!=dict.end(); it++){
            if(!num.count(*it)){
                strs.push_back(*it), num[*it] = strs.size() - 1;
            }
        }
        if(!num.count(end)){
            strs.push_back(end), num[end] = strs.size() - 1;
        }
        
        int n = strs.size();
        int s = num[start], t = num[end];

        vector<vector<int> > graph(n);
        vector<int> d(n, -1);
        queue<int> que;
        d[s] = 1, que.push(s);
        while(!que.empty()){
            int i = que.front();
            que.pop(); //@error 3: forget to pop after peek ...........
            if(d[i] == d[t]) break; //@error 2: we should break when the t's layer reached, not when t reached
            string &ss = strs[i];
            for(int j = 0; j<ss.length(); j++){
                char c = ss[j];
                for(char k = 'a'; k <= 'z'; k++){
                    if(k == c) continue;
                    ss[j] = k; //@error 4: forget this...............
                    if(num.count(ss)){
                        int l = num[ss];
                        if(d[l] < 0) {
                            d[l] = d[i] + 1;
                            //graph[i].push_back(l); //@errror 1:here, not only when d[l]<0 l is i's child, but also when d[l] == d[i]+1
                            que.push(l);
                            //if(l == t) goto END_BFS; @error 2: 
                        }
                        if(d[l] == d[i]+1) 
                            graph[i].push_back(l);//@error 1:
                    }
                }
                ss[j] = c;
            }
        }
      
        vector<vector<string> > res;
        vector<int> tmp;
        tmp.push_back(s);
        dfs(strs, graph, d[t], t,  res, tmp);
        return res;
    }
    
    void dfs(const vector<string> &strs, const vector<vector<int> > &graph, const int md, const int t,
              vector<vector<string> > &res, vector<int> &cur){
        int idx = cur[cur.size()-1];
        if(cur.size() == md){
            if(idx == t){
                vector<string> tmp;
                for(int i = 0; i < cur.size(); i++){
                    tmp.push_back(strs[cur[i]]);
                }
                res.push_back(tmp);
            }
            return;
        }
        
        for(int i =0; i<graph[idx].size(); i++){
            int j = graph[idx][i];
            cur.push_back(j);
            dfs(strs, graph, md, t, res, cur);
            cur.pop_back();
        }
    }
};

短短一个bfs，掉进无数坑。最近记忆力衰退，什么都忘记。。。。。

4 valid number

最好写一个子函数，用来读取和验证一个number_pattern( 各种数字和正负号（只读取一次）)；并传一个参数告诉他是否可以有点符号。

number_pattern = [+,-][0~9]*.[0~9]*

number = number_pattern [e number_pattern]

class Solution {
public:
    int getNumber(const char *s, bool hasDot){ //读取number_pattern = [+,-][0~9]*.[0~9]*,并要求至少出现一个数字([0~9]).对于无效输入返回-1
         int i = 0;
         if(s[i] == '+' || s[i] == '-') i++;
         int j = i;
         while(s[i]<='9'&&s[i]>='0') i++;
         int k1 = i - j;
         if(s[i]=='.') {
            if(!hasDot) { return -1;}
            i++;
            int j = i;
            while(s[i]<='9'&&s[i]>='0') i++;
            int k2 = i - j;
            if(!k1 && !k2) { return -1;}
         }
         else if(!k1) { return -1;} //没有读取到数字（中间卡在哪个无效字符上了）
         return i;  
    }
    bool isNumber(const char *s) {// number = number_pattern [e number_pattern]
        int i =0;
        while(s[i]==' ') i++;
        bool t ;
        int k1 = getNumber(s+i, true);
        if(k1<0) return false;//无效number_pattern
        i += k1;
        if(s[i]=='e') //读取到e
        {
            i++;
            int k2 = getNumber(s+i, false);//e右边的number_pattern不许有dot
            if(k2<0) return false;         //无效number_pattern
            i += k2;
            if(!k1 || !k2) return false;
        }
        while(s[i]==' ') i++;
        return s[i] == 0;
    }
};

5 Substring with all words

实际上也是一个queue<int>保存匹配到的单词，

当匹配到新的单词t之后，cnt[t]++,如果个数cnt[t]超了，则需要从que弹出一段，直到弹到第一个该单词t。
- 如果队列大小为n=L.size()，说明找到一个匹配
当未匹配到时，que清空，cnt[]置为0.

str unique + KMP strs matches + queue , cnt

class Solution {
public:
    void kmp(string &s, string &word, vector<int> &res){
        vector<int> nxt(word.length()+2);
        nxt[0] = -1, nxt[1] = 0;
        for(int i=1; word[i]; i++){
            int t = nxt[i];
            while(t>=0 && word[t] != word[i])
                t = nxt[t];
            nxt[i+1] = t+1;
        }
    
        for(int i =0, j = 0; i<s.length(); i++){
            while(j>=0 && s[i]!=word[j]) j = nxt[j];
            j++;
            if(j==word.length()) res.push_back(i-j+1);
        }
    }
    vector<int> findSubstring(string S, vector<string> &L) {
        vector<int> res;
        if(L.size()==0) return res;
        
        // unique L to ul and wc
        sort(L.begin(), L.end());
        vector<string> ul; //unique L
        vector<int> wc; 
        int ln = 1;
        for(int i=1; i<=L.size(); i++){
            if(i== L.size() || L[i]!=L[i-1]){
                ul.push_back(L[i-1]);
                wc.push_back(ln);
                ln = 1;
            }else ln ++;
        }
        
        vector<int> matches(S.length(), -1);
        for(int i=0; i<ul.size(); i++){
            string & word = ul[i];
            vector<int> res;
            kmp(S, word, res);
            for(int j =0; j<res.size(); j++){
                matches[res[j]] = i;
            }
        }
         
        int n = L.size(), l = L[0].length();
        queue<int> que;
        vector<int> cnt(n, 0);
        for(int i =0; i<l; i++){
            que = queue<int>(), cnt = vector<int>(n, 0);
            for(int j = i; j<S.length(); j+=l){
                if(matches[j]>=0){
                    int t = matches[j];
                    que.push(t);
                    cnt[t]++;
                    if(cnt[t]>wc[t]){
                        //while(que.front()!=t) que.pop(); //@error 1: should update cnt when que changes
                        //que.pop();
                        while(!que.empty()){
                            int s = que.front(); que.pop();
                            cnt[s] --;
                            if(s==t) break;
                        }
                    }
                    if(que.size()==n) res.push_back(j-(n-1)*l); //@error 3: j-n*l+1
                }
                //@error 2: lost: when some word no match, then clear que
                else que = queue<int>(), cnt = vector<int>(n, 0);
            }
        }
        return res;
    }
};

或者不用kmp, 用hash查找

Hash+ que/cnt

class Solution {
public:
    vector<int> findSubstring(string S, vector<string> &L) {
    	vector<int> res;
    	if(L.size()==0) return res;
    	int N=L[0].length();

        #define rep(i,n) for(int i=0;i<n;i++)
    	map <string,int > words;
    	map <int,int > cnt;
    	rep(i,L.size()) { 
    		if(words.find(L[i])==words.end()) words[L[i]]=i,cnt[i]=1;
    		else { 
    			int j=words[L[i]];
    			cnt[j]++;
    		}
    	}
    	vector<int> tag(S.length(),-1);
    	rep(i,S.length()){
    		string tmp=S.substr(i,N);
    		if(words.find(tmp)!=words.end()){
    			tag[i]=words[tmp];
    		}
    	}
    
    	rep(i,N){
    		int j=i;
    		deque<int> cur;
    		int begin=j;//begin go ahead n step when cur[] pop n elements
    		while(j<S.length()){
    
    			//a new word to append
    			if(tag[j]>=0){
    				deque<int>::iterator it;
    				if(count(cur.begin(),cur.end(),tag[j])==cnt[tag[j]]){//overflow of word j
    					it=find(cur.begin(),cur.end(),tag[j]);

    					begin+=(it-cur.begin()+1)*N;
    					cur.erase(cur.begin(),it+1);
    				}
    				cur.push_back(tag[j]);
    
    				//if a new concatness is found after push_back
    				if(cur.size()==L.size()) {
    					res.push_back(begin);
    					
    					begin+=N;
    					cur.pop_front();
    				}
    			}
    
    			//a nonword: divide the string
    			else{//-1 
    			
    			    begin=j+N;
    				cur.clear();
    			}
    			j+=N;
    		}
    	}
    	return res;
    }
};