LeetCode - 解题笔记 - 30 - Substring with Concatenation of All Words

最新推荐文章于 2022-03-25 16:53:26 发布

支锦铭

最新推荐文章于 2022-03-25 16:53:26 发布

阅读量105

点赞数

分类专栏： LeetCode解题笔记文章标签：算法 leetcode

本文链接：https://blog.csdn.net/cary_leo/article/details/114581605

版权

LeetCode解题笔记专栏收录该内容

191 篇文章 1 订阅

订阅专栏

Substring with Concatenation of All Words

Solution 1

由于给定字符串的长度和给定的待匹配词的数目都太大了，想要枚举所有的组合然后匹配字符串的暴力思路肯定是不太行。可是每个待匹配词的长度一定，那么我们可以确定的是待匹配的子串的长度是words.size()*words[0].size()，并且只要保证每一个words[0].size()的子子串匹配上其中一个待匹配词，且不重复匹配就可以判定存在了。因此遍历的思路就是从每一个位置开始（题目要求没办法）取一个words.size()*words[0].size()长度的子串，然后检查每words[0].size()长度的子子串是否满足一个待匹配词；如果中途出现不在列表中的单词，或者某个单词出现次数过多（同一个词可能在待匹配序列中出现多次，如果出现过多肯定不行），则认为无法匹配，否则能够正好匹配（对于小于的情形不用担心，必然会有一个词多出现或者出现不存在词替代其位置）。为了进一步降低复杂度，使用哈希表查找取代逐词比较的匹配过程。

时间复杂度： $O (M N n)$ ，其中 $M$ 为输入字符串的长度， $N$ 为待匹配词的个数， $n$ 为每个词的长度
空间复杂度： $O (N + n)$ ，其中 $N$ 为待匹配词的个数， $n$ 为每个词的长度，分别为构造哈希表和取子串所消耗

class Solution {
public:
    /// @brief Brute-force search for concatenations of all words.
    ///
    /// Tries every start index of `s`, cuts the following
    /// words.size() * words[0].size() characters into word-sized pieces and
    /// checks that the pieces form exactly the multiset `words`.
    ///
    /// @param s     Text to search in.
    /// @param words Words to concatenate; all are assumed to have the same
    ///              length as words[0] (problem constraint, not verified here).
    /// @return Start indices of every matching window, in ascending order.
    ///         Empty when `words` is empty or `s` is shorter than one window.
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> ans;
        // Guard first: reading words[0] on an empty list is undefined behaviour.
        if (words.empty()) return ans;

        const std::size_t wordLen = words[0].size();
        const std::size_t wordCount = words.size();
        const std::size_t windowLen = wordLen * wordCount;
        if (s.size() < windowLen) return ans;

        // How many times each word must appear inside a matching window.
        std::unordered_map<std::string, int> need;
        for (const auto& word : words) ++need[word];

        // Reused across windows; clear() keeps the buckets allocated.
        std::unordered_map<std::string, int> seen;
        for (std::size_t start = 0; start + windowLen <= s.size(); ++start) {
            seen.clear();
            std::size_t matched = 0;
            for (; matched < wordCount; ++matched) {
                const std::string piece = s.substr(start + matched * wordLen, wordLen);
                const auto wanted = need.find(piece);
                if (wanted == need.end()) break;          // piece is not one of the words
                if (++seen[piece] > wanted->second) break; // piece used too many times
                // A shortage needs no separate check: with a fixed number of
                // slots, a missing word forces a surplus or an unknown piece.
            }
            if (matched == wordCount) ans.push_back(static_cast<int>(start));
        }

        return ans;
    }
};

Solution 2

来自网上的更简单的算法：滑动窗口详细通俗的思路分析，多解法 - 串联所有单词的子串 - 力扣（LeetCode） (leetcode-cn.com)

这个教程说的比较通俗易懂，整体思路就是以单词长度为单位向后检查，并在上一个思路中提到的两种判负情形下滑动检查窗口。为了能够遍历所有情形，将偏移量设置为单词长度内的所有值，进行遍历。

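时间复杂度： $O (n (M + N))$ ，其中 $M$ 为输入字符串的长度， $N$ 为待匹配词的个数， $n$ 为每个词的长度（共 $n$ 个偏移量，每个偏移量下每个单词位置最多进、出窗口各一次，取子串与哈希各需 $O (n)$ ，合计 $O (M n)$ ；构造哈希表需 $O (N n)$ ）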
空间复杂度： $O (N + n)$ ，其中 $N$ 为待匹配词的个数， $n$ 为每个词的长度，分别为构造哈希表和取子串所消耗

class Solution {
public:
    /// @brief Sliding-window search for concatenations of all words.
    ///
    /// For every alignment `offset` in [0, words[0].size()) the text is read
    /// as a sequence of word-sized slots. A window [left, right] of slots is
    /// grown one slot at a time and shrunk from the left whenever a word is
    /// over-represented, so each slot enters and leaves the window at most
    /// once per alignment.
    ///
    /// @param s     Text to search in.
    /// @param words Words to concatenate; all are assumed to have the same
    ///              length as words[0] (problem constraint, not verified here).
    /// @return Start indices of every matching window. Indices are grouped by
    ///         alignment (ascending within each group), not globally sorted.
    ///         Empty when `words` is empty or `s` is shorter than one window.
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> ans;
        // Guard first: reading words[0] on an empty list is undefined behaviour.
        if (words.empty()) return ans;

        const std::size_t wordLen = words[0].size();
        const std::size_t wordCount = words.size();
        if (s.size() < wordLen * wordCount) return ans;

        // How many times each word must appear inside a matching window.
        std::unordered_map<std::string, int> need;
        for (const auto& word : words) ++need[word];

        std::unordered_map<std::string, int> seen; // word counts inside the current window
        for (std::size_t offset = 0; offset < wordLen; ++offset) {
            seen.clear();
            std::size_t left = offset; // start index of the current window
            std::size_t used = 0;      // number of word slots inside the window

            for (std::size_t right = offset; right + wordLen <= s.size(); right += wordLen) {
                const std::string piece = s.substr(right, wordLen);
                const auto wanted = need.find(piece);
                if (wanted == need.end()) {
                    // Unknown word: no window may contain this slot, restart after it.
                    seen.clear();
                    used = 0;
                    left = right + wordLen;
                    continue;
                }

                // References into unordered_map stay valid across later lookups.
                int& have = seen[piece];
                ++have;
                ++used;

                // Surplus of `piece`: drop slots from the left until one copy is gone.
                while (have > wanted->second) {
                    --seen[s.substr(left, wordLen)];
                    --used;
                    left += wordLen;
                }

                if (used == wordCount) {
                    ans.push_back(static_cast<int>(left));
                    // Slide by one slot so the next window can be tested.
                    --seen[s.substr(left, wordLen)];
                    --used;
                    left += wordLen;
                }
            }
        }

        return ans;
    }
};

Solution 3

Solution 1的Python实现

class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Brute-force search for concatenations of all words.

        Every start index of ``s`` is tried: the following
        ``len(words) * len(words[0])`` characters are cut into word-sized
        pieces, which must form exactly the multiset ``words``.

        Args:
            s: Text to search in.
            words: Words to concatenate; all are assumed to have the same
                length as ``words[0]`` (problem constraint, not verified).

        Returns:
            Start indices of every matching window, in ascending order.
            Empty when ``words`` is empty or ``s`` is shorter than one window.
        """
        ans = []
        # Guard first: words[0] would raise IndexError on an empty list.
        if not words:
            return ans

        word_count = len(words)
        word_len = len(words[0])
        window_len = word_count * word_len
        if len(s) < window_len:
            return ans

        # How many times each word must appear inside a matching window.
        need = {}
        for word in words:
            need[word] = need.get(word, 0) + 1

        for start in range(len(s) - window_len + 1):
            seen = {}
            for k in range(word_count):
                piece = s[start + k * word_len: start + (k + 1) * word_len]
                if piece not in need:
                    break  # piece is not one of the words
                seen[piece] = seen.get(piece, 0) + 1
                if seen[piece] > need[piece]:
                    break  # piece used too many times
            else:
                # No break: all slots matched. A shortage cannot go unnoticed,
                # because it forces a surplus or an unknown piece elsewhere.
                ans.append(start)

        return ans

Solution 4

Solution 2的Python实现

class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Sliding-window search for concatenations of all words.

        For every alignment ``offset`` in ``range(len(words[0]))`` the text is
        read as a sequence of word-sized slots. A window of slots is grown one
        slot at a time and shrunk from the left whenever a word is
        over-represented, so each slot enters and leaves the window at most
        once per alignment.

        Args:
            s: Text to search in.
            words: Words to concatenate; all are assumed to have the same
                length as ``words[0]`` (problem constraint, not verified).

        Returns:
            Start indices of every matching window. Indices are grouped by
            alignment (ascending within each group), not globally sorted.
            Empty when ``words`` is empty or ``s`` is shorter than one window.
        """
        ans = []
        # Guard first: words[0] would raise IndexError on an empty list.
        if not words:
            return ans

        word_count = len(words)
        word_len = len(words[0])
        if len(s) < word_count * word_len:
            return ans

        # How many times each word must appear inside a matching window.
        need = {}
        for word in words:
            need[word] = need.get(word, 0) + 1

        for offset in range(word_len):
            seen = {}      # word counts inside the current window
            used = 0       # number of word slots inside the window
            left = offset  # start index of the current window

            for right in range(offset, len(s) - word_len + 1, word_len):
                piece = s[right: right + word_len]
                if piece not in need:
                    # Unknown word: no window may contain this slot, restart after it.
                    seen.clear()
                    used = 0
                    left = right + word_len
                    continue

                seen[piece] = seen.get(piece, 0) + 1
                used += 1

                # Surplus of `piece`: drop slots from the left until one copy is gone.
                while seen[piece] > need[piece]:
                    seen[s[left: left + word_len]] -= 1
                    used -= 1
                    left += word_len

                if used == word_count:
                    ans.append(left)
                    # Slide by one slot so the next window can be tested.
                    seen[s[left: left + word_len]] -= 1
                    used -= 1
                    left += word_len

        return ans

支锦铭

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
LeetCode - 解题笔记 - 30 - Substring with Concatenation of All Words

Substring with Concatenation of All WordsSolution 1由于给定字符串的长度和给定的待匹配词的数目都太大了，想要枚举所有的组合然后匹配字符串的暴力思路肯定是不太行。可是每个待匹配此的长度一定，那么我们可以确定的是待匹配的子串的长度是words.size()*words[0].size()，并且只要保证每一个words[0].size()的子子串匹配上其中一个待匹配词，且不重复匹配就可以判定存在了。因此遍历的思路就是从每一个位置开始（题目要求没办法）取一个wo
复制链接

扫一扫

专栏目录