Substring with Concatenation of All Words
Solution 1
由于给定字符串的长度和待匹配词的数目都很大,枚举所有排列组合再去匹配字符串的暴力思路肯定行不通。不过每个待匹配词的长度是固定的,因此可以确定待匹配子串的长度一定是 `words.size() * words[0].size()`,并且只要保证其中每一段长度为 `words[0].size()` 的子子串都能匹配上某一个待匹配词,且不重复匹配,就可以判定该子串满足要求。因此遍历的思路就是从每一个位置开始(题目要求,没有办法)取一个长度为 `words.size() * words[0].size()` 的子串,然后依次检查每一段长度为 `words[0].size()` 的子子串是否是某个待匹配词:中途若出现不在列表中的单词,或者某个单词出现次数过多(同一个词可能在待匹配序列中出现多次,但出现次数超过给定次数肯定不行),则认为无法匹配,否则就是正好匹配(对于出现次数偏少的情形不用担心,此时必然会有另一个词多出现,或者出现一个不存在的词占据其位置)。为了进一步降低复杂度,使用哈希表查找取代逐词比较的匹配过程。
- 时间复杂度: $O(MNn)$,其中 $M$ 为输入字符串的长度, $N$ 为待匹配词的个数, $n$ 为每个词的长度
- 空间复杂度: $O(N + n)$,其中 $N$ 为待匹配词的个数, $n$ 为每个词的长度,分别为构造哈希表和取子串所消耗
class Solution {
public:
    /// Finds every start index in `s` of a substring that is a concatenation
    /// of all strings in `words`, each used exactly as many times as it
    /// appears in `words`, in any order.
    ///
    /// All entries of `words` are assumed to have the same length as
    /// `words[0]` (the problem statement guarantees this).
    ///
    /// @param s      Text to search.
    /// @param words  Equal-length words that must all appear back to back.
    /// @return Start indices in increasing order; empty if `words` is empty
    ///         or `s` is shorter than the concatenation.
    vector<int> findSubstring(string s, vector<string>& words) {
        vector<int> ans;
        // Guard before touching words[0]: indexing an empty vector is UB.
        if (words.empty()) return ans;

        const size_t wordLen = words[0].size();
        const size_t wordCount = words.size();
        const size_t windowLen = wordLen * wordCount;
        if (s.size() < windowLen) return ans;

        // How many times each word must appear inside a matching window.
        unordered_map<string, int> count;
        for (const auto& word : words) ++count[word];

        // Reused across start positions; clear() keeps the bucket storage.
        unordered_map<string, int> check;
        for (size_t i = 0; i + windowLen <= s.size(); ++i) {
            check.clear();
            size_t j = 0;
            for (; j < wordCount; ++j) {
                const string piece = s.substr(i + j * wordLen, wordLen);
                const auto need = count.find(piece);
                // A piece that is not one of the words rules this window out.
                if (need == count.end()) break;
                // So does a word seen more often than required. A word seen
                // too rarely needs no separate test: its slot is then taken
                // by an unknown piece or by a surplus copy of another word.
                if (++check[piece] > need->second) break;
            }
            // All wordCount pieces were accepted: record this start index.
            if (j == wordCount) ans.push_back(static_cast<int>(i));
        }
        return ans;
    }
};
Solution 2
来自网上的更简单的算法:滑动窗口详细通俗的思路分析,多解法 - 串联所有单词的子串 - 力扣(LeetCode) (leetcode-cn.com)
这个教程说得比较通俗易懂,整体思路就是以单词长度为单位向后检查,并在上一个思路中提到的两种判负情形下滑动检查窗口。为了能够遍历所有情形,将偏移量依次设置为单词长度内的所有值,分别进行遍历。
- 时间复杂度: $O(M+N)$(将单个单词的哈希与比较视为 $O(1)$ 时),其中 $M$ 为输入字符串的长度, $N$ 为待匹配词的个数
- 空间复杂度: $O(N+n)$,其中 $N$ 为待匹配词的个数, $n$ 为每个词的长度,分别为构造哈希表和取子串所消耗
class Solution {
public:
    /// Finds every start index in `s` of a substring that is a concatenation
    /// of all strings in `words` (each used as often as it appears in
    /// `words`, in any order), using a word-aligned sliding window.
    ///
    /// For each alignment `offset` in [0, word length) the text is scanned in
    /// word-sized steps while a window of already-accepted words is kept.
    /// All entries of `words` are assumed to have the same length as
    /// `words[0]`.
    ///
    /// @param s      Text to search.
    /// @param words  Equal-length words that must all appear back to back.
    /// @return Start indices grouped by alignment offset (increasing within
    ///         each offset, not globally sorted); empty if `words` is empty
    ///         or `s` is shorter than the concatenation.
    vector<int> findSubstring(string s, vector<string>& words) {
        vector<int> ans;
        // Guard before touching words[0]: indexing an empty vector is UB.
        if (words.empty()) return ans;

        const size_t lenWord = words[0].size();
        const size_t numWords = words.size();
        const size_t total = numWords * lenWord;
        if (s.size() < total) return ans;

        // How many times each word must appear inside a matching window.
        unordered_map<string, int> count;
        for (const auto& word : words) ++count[word];

        // Largest index at which a full-length window still fits in s.
        const size_t lastStart = s.size() - total;

        for (size_t offset = 0; offset < lenWord; ++offset) {
            unordered_map<string, int> check;  // words currently in the window
            size_t numChecked = 0;             // window = first numChecked words from i
            size_t i = offset;
            while (i <= lastStart) {
                // Cleared when the window start was already advanced while
                // dropping a surplus word, so it is not advanced twice.
                bool slide = true;

                // Extend the window word by word until it holds numWords words.
                while (numChecked < numWords) {
                    const string word = s.substr(i + numChecked * lenWord, lenWord);
                    const auto need = count.find(word);
                    if (need == count.end()) {
                        // Unknown word: no window containing it can match, so
                        // restart just past it with an empty window.
                        i += (numChecked + 1) * lenWord;
                        check.clear();
                        numChecked = 0;
                        break;
                    }
                    ++check[word];
                    ++numChecked;
                    if (check[word] > need->second) {
                        // Surplus copy: pop words off the front until one
                        // earlier copy of `word` has left the window.
                        size_t numRemoved = 0;
                        while (check[word] > need->second) {
                            --check[s.substr(i + numRemoved * lenWord, lenWord)];
                            ++numRemoved;
                        }
                        numChecked -= numRemoved;
                        i += numRemoved * lenWord;
                        slide = false;
                        break;
                    }
                }

                if (numChecked == numWords) {
                    ans.push_back(static_cast<int>(i));
                }
                if (numChecked > 0 && slide) {
                    // Drop the first word so the window can advance by one word.
                    --check[s.substr(i, lenWord)];
                    --numChecked;
                    i += lenWord;
                }
            }
        }
        return ans;
    }
};
Solution 3
Solution 1的Python实现
class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Return every start index in ``s`` of a concatenation of all ``words``.

        Each word must be used exactly as many times as it appears in
        ``words``, in any order. All words are assumed to have the same
        length as ``words[0]``.

        Args:
            s: Text to search.
            words: Equal-length words that must all appear back to back.

        Returns:
            Start indices in increasing order; an empty list when ``words``
            is empty or ``s`` is shorter than the concatenation.
        """
        ans = []
        # Guard before reading words[0]; an empty list would raise IndexError.
        if not words:
            return ans
        numWord = len(words)
        lenWord = len(words[0])
        total = numWord * lenWord
        if len(s) < total:
            return ans

        # How many times each word must appear inside a matching window.
        count = {}
        for word in words:
            count[word] = count.get(word, 0) + 1

        for i in range(len(s) - total + 1):
            check = {}
            matched = True
            for j in range(numWord):
                start = i + lenWord * j
                piece = s[start:start + lenWord]
                # A piece that is not one of the words rules this window out.
                if piece not in count:
                    matched = False
                    break
                check[piece] = check.get(piece, 0) + 1
                # So does a word seen more often than required; a word seen
                # too rarely always shows up as one of these two failures.
                if check[piece] > count[piece]:
                    matched = False
                    break
            if matched:
                ans.append(i)
        return ans
Solution 4
Solution 2的Python实现
class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Return every start index in ``s`` of a concatenation of all ``words``.

        Uses a word-aligned sliding window: for each alignment ``offset`` in
        ``range(len(words[0]))`` the text is scanned in word-sized steps while
        a window of already-accepted words is maintained. All words are
        assumed to have the same length as ``words[0]``.

        Args:
            s: Text to search.
            words: Equal-length words that must all appear back to back.

        Returns:
            Start indices grouped by alignment offset (increasing within each
            offset, not globally sorted); an empty list when ``words`` is
            empty or ``s`` is shorter than the concatenation.
        """
        ans = []
        # Guard before reading words[0]; an empty list would raise IndexError.
        if not words:
            return ans
        numWord = len(words)
        lenWord = len(words[0])
        total = numWord * lenWord
        if len(s) < total:
            return ans

        # How many times each word must appear inside a matching window.
        count = {}
        for word in words:
            count[word] = count.get(word, 0) + 1

        # Largest index at which a full-length window still fits in s.
        lastStart = len(s) - total

        for offset in range(lenWord):
            check = {}      # words currently inside the window
            numChecked = 0  # window = first numChecked words starting at i
            i = offset
            while i <= lastStart:
                # Cleared when the window start was already advanced while
                # dropping a surplus word, so it is not advanced twice.
                slide = True

                # Extend the window word by word until it holds numWord words.
                while numChecked < numWord:
                    start = i + numChecked * lenWord
                    word = s[start:start + lenWord]
                    if word not in count:
                        # Unknown word: no window containing it can match, so
                        # restart just past it with an empty window.
                        i += (numChecked + 1) * lenWord
                        check.clear()
                        numChecked = 0
                        break
                    check[word] = check.get(word, 0) + 1
                    numChecked += 1
                    if check[word] > count[word]:
                        # Surplus copy: pop words off the front until one
                        # earlier copy of `word` has left the window.
                        numRemoved = 0
                        while check[word] > count[word]:
                            head = i + numRemoved * lenWord
                            check[s[head:head + lenWord]] -= 1
                            numRemoved += 1
                        numChecked -= numRemoved
                        i += numRemoved * lenWord
                        slide = False
                        break

                if numChecked == numWord:
                    ans.append(i)
                if numChecked > 0 and slide:
                    # Drop the first word so the window can advance by one word.
                    check[s[i:i + lenWord]] -= 1
                    numChecked -= 1
                    i += lenWord
        return ans