You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
Example 1:
Input:
s = “barfoothefoobarman”,
words = [“foo”,“bar”]
Output: [0,9]
Explanation: Substrings starting at index 0 and 9 are “barfoo” and “foobar” respectively.
The output order does not matter, returning [9,0] is fine too.
Example 2:
Input:
s = “wordgoodstudentgoodword”,
words = [“word”,“student”]
Output: []
翻译
给定一个字符串 s 和一些长度相同的单词 words。在 s 中找出可以恰好串联 words 中所有单词的子串的起始位置。
注意子串要与 words 中的单词完全匹配,中间不能有其他字符,但不需要考虑 words 中单词串联的顺序。
示例 1:
输入:
s = “barfoothefoobarman”,
words = [“foo”,“bar”]
输出: [0,9]
解释: 从索引 0 和 9 开始的子串分别是 “barfoo” 和 “foobar” 。
输出的顺序不重要, [9,0] 也是有效答案。
示例 2:
输入:
s = “wordgoodstudentgoodword”,
words = [“word”,“student”]
输出: []
分析
这题有个巧妙之处就在于words的长度是相同的,这其实大大降低了检查的复杂度,因为我们可以拆分成若干个单词字符串来比对。
我的思路来源于别人的博客:先将words转化为字典,key为word,value为每个word在words里出现的次数。然后在s中依次截取长度等于words所有单词长度总和的子串。再将子串拆分成若干个长度为每个word长度的子串。开始在字典里匹配,每匹配一个就将对应的value减一。遍历结束时所有value都为0说明正好匹配。
结果是思路正确,但提交超时。主要是因为当s足够长的时候,截取的子串太多了。所以优化方案是:可以先遍历一遍s,截取所有长度等于单个word长度的子串,记录其中能匹配某个word的子串的起始下标。进行整体匹配时,只需从这些已经匹配到一个word的下标开始截取。
c++实现
class Solution {
public:
    /// Finds every index in `s` at which a concatenation of all strings in
    /// `words` (each used exactly once, in any order, with nothing in
    /// between) begins.
    ///
    /// All entries of `words` are expected to have the same length; the
    /// length of `words[0]` is used as the chunk size.
    ///
    /// @param s     Text to search.
    /// @param words Equal-length words. Duplicates are allowed and must be
    ///              matched exactly as many times as they occur.
    /// @return Matching start indices in ascending order. Empty when `words`
    ///         is empty, when the words are zero-length, or when `s` is
    ///         shorter than the full concatenation.
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        using Size = std::string::size_type;

        std::vector<int> res;
        if (words.empty())
            return res;

        const Size word_len = words[0].length();
        // Zero-length words cannot be used as a chunk size (the chunk count
        // would be a division by zero); treat that input as "no match".
        if (word_len == 0)
            return res;

        const Size word_count = words.size();
        // Written as a division so that word_len * word_count cannot overflow;
        // it also rejects texts shorter than one full concatenation.
        if (word_count > s.size() / word_len)
            return res;
        const Size total_len = word_len * word_count;

        // Dictionary: word -> number of times it must appear in a match.
        std::map<std::string, int> need;
        for (const std::string& w : words)
            ++need[w];

        const Size last_start = s.size() - total_len;
        for (Size i = 0; i <= last_start; ++i) {
            // Occurrences consumed so far in the window starting at i.
            std::map<std::string, int> seen;
            Size matched = 0;
            for (; matched < word_count; ++matched) {
                const std::string chunk = s.substr(i + matched * word_len, word_len);
                const auto wanted = need.find(chunk);
                // Unknown chunk: this window cannot match. For the first
                // chunk this acts as the cheap pruning step, rejecting most
                // starts before anything is inserted into `seen`.
                if (wanted == need.end())
                    break;
                // Chunk used more often than `words` supplies it.
                if (++seen[chunk] > wanted->second)
                    break;
            }
            // All chunks were known and none over-used; since the chunk count
            // equals the word count, every word was used exactly once.
            if (matched == word_count)
                res.push_back(static_cast<int>(i));
        }
        return res;
    }
};