题目
You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
Example 1:
Input:
s = "barfoothefoobarman",
words = ["foo","bar"]
Output: [0,9]
Explanation: Substrings starting at index 0 and 9 are "barfoo" and "foobar" respectively.
The output order does not matter, returning [9,0] is fine too.
Example 2:
Input:
s = "wordgoodstudentgoodword",
words = ["word","student"]
Output: []
思路
如果需要找的words都是等长的,直接利用滑动窗口以words为单位检索
否则,需要先利用滑动窗口以letter为单位检索符合letter统计的区间,然后再在数量和对象上判断该区间是否为目标words组成
代码
#include "stdafx.h"
#include <list>
#include <vector>
#include <map>
#include <iostream>
using namespace std;
// Debug helper: writes every entry of `input` to standard output, one entry
// per line, formatted as "<character><TAB><count>". Entries appear in the
// map's key order.
void showmap(map<char, int>& input) {
    for (const auto& entry : input) {
        const char letter = entry.first;
        const int occurrences = entry.second;
        cout << letter << '\t' << occurrences << endl;
    }
}
// Helper for findSubstring: decides whether S[pos, end) can be split into
// words that are still available in `remaining` (word -> how many copies may
// still be used).
//
// `lengths` holds every distinct word length as a key (ascending order); the
// mapped value is not used here. At each position every length is tried, and
// a choice that leads to a dead end is undone before the next one is tried.
// This backtracking is what makes word sets such as {"effect", "effective"}
// work, where a shorter word is a prefix of a longer one.
//
// `remaining` is modified during the search but is always restored to its
// original contents before the function returns.
static bool matchRemainingWords(const string& S, size_t pos, size_t end,
                                const map<size_t, int>& lengths,
                                map<string, int>& remaining) {
    if (pos == end) {
        return true;
    }
    for (const auto& entry : lengths) {
        const size_t len = entry.first;
        if (len > end - pos) {
            break;  // keys are ascending, so every later length is too long as well
        }
        map<string, int>::iterator it = remaining.find(S.substr(pos, len));
        if (it == remaining.end() || it->second == 0) {
            continue;
        }
        --it->second;
        const bool matched = matchRemainingWords(S, pos + len, end, lengths, remaining);
        ++it->second;  // undo the choice so the caller sees an unchanged map
        if (matched) {
            return true;
        }
    }
    return false;
}

// Finds every start index in S of a substring that is a concatenation of all
// words in L, each used exactly once (duplicates in L must appear as many
// times as they are listed), with no characters in between.
//
// Parameters:
//   S - the text to search.
//   L - the words; they may have different lengths. L is not modified.
// Returns:
//   The matching start indices in ascending order. The result is empty when
//   L is empty, when the words have a combined length of zero, or when the
//   combined length exceeds S.size().
//
// Works in two phases, as a sliding window of exactly the combined word length:
//   1. A per-byte histogram of the window is compared with the histogram of
//      all words; windows with different letter statistics are skipped.
//   2. Surviving windows are checked word by word with backtracking
//      (see matchRemainingWords).
vector<int> findSubstring(string S, vector<string> &L) {
    vector<int> result;

    // Statistics of the words: multiplicity, distinct lengths, letter counts.
    map<string, int> wordCount;
    map<size_t, int> lengthCount;
    size_t total = 0;
    int need[256] = { 0 };
    for (const string& word : L) {
        ++wordCount[word];
        ++lengthCount[word.size()];
        total += word.size();
        for (char c : word) {
            ++need[static_cast<unsigned char>(c)];
        }
    }
    if (total == 0 || total > S.size()) {
        return result;
    }

    // `mismatched` counts the byte values whose count in the current window
    // differs from the count required by the words; 0 means the window has
    // exactly the right letters.
    int window[256] = { 0 };
    int mismatched = 0;
    for (int b = 0; b < 256; ++b) {
        if (need[b] != 0) {
            ++mismatched;
        }
    }

    for (size_t i = 0; i < S.size(); ++i) {
        // Slide the window: S[i] enters ...
        const unsigned char incoming = static_cast<unsigned char>(S[i]);
        if (window[incoming] == need[incoming]) { ++mismatched; }
        ++window[incoming];
        if (window[incoming] == need[incoming]) { --mismatched; }

        // ... and S[i - total] leaves once the window is longer than `total`.
        if (i >= total) {
            const unsigned char outgoing = static_cast<unsigned char>(S[i - total]);
            if (window[outgoing] == need[outgoing]) { ++mismatched; }
            --window[outgoing];
            if (window[outgoing] == need[outgoing]) { --mismatched; }
        }

        if (i + 1 < total || mismatched != 0) {
            continue;  // window not full yet, or letter statistics differ
        }

        const size_t start = i + 1 - total;
        if (matchRemainingWords(S, start, start + total, lengthCount, wordCount)) {
            result.push_back(static_cast<int>(start));
        }
    }
    return result;
}
// Demo driver: builds a two-word list, prints the number of words and the
// length of the first word, then runs findSubstring on a sample sentence and
// prints how many start indices it returned.
int main()
{
    vector<string> L{ "apple", "bee" };

    cout << L.size() << endl;
    cout << L[0].size();

    const vector<int> result = findSubstring("pineapplebeebirdsquarebeeapple", L);

    cout << endl;
    cout << result.size() << endl;
    return 0;
}
问题
以上方法并不能解决以下特殊情况:
Example 3:
Input:
s = "goodeffecteffectivemanyappleeffectivegoodeffect",
words = ["effect","effective","good"]
Output: [0,28]
因为 effect 和 effective 长度不同,但前缀相同:以上方法在每个位置按长度从短到长贪心地取第一个匹配的单词,并且不会回溯,所以会先取到 effect 而错过 effective,无法解决这种情况。所以这个问题还是目标 words 等长才可能在短时间内解决,不然情况太复杂了。