30. 串联所有单词的子串(困难)
给定一个字符串 s 和一些长度相同的单词 words。找出 s 中恰好可以由 words 中所有单词串联形成的子串的起始位置。
注意子串要与 words 中的单词完全匹配,中间不能有其他字符,但不需要考虑 words 中单词串联的顺序。
示例 1:
输入:
s = "barfoothefoobarman",
words = ["foo","bar"]
输出:[0,9]
解释:
从索引 0 和 9 开始的子串分别是 "barfoo" 和 "foobar" 。
输出的顺序不重要, [9,0] 也是有效答案。
示例 2:
输入:
s = "wordgoodgoodgoodbestword",
words = ["word","good","best","word"]
输出:[]
一开始想到的方法(超时):
生成所有可能组合,即words中单词的全排列,在字符串s中进行匹配
class Solution:
    """First attempt (too slow for large inputs): search ``s`` for every
    distinct concatenation of a permutation of ``words``.

    The number of candidates grows factorially with ``len(words)``, so this
    is only practical for a handful of words.
    """

    def strStr(self, haystack: str, needle: str) -> List[int]:
        """Return every index at which ``needle`` occurs in ``haystack``.

        A rolling hash is used as a cheap filter; each hash hit is then
        confirmed by comparing the actual characters, so hash collisions can
        never produce a false match.

        Args:
            haystack: Text to search in.
            needle: Text to search for.

        Returns:
            Ascending list of start indices (empty when there is no match).
        """
        width, total = len(needle), len(haystack)
        if width > total:
            return []

        base = 26
        # A prime modulus. With a power-of-two modulus and an even base,
        # base**k becomes 0 for large k, so characters far from the end of
        # the window would stop influencing the hash at all.
        modulus = (1 << 61) - 1
        origin = ord('a')

        # Hash of the needle and of the first window of the haystack.
        window_hash = needle_hash = 0
        for hay_char, needle_char in zip(haystack, needle):
            window_hash = (window_hash * base + ord(hay_char) - origin) % modulus
            needle_hash = (needle_hash * base + ord(needle_char) - origin) % modulus

        # Weight of the character that leaves the window on each shift.
        top_weight = pow(base, width, modulus)

        matches = []
        for start in range(total - width + 1):
            if start:
                outgoing = ord(haystack[start - 1]) - origin
                incoming = ord(haystack[start + width - 1]) - origin
                window_hash = (
                    window_hash * base - outgoing * top_weight + incoming
                ) % modulus
            # Equal hashes only suggest a match; compare the text to be sure.
            if window_hash == needle_hash and haystack[start:start + width] == needle:
                matches.append(start)
        return matches

    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Return the start indices of substrings of ``s`` that are a
        concatenation of all ``words`` in any order.

        Args:
            s: Text to search in.
            words: Words that must each be used exactly once.

        Returns:
            Sorted list of start indices; empty when ``words`` is empty.
        """
        from itertools import permutations

        if not words:
            return []

        # Repeated words yield identical concatenations, so deduplicate
        # before searching.
        candidates = {"".join(order) for order in permutations(words)}

        result = []
        for candidate in candidates:
            result.extend(self.strStr(s, candidate))
        # Set iteration order is arbitrary; sort for a deterministic answer.
        return sorted(result)
(1) 暴力解法O(n^2):
顺序扫描字符串s,截取同等长度len(words) * len(words[0])的子串,分割成相同大小len(words[0])的单词,计算比较词频
class Solution:
    """Brute force: test every window of ``len(words) * len(words[0])``
    characters by comparing word frequencies."""

    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Return the start indices of substrings of ``s`` that are a
        concatenation of all ``words`` in any order.

        All words are expected to share the length of ``words[0]``.

        Args:
            s: Text to search in.
            words: Words that must each be used exactly once.

        Returns:
            Ascending list of start indices. Empty when ``s`` or ``words``
            is empty, or when the words have zero length.
        """
        from collections import Counter

        if not words or not s:
            return []

        word_len = len(words[0])
        if word_len == 0:
            # A zero step would make range() raise ValueError below.
            return []

        window_len = word_len * len(words)
        target = Counter(words)

        result = []
        # The range is empty when the window is longer than s.
        for start in range(len(s) - window_len + 1):
            # Cut the window into word-sized pieces and compare frequencies.
            pieces = Counter(
                s[pos:pos + word_len]
                for pos in range(start, start + window_len, word_len)
            )
            if pieces == target:
                result.append(start)
        return result
(2) 改进解法(滑动窗口),时间复杂度约为 O(n · len(words[0])):对每个起始偏移量 0 ~ len(words[0])-1 各扫描一遍 s,窗口以单词为单位向右滑动。
class Solution:
    """Sliding-window solution: one left-to-right pass per alignment offset."""

    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        """Return the start indices of substrings of ``s`` that are a
        concatenation of all ``words`` in any order.

        For each offset in ``0 .. len(words[0]) - 1`` the string is read in
        word-sized steps while a window of whole words is maintained.

        Args:
            s: Text to search in.
            words: Words that must each be used exactly once; the length of
                ``words[0]`` is used as the step size.

        Returns:
            Start indices grouped by offset, ascending within each offset.
        """
        from collections import Counter

        found = []
        if not (words and s):
            return found

        step = len(words[0])
        needed_total = len(words)
        required = Counter(words)
        last_start = len(s) - step  # last index where a whole word still fits

        for offset in range(step):
            window = Counter()
            window_start = offset
            in_window = 0
            for pos in range(offset, last_start + 1, step):
                # Extend the window by one word on the right.
                token = s[pos:pos + step]
                window[token] += 1
                in_window += 1
                # Too many copies of this word (or a word not in ``words``):
                # drop words from the left until the surplus is gone.
                while window[token] > required[token]:
                    dropped = s[window_start:window_start + step]
                    window[dropped] -= 1
                    window_start += step
                    in_window -= 1
                # No word is over its quota, so a full-size window means
                # every frequency matches exactly.
                if in_window == needed_total:
                    found.append(window_start)
        return found