# -*- coding: utf-8 -*-
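'''
Substring problems.

A substring is a contiguous run of characters inside a string.

Approaches:
1. Dynamic programming
2. Sliding window
'''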
from collections import defaultdict
def longest_palindrome(s):
    '''
    Return the longest palindromic substring of ``s`` (LeetCode 5).

    Every index is tried as the centre of an odd-length palindrome
    ([i, i]) and every adjacent pair as the centre of an even-length
    palindrome ([i, i + 1]); each centre is expanded outwards for as
    long as the two end characters match.

    Example:
        "babad" -> "aba"

    Args:
        s: The string to search.

    Returns:
        The longest palindromic substring, or "" when ``s`` is empty.
        When several palindromes share the maximum length, the one
        discovered last while scanning left to right is returned.
    '''
    n = len(s)

    def expand(left, right):
        # Widen the window while it is still a palindrome. The loop exits
        # one step beyond the valid range, hence the ``left + 1`` below.
        while left >= 0 and right < n and s[left] == s[right]:
            left -= 1
            right += 1
        return left + 1, right

    # Track the best window as half-open indices [start, end) so that a
    # substring is only materialised once, at the very end.
    start, end = 0, 0
    for centre in range(n):
        for lo, hi in (expand(centre, centre), expand(centre, centre + 1)):
            # ">=" means a later palindrome of equal length replaces the
            # current best.
            if hi - lo >= end - start:
                start, end = lo, hi
    return s[start:end]
def minimum_window_substring(s, t):
    '''
    Return the shortest substring of ``s`` that covers ``t`` (LeetCode 76).

    A window "covers" ``t`` when it contains every character of ``t`` at
    least as many times as ``t`` does.

    Example:
        s = "ADOBECODEBANC", t = "ABC" -> "BANC"

    Approach (sliding window over the half-open range [left, right)):
    1. Extend ``right`` one character at a time, counting the characters
       that ``t`` requires.
    2. Whenever the window covers ``t``, record it if it is the shortest
       seen so far, then advance ``left`` until the window stops covering.

    Args:
        s: The string to search in.
        t: The characters (with multiplicity) that must be covered.

    Returns:
        The shortest covering substring (the left-most one on ties), or
        "" when no window covers ``t`` or when either input is empty.
    '''
    # An empty ``t`` would make the "covered" condition permanently true and
    # push ``left`` past the end of ``s``; a ``t`` longer than ``s`` can
    # never be covered. Both cases have no meaningful window.
    if not s or not t or len(t) > len(s):
        return ""

    # needs: required count per character of t.
    # window: count of those same characters inside the current window.
    needs = defaultdict(int)
    for ch in t:
        needs[ch] += 1
    window = defaultdict(int)

    left = 0
    satisfied = 0  # number of distinct characters whose quota is met
    best_start, best_len = 0, float('inf')

    # ``right`` is the exclusive end of the window, hence start=1.
    for right, ch in enumerate(s, start=1):
        if ch in needs:
            window[ch] += 1
            if window[ch] == needs[ch]:
                satisfied += 1

        # Shrink from the left for as long as the window still covers t.
        while satisfied == len(needs):
            if right - left < best_len:
                best_start, best_len = left, right - left
            dropped = s[left]
            left += 1
            if dropped in needs:
                # Dropping below the quota breaks coverage for this char.
                if window[dropped] == needs[dropped]:
                    satisfied -= 1
                window[dropped] -= 1

    if best_len == float('inf'):
        return ""
    return s[best_start:best_start + best_len]
def permutation_in_string(s1, s2):
    '''
    Tell whether some permutation of ``s1`` is a substring of ``s2``
    (LeetCode 567).

    Example:
        s1 = "ab", s2 = "eidbaooo" -> True  ("ba" is a permutation of "ab")

    Approach: slide a window of exactly ``len(s1)`` characters over ``s2``
    and compare character counts. ``matched`` counts the distinct
    characters whose count inside the window equals the count in ``s1``;
    a full-size window with every character matched is a permutation.

    Args:
        s1: The pattern whose permutations are looked for.
        s2: The string to search in.

    Returns:
        True if a permutation of ``s1`` occurs in ``s2``, else False.
        An empty ``s1`` is reported as present in every ``s2``,
        including the empty string.
    '''
    if not s1:
        return True
    size = len(s1)
    if size > len(s2):
        return False

    need = defaultdict(int)
    for ch in s1:
        need[ch] += 1
    window = defaultdict(int)

    matched = 0
    for right, ch in enumerate(s2):
        if ch in need:
            window[ch] += 1
            if window[ch] == need[ch]:
                matched += 1

        left = right - size + 1
        if left < 0:
            # Window has not reached the pattern length yet.
            continue

        if matched == len(need):
            return True

        # Slide: drop the left-most character before the next extension.
        outgoing = s2[left]
        if outgoing in need:
            if window[outgoing] == need[outgoing]:
                matched -= 1
            window[outgoing] -= 1
    return False
def find_anagrams(s, p):
    '''
    Return the start index of every anagram of ``p`` inside ``s``
    (LeetCode 438).

    Example:
        s = "cbaebabacd", p = "abc" -> [0, 6]

    Approach: an anagram is a permutation, so slide a window of exactly
    ``len(p)`` characters over ``s`` and compare character counts.
    ``matched`` counts the distinct characters whose count inside the
    window equals the count in ``p``; whenever every character is matched
    the window start is recorded.

    Args:
        s: The string to search in.
        p: The pattern whose anagrams are looked for.

    Returns:
        A list of start indices in ascending order. The list is empty when
        there is no match, when ``p`` is empty, or when ``p`` is longer
        than ``s``.
    '''
    size = len(p)
    # With an empty pattern the window can never be shrunk to a valid state
    # and indexing would run off the end of ``s``; report no matches.
    if size == 0 or size > len(s):
        return []

    need = defaultdict(int)
    for ch in p:
        need[ch] += 1
    window = defaultdict(int)

    matched = 0
    starts = []
    for right, ch in enumerate(s):
        if ch in need:
            window[ch] += 1
            if window[ch] == need[ch]:
                matched += 1

        left = right - size + 1
        if left < 0:
            # Window has not reached the pattern length yet.
            continue

        if matched == len(need):
            starts.append(left)

        # Slide: drop the left-most character before the next extension.
        outgoing = s[left]
        if outgoing in need:
            if window[outgoing] == need[outgoing]:
                matched -= 1
            window[outgoing] -= 1
    return starts
def length_of_longest_substring(s):
    '''
    Return the length of the longest substring of ``s`` that contains no
    repeated character (LeetCode 3).

    Example:
        "abcabcbb" -> 3  ("abc")

    Approach: sliding window [left, right]. For each character remember
    the index where it was last seen; when the incoming character already
    occurs inside the window, move ``left`` just past that earlier
    occurrence so the window is duplicate-free again.

    Args:
        s: The string to inspect.

    Returns:
        The length of the longest duplicate-free substring (0 for "").
    '''
    last_seen = {}
    left = 0
    best = 0
    for right, ch in enumerate(s):
        previous = last_seen.get(ch)
        # Only an occurrence at or after ``left`` is inside the window;
        # older ones were already slid past and must be ignored.
        if previous is not None and previous >= left:
            left = previous + 1
        last_seen[ch] = right
        best = max(best, right - left + 1)
    return best
if __name__ == "__main__":
    # Ad-hoc smoke checks; uncomment a line to exercise another solver.
    # print(longest_palindrome("abbbas"))
    # print(minimum_window_substring("ADOBECODEBANC", "ABC"))
    # print(permutation_in_string("ab", "eabsasess"))
    # print(permutation_in_string("ab", "ebsasess"))
    # print(find_anagrams("cabsssbabacd", "abc"))
    print(length_of_longest_substring("abcabcbb"))
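# Source article: 【算法-面试】子串专题 (Algorithms / Interview: substring problems)
# Page footer: 最新推荐文章于 2024-07-24 14:54:00 发布 (latest recommended article published 2024-07-24 14:54:00)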