数据结构与算法——哈希表与字符串

最新推荐文章于 2023-08-30 16:09:50 发布

小屋*

最新推荐文章于 2023-08-30 16:09:50 发布

阅读量418

点赞数

分类专栏：数据结构与算法　文章标签：散列表、数据结构、算法

本文链接：https://blog.csdn.net/weixin_48994268/article/details/123376756

版权

数据结构与算法专栏收录该内容

42 篇文章 2 订阅

订阅专栏

本文通过六个例子详细介绍了哈希表在解决字符串问题中的应用，包括查找最长回文串、匹配词语模式、分组同字符词语、找到无重复字符的最长子串、搜索重复的DNA序列以及找到最小窗口子串。这些例子展示了哈希表在高效地处理字符串信息和优化算法性能方面的重要性。

摘要由CSDN通过智能技术生成

预备知识:哈希表基础知识
例1:最长回文串(easy)(字符哈希)
例2:词语模式(easy)(字符串哈希)
例3:同字符词语分组(medium)(数组哈希)
例4:无重复字符的最长子串(medium)(字符哈希)
例5:重复的DNA序列(medium)(字符串哈希)
例6:最小窗口子串(hard)(哈希维护窗口)

预备知识:哈希表基础知识

在这里插入图片描述

例1:最长回文串(easy)(字符哈希)

在这里插入图片描述

class Solution {
public:
    /// @brief Length of the longest palindrome that can be assembled from the characters of s.
    ///
    /// Each distinct character contributes the largest even number of its occurrences;
    /// if at least one character occurs an odd number of times, one extra character
    /// can be placed in the middle.
    ///
    /// @param s Input text; every byte is counted on its own, so the count is case-sensitive.
    /// @return The maximum palindrome length (0 for an empty string).
    int longestPalindrome(std::string s) {
        // One slot per possible byte value. Iterating as unsigned char keeps bytes
        // >= 0x80 inside the table even on platforms where plain char is signed.
        int char_count[256] = {0};
        for (unsigned char ch : s) {
            char_count[ch]++;
        }

        int paired_length = 0;    // characters that can be mirrored around the centre
        bool has_center = false;  // true once any character has an odd count
        for (int count : char_count) {
            paired_length += count - (count % 2);
            if (count % 2 != 0) {
                has_center = true;
            }
        }
        return paired_length + (has_center ? 1 : 0);
    }
};

例2:词语模式(easy)(字符串哈希)

在这里插入图片描述

class Solution {
public:
 /// @brief Checks whether the space-separated words of s follow the letter pattern.
 ///
 /// Words and pattern letters must pair up one-to-one: the same word always maps to
 /// the same letter, and no letter is shared by two different words.
 ///
 /// @param pattern One letter per expected word.
 /// @param s Words separated by single spaces.
 /// @return true when the word sequence matches the pattern exactly.
 bool wordPattern(string pattern, string s) {
  std::map<std::string, char> letter_of_word;  // word -> pattern letter bound to it
  char letter_taken[128] = { 0 };              // non-zero once a letter is bound to a word
  std::string current;
  int next_letter = 0;
  // A trailing space makes the last word terminate like all the others.
  s.push_back(' ');
  for (char ch : s) {
   if (ch != ' ') {
    current += ch;
    continue;
   }
   // More words than pattern letters.
   if (next_letter == pattern.length()) {
    return false;
   }
   const char letter = pattern[next_letter];
   auto found = letter_of_word.find(current);
   if (found == letter_of_word.end()) {
    // New word: its letter must not already belong to another word.
    if (letter_taken[letter]) {
     return false;
    }
    letter_of_word[current] = letter;
    letter_taken[letter] = 1;
   }
   else if (found->second != letter) {
    return false;
   }
   current = "";
   ++next_letter;
  }
  // Fewer words than pattern letters is also a mismatch.
  return next_letter == pattern.length();
 }
};

class Solution:
    def wordPattern(self, pattern: str, s: str) -> bool:
        """Return True when the whitespace-separated words of ``s`` follow ``pattern``.

        Letters and words must pair up one-to-one: a word is always bound to the
        same letter and a letter is always bound to the same word.
        """
        tokens = s.split()
        if len(tokens) != len(pattern):
            return False

        letter_for = {}  # word -> letter
        token_for = {}   # letter -> word
        for letter, token in zip(pattern, tokens):
            # setdefault binds on first sight and returns the existing binding otherwise.
            if letter_for.setdefault(token, letter) != letter:
                return False
            if token_for.setdefault(letter, token) != token:
                return False
        return True

例3:同字符词语分组(medium)(数组哈希)

在这里插入图片描述

class Solution {
public:
 /// @brief Groups the input words so that each group holds words made of the same letters.
 ///
 /// Two words fall into the same group when sorting their characters gives the same string.
 /// Groups come out ordered by that sorted key; inside a group the input order is kept.
 ///
 /// @param strs Words to group (not modified).
 /// @return One vector per anagram group.
 vector<vector<string>> groupAnagrams(vector<string>& strs) {
  map<string, vector<string>> buckets;  // sorted letters -> words with those letters
  for (const string& word : strs) {
   string signature(word);
   sort(signature.begin(), signature.end());
   // operator[] creates the empty bucket the first time a signature is seen.
   buckets[signature].push_back(word);
  }

  vector<vector<string>> grouped;
  for (const auto& entry : buckets) {
   grouped.push_back(entry.second);
  }
  return grouped;
 }
};

class Solution:
    def groupAnagrams(self, strs):
        """Group words that are anagrams of one another.

        :param strs: iterable of words.
        :return: list of groups (each a list of words). Groups appear in the order
                 their first member occurs in ``strs``; words keep their input
                 order inside a group.
        """
        groups = {}
        for word in strs:
            # Words with the same letters share the same sorted-letter key.
            key = "".join(sorted(word))
            groups.setdefault(key, []).append(word)
        # Return the groups so callers can use them (printing is left to the caller).
        return list(groups.values())


if __name__ == "__main__":
    # Demo: run groupAnagrams on a sample word list and print whatever it returns.
    sample_words = ["eat", "tea", "tan", "ate", "nat", "bat"]
    print(Solution().groupAnagrams(strs=sample_words))

例4:无重复字符的最长子串(medium)(字符哈希)

在这里插入图片描述

class Solution {
public:
 /// @brief Length of the longest substring of s that contains no repeated character.
 ///
 /// Sliding window over s: char_count holds how often each byte occurs inside
 /// [begin, i]. Whenever the newly added byte becomes a duplicate, the left edge
 /// advances until that byte is unique again. Runs in a single pass; only the
 /// window bounds are tracked, the substring itself is never materialised.
 ///
 /// @param s Input text, treated as a sequence of bytes.
 /// @return Length of the longest duplicate-free substring (0 for an empty string).
 int lengthOfLongestSubstring(string s) {
  // One slot per byte value; indexing through unsigned char keeps bytes >= 0x80
  // in range even where plain char is signed.
  int char_count[256] = { 0 };
  const int n = static_cast<int>(s.length());
  int begin = 0;
  int result = 0;
  for (int i = 0; i < n; i++) {
   const unsigned char incoming = s[i];
   char_count[incoming]++;
   // A count above 1 means an earlier copy of `incoming` sits in [begin, i),
   // so this loop always stops before begin passes i.
   while (char_count[incoming] > 1) {
    char_count[static_cast<unsigned char>(s[begin])]--;
    begin++;
   }
   const int window_len = i - begin + 1;
   if (result < window_len) {
    result = window_len;
   }
  }
  return result;
 }
};

class Solution(object):
    def lengthOfLongestSubstring(self, strs):
        """
        Return the length of the longest substring of ``strs`` that has no
        repeated character.

        Sliding window: ``char_dict`` counts the characters inside
        ``strs[begin:i + 1]``; when the newest character becomes a duplicate the
        left edge moves right until it is unique again. Only the window bounds are
        tracked, so the scan is a single pass.

        :type strs: str
        :rtype: int
        """
        begin = 0
        result = 0
        char_dict = {}
        for i, ch in enumerate(strs):
            char_dict[ch] = char_dict.get(ch, 0) + 1
            # A count above 1 means an earlier copy of ch lies in strs[begin:i],
            # so this loop stops before begin passes i.
            while char_dict[ch] > 1:
                char_dict[strs[begin]] -= 1
                begin += 1
            result = max(result, i - begin + 1)
        return result

例5:重复的DNA序列(medium)(字符串哈希)

在这里插入图片描述

class Solution {
 // Length of the sequences being searched for.
 static constexpr int kSeqLen = 10;
public:
 /// @brief Finds every 10-character substring that occurs more than once in s.
 ///
 /// Each window of kSeqLen characters is counted in a hash map; a window is
 /// reported at the moment its count reaches exactly 2, so every repeated
 /// sequence appears once, in order of its second occurrence.
 ///
 /// @param s Sequence to scan.
 /// @return The repeated substrings (empty when s is shorter than kSeqLen).
 vector<string> findRepeatedDnaSequences(string s) {
  unordered_map<string, int> seen_count;
  vector<string> repeated;
  // Negative when s is too short, in which case the loop body never runs.
  const int last_start = static_cast<int>(s.length()) - kSeqLen;
  for (int start = 0; start <= last_start; ++start) {
   const string window = s.substr(start, kSeqLen);
   int& occurrences = seen_count[window];
   ++occurrences;
   if (occurrences == 2) {
    repeated.push_back(window);
   }
  }
  return repeated;
 }
};

class Solution:
    # The return annotation is a string so it is never evaluated at definition
    # time; the block therefore needs no typing import to load.
    def findRepeatedDnaSequences(self, s: str) -> "list[str]":
        """Return every 10-character substring that occurs more than once in ``s``.

        Each window of length 10 is counted in a plain dict; a window is reported
        when its count reaches exactly 2, so each repeated sequence is listed once,
        in order of its second occurrence. Strings shorter than 10 yield ``[]``.
        """
        L = 10
        ans = []
        cnt = {}
        for i in range(len(s) - L + 1):
            sub = s[i: i + L]
            cnt[sub] = cnt.get(sub, 0) + 1
            if cnt[sub] == 2:
                ans.append(sub)
        return ans

例6:最小窗口子串(hard)(哈希维护窗口)

在这里插入图片描述

class Solution {
public:
 /// @brief Finds the shortest substring of s containing every character of t
 ///        (with multiplicity).
 ///
 /// Sliding window: the right edge walks over s; after each step the left edge is
 /// advanced past characters that are not needed or are present in surplus. When
 /// the window covers t, it replaces the best answer if strictly shorter.
 ///
 /// @param s Text to search in.
 /// @param t Characters that must be covered.
 /// @return The first shortest covering window, or "" when none exists.
 string minWindow(string s, string t) {
  const int kTableSize = 128;
  int need[kTableSize] = { 0 };  // required count per character (from t)
  int have[kTableSize] = { 0 };  // count per character tracked for the window
  for (char c : t) {
   ++need[c];
  }
  // Distinct characters of t, so the coverage check only visits those.
  std::vector<int> required_chars;
  for (int code = 0; code < kTableSize; ++code) {
   if (need[code] > 0) {
    required_chars.push_back(code);
   }
  }

  std::string best;
  int left = 0;
  for (int right = 0; right < s.length(); ++right) {
   ++have[s[right]];
   // Shrink from the left while the head character is dispensable.
   while (left < right) {
    const char head = s[left];
    const bool irrelevant = (need[head] == 0);
    const bool surplus = (have[head] > need[head]);
    if (!irrelevant && !surplus) {
     break;
    }
    // Only characters that t needs have their window count reduced.
    if (!irrelevant) {
     --have[head];
    }
    ++left;
   }
   if (!is_window_ok(have, need, required_chars)) {
    continue;
   }
   const int span = right - left + 1;
   if (best == "" || best.length() > span) {
    best = s.substr(left, span);
   }
  }
  return best;
 }
private:
 /// Returns true when, for every character code listed in vec_t, map_s holds at
 /// least as many occurrences as map_t.
 bool is_window_ok(int map_s[], int map_t[], vector<int>& vec_t) {
  for (int code : vec_t) {
   if (map_s[code] < map_t[code]) {
    return false;
   }
  }
  return true;
 }
};

from collections import defaultdict

class Solution:
    def is_window_min(self, s_dict, t_dict, t_list):
        """Return True when every character in ``t_list`` occurs in ``s_dict`` at
        least as many times as in ``t_dict`` (i.e. the window covers t)."""
        return all(s_dict[ch] >= t_dict[ch] for ch in t_list)

    def minWindow(self, s: str, t: str):
        """Return the shortest substring of ``s`` that contains every character of
        ``t`` with multiplicity, or ``''`` when no such substring exists.

        Sliding window: the right edge walks over ``s``; after each step the left
        edge skips characters that are not needed or are present in surplus.
        """
        need = defaultdict(int)  # required count per character of t
        have = defaultdict(int)  # count per character tracked for the window
        for ch in t:
            need[ch] += 1
        # Distinct characters of t, in first-seen order.
        required = list(dict.fromkeys(t))

        left = 0
        best = ''
        for right, ch in enumerate(s):
            have[ch] += 1
            # Shrink from the left while the head character is dispensable.
            while left < right:
                head = s[left]
                if need[head] == 0:
                    # Not part of t at all: skip it without touching the counts.
                    left += 1
                    continue
                if have[head] <= need[head]:
                    break
                have[head] -= 1
                left += 1
            if self.is_window_min(have, need, required):
                span = right - left + 1
                if best == '' or len(best) > span:
                    best = s[left:right + 1]
        return best

if __name__=="__main__":
    # Demo run: print the minimum window of "bbab" that covers "abb".
    # Other calls tried earlier, kept disabled for reference:
    #   findRepeatedDnaSequences("AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT")
    #   minWindow("ADOBECODEBANC", "ABC")
    #   minWindow("C", "ABC")
    #   minWindow("ABv", "ABC")
    #   minWindow("ab", "a")
    #   minWindow("ab", "b")
    #   minWindow("aa", "aaa")
    #   minWindow("bba", "ab")
    solver=Solution()
    print(solver.minWindow("bbab", "abb"))