Leetcode 187. Repeated DNA Sequences
All DNA is composed of a series of nucleotides abbreviated as A, C, G,
and T, for example: “ACGAATTCCG”. When studying DNA, it is sometimes
useful to identify repeated sequences within the DNA. Write a function to find all the 10-letter-long sequences (substrings)
that occur more than once in a DNA molecule. Example:
Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
Output: ["AAAAACCCCC", "CCCCCAAAAA"]
普通方法
用 map 记录每个长度为 10 的子串的出现次数,最后遍历 map,找出出现次数大于 1 的子串。
// Approach 1: count every 10-letter window in an ordered map, then collect
// the windows whose count exceeds one.
class Solution {
public:
    /**
     * Finds every 10-letter substring that occurs more than once in s.
     *
     * @param s DNA string to scan.
     * @return Each repeated 10-letter substring exactly once, in ascending
     *         lexicographic order (the iteration order of std::map).
     */
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::string::size_type kWindow = 10;
        std::vector<std::string> result;

        // With at most one window nothing can repeat. This guard also keeps
        // the unsigned subtraction below from wrapping around.
        if (s.length() <= kWindow)
            return result;

        std::map<std::string, int> counts;

        // The final window starts at length - 10, so the bound is inclusive;
        // stopping one step earlier would silently drop the last window.
        const std::string::size_type lastStart = s.length() - kWindow;
        for (std::string::size_type i = 0; i <= lastStart; ++i) {
            // operator[] value-initializes a missing count to 0 before the
            // increment, so no separate "first time seen" branch is needed.
            ++counts[s.substr(i, kWindow)];
        }

        for (std::map<std::string, int>::const_iterator it = counts.begin();
             it != counts.end(); ++it) {
            if (it->second > 1)
                result.push_back(it->first);
        }
        return result;
    }
};
使用位操作
由于只有4个字母,所以我们可以把A,C,G,T分别看做 00,01,10,11的二进制,然后10个字母就有20个比特位,因此一个10个字母的子串就变成了一个数字。用一个集合来保存出现一次的数值,用另一个集合来保存出现两次以上的数值。
C++版本
// Approach 2: pack each 10-letter window into a 20-bit integer (2 bits per
// nucleotide) and track the packed values in two sets.
class Solution {
public:
    /**
     * Collects each 10-letter substring of s that appears at least twice.
     *
     * @param s DNA string. The encoding table is indexed by (letter - 'A')
     *          and has 26 entries; C, G and T map to 1, 2 and 3, and every
     *          other entry (including A) is 0.
     * @return Each repeated substring once, ordered by the start position of
     *         its second occurrence.
     */
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        std::vector<std::string> repeated;
        // Inputs of length 10 or less are answered with an empty list; the
        // guard also protects the unsigned subtraction below.
        if (s.length() <= 10)
            return repeated;

        // Two-bit code per letter; 'A' keeps the zero from initialization.
        int code[26] = {0};
        code['C' - 'A'] = 1;
        code['G' - 'A'] = 2;
        code['T' - 'A'] = 3;

        std::set<int> seen;      // packed value of every window visited so far
        std::set<int> reported;  // packed values already added to the result

        const std::string::size_type lastStart = s.length() - 10;
        for (std::string::size_type start = 0; start <= lastStart; ++start) {
            // Pack the window: shift in two bits for each of its ten letters.
            int packed = 0;
            for (std::string::size_type k = 0; k < 10; ++k)
                packed = (packed << 2) | code[s[start + k] - 'A'];

            // insert().second is false when the value was already present.
            // Report a window only when it was seen before and has not been
            // reported yet.
            if (!seen.insert(packed).second && reported.insert(packed).second)
                repeated.push_back(s.substr(start, 10));
        }
        return repeated;
    }
};
JAVA版本
/**
 * Returns each 10-letter substring of {@code s} that occurs at least twice.
 * Every window is packed into an int, two bits per letter, so windows are
 * compared as numbers rather than as strings.
 *
 * @param s DNA string; the lookup table is indexed by (letter - 'A') and has
 *          26 entries, with C, G and T mapped to 1, 2 and 3 and all other
 *          entries (including A) left at 0
 * @return each repeated substring once, ordered by the start position of its
 *         second occurrence; empty when {@code s} has fewer than 10 characters
 */
public List<String> findRepeatedDnaSequences(String s) {
    // Two-bit code per letter; 'A' keeps the array's default of 0.
    final int[] code = new int[26];
    code['C' - 'A'] = 1;
    code['G' - 'A'] = 2;
    code['T' - 'A'] = 3;

    final Set<Integer> seen = new HashSet<>();      // every packed window visited
    final Set<Integer> reported = new HashSet<>();  // packed windows already in the result
    final List<String> repeated = new ArrayList<>();

    // Number of window start positions; non-positive for short inputs, in
    // which case the loop body never runs.
    final int windowCount = s.length() - 9;
    for (int start = 0; start < windowCount; start++) {
        final int end = start + 10;
        int packed = 0;
        for (int k = start; k < end; k++) {
            packed = (packed << 2) | code[s.charAt(k) - 'A'];
        }

        // Set.add returns true when the value was not present before.
        if (seen.add(packed)) {
            continue;  // first sighting of this window
        }
        if (reported.add(packed)) {
            repeated.add(s.substring(start, end));
        }
    }
    return repeated;
}