All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", Return: ["AAAAACCCCC", "CCCCCAAAAA"].
题目意思是有一个DNA序列,每10个字符为一串,找出重复出现的串。刚开始用map存string,结果超内存;看了标签才知道要用位运算,但每次都先把子串存到一个string中,再转为二进制,结果超时。其实每次加入新字符时,前9个字符的编码是已知的,只需通过位运算把新字符接上就行了。
// Finds every 10-letter DNA substring that occurs more than once in a string.
//
// Each nucleotide is packed into 2 bits, so a 10-letter window fits in a
// 20-bit integer. The window code is updated incrementally as the scan moves
// one character to the right, instead of re-encoding the whole substring.
class Solution {
public:
    // Maps a nucleotide letter to its 2-bit code: A=0, T=1, C=2, G=3.
    // Returns -1 for any other character so that callers can detect invalid
    // input instead of running into a missing return value.
    int change(char x)
    {
        switch (x) {
        case 'A': return 0;
        case 'T': return 1;
        case 'C': return 2;
        case 'G': return 3;
        default:  return -1;
        }
    }

    // Returns each 10-letter substring of `s` that appears at least twice.
    // Every such substring is listed exactly once, in the order in which its
    // second occurrence is encountered while scanning left to right.
    // Windows that contain a character other than A/C/G/T are skipped.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::string::size_type kWindow = 10;
        // Keeps the low 20 bits (10 nucleotides x 2 bits). Masking explicitly
        // avoids relying on the exact bit width of the integer type.
        const unsigned long kMask = 0xFFFFFUL;

        std::vector<std::string> res;
        if (s.size() <= kWindow) {
            return res;  // fewer than two windows, so nothing can repeat
        }

        // Per-window state keyed by its encoding:
        //   0 = never seen (value-initialized by operator[]),
        //   1 = seen once,
        //   2 = already reported.
        std::map<unsigned long, int> seen;

        unsigned long code = 0;             // rolling encoding of the last <=10 letters
        std::string::size_type valid = 0;   // consecutive valid letters ending at i

        for (std::string::size_type i = 0; i < s.size(); ++i) {
            const int bits = change(s[i]);
            if (bits < 0) {
                // An invalid character breaks every window that would contain it.
                code = 0;
                valid = 0;
                continue;
            }

            // Drop the oldest letter (via the mask) and append the new one.
            code = ((code << 2) | static_cast<unsigned long>(bits)) & kMask;
            ++valid;
            if (valid < kWindow) {
                continue;  // not enough letters for a full window yet
            }

            int& state = seen[code];  // single lookup; inserts 0 for a new window
            if (state == 1) {
                // Second sighting: report once; later sightings are ignored.
                res.push_back(s.substr(i + 1 - kWindow, kWindow));
            }
            if (state < 2) {
                ++state;
            }
        }
        return res;
    }
};