// All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
// Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
// For example,
// given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return: ["AAAAACCCCC", "CCCCCAAAAA"].
#include<iostream>
#include<vector>
#include<map>
#include<string>
using namespace std;
// Approach 1: count substrings with a map keyed by the substring itself. Result: Memory Limit Exceeded
// Returns every 10-letter substring of `s` that occurs more than once.
//
// Each distinct repeated substring appears exactly once in the result, in the
// order in which its second occurrence is encountered while scanning `s` from
// left to right. Inputs of length 10 or less cannot contain a repeat and yield
// an empty vector.
//
// This variant keys the counting map by the substring itself.
//
// NOTE(review): this file also contains a second definition with the same
// name and signature further down; the two cannot be compiled into the same
// translation unit as-is.
std::vector<std::string> findRepeatedDnaSequences(std::string s) {
    const std::string::size_type kWindow = 10;

    std::vector<std::string> ResultString;
    if (s.size() <= kWindow)
        return ResultString;

    std::map<std::string, int> MapStringCount;
    // The bound is inclusive of the final window starting at size() - kWindow;
    // written as `i + kWindow <= size()` to avoid unsigned subtraction.
    for (std::string::size_type i = 0; i + kWindow <= s.size(); ++i) {
        // Build the window once and look it up once; operator[] value-initialises
        // a missing counter to 0.
        const std::string window = s.substr(i, kWindow);
        int& count = MapStringCount[window];
        ++count;
        // Report a sequence only on its second sighting so it is listed once.
        if (count == 2)
            ResultString.push_back(window);
    }
    return ResultString;
}
// Approach 2: improved hash key.
// The hash key is built with bit operations.
// Packs a nucleotide string into an integer, two bits per character.
//
// Characters are consumed left to right; each one shifts the accumulated value
// left by two bits and adds its code: 'C' -> 1, 'G' -> 2, 'T' -> 3. Every other
// character (including 'A') adds 0. An empty string yields 0.
int myhashkey(string s)
{
    int packed = 0;
    for (string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        int code = 0;
        switch (*it)
        {
        case 'C':
            code = 1;
            break;
        case 'G':
            code = 2;
            break;
        case 'T':
            code = 3;
            break;
        default:
            // 'A' and any unrecognised character contribute 0.
            break;
        }
        packed = (packed << 2) + code;
    }
    return packed;
}
// Returns every 10-letter substring of `s` that occurs more than once.
//
// Each distinct repeated substring appears exactly once in the result, in the
// order in which its second occurrence is encountered while scanning `s` from
// left to right. Inputs of length 10 or less yield an empty vector.
//
// Windows are counted under a 20-bit integer key instead of the substring
// itself. The key uses the same 2-bit-per-character encoding as myhashkey():
// 'C' -> 1, 'G' -> 2, 'T' -> 3, anything else (including 'A') -> 0. As a
// consequence, characters outside A/C/G/T are indistinguishable from 'A'.
//
// The key is maintained incrementally: appending one character shifts the
// previous key left by two bits, adds the new code, and masks away the
// character that just left the window, so no per-window substring or
// re-encoding is needed.
std::vector<std::string> findRepeatedDnaSequences(std::string s) {
    const std::string::size_type kWindow = 10;
    // Low 2 * 10 bits set: keeps exactly the last ten 2-bit codes.
    const int kKeyMask = (1 << 20) - 1;

    std::vector<std::string> ResultString;
    if (s.size() <= kWindow)
        return ResultString;

    std::map<int, int> MapStringCount;
    int key = 0;
    for (std::string::size_type i = 0; i != s.size(); ++i) {
        int code = 0;
        switch (s[i]) {
        case 'C':
            code = 1;
            break;
        case 'G':
            code = 2;
            break;
        case 'T':
            code = 3;
            break;
        default:
            break;
        }
        key = ((key << 2) | code) & kKeyMask;

        // The first complete window ends at index kWindow - 1.
        if (i + 1 < kWindow)
            continue;

        // operator[] value-initialises a missing counter to 0.
        int& count = MapStringCount[key];
        ++count;
        // Report a sequence only on its second sighting so it is listed once.
        if (count == 2)
            ResultString.push_back(s.substr(i + 1 - kWindow, kWindow));
    }
    return ResultString;
}