All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", Return: ["AAAAACCCCC", "CCCCCAAAAA"].
Difficulty: Medium
Solution: Two naive solutions are given below, one using a HashMap and one using a Hashtable; they are almost identical. The best solution uses a bitmap:
Link: http://www.programcreek.com/2014/03/leetcode-repeated-dna-sequences-java/
HashMap:
/**
 * Finds every 10-letter substring that occurs more than once in a DNA string,
 * counting occurrences in a {@link HashMap}.
 */
public class Solution {
    /** Length of the sequences (substrings) being compared. */
    private static final int SEQUENCE_LENGTH = 10;

    /**
     * Returns each 10-letter sequence that appears at least twice in {@code s}.
     *
     * <p>Overlapping occurrences count. Every repeated sequence is reported exactly
     * once, in the order in which its second occurrence is encountered while scanning
     * left to right, so the result order is deterministic for a given input.
     *
     * @param s the DNA string to scan; must not be {@code null}
     * @return the repeated sequences; empty when {@code s} is shorter than 11
     *         characters or contains no repeats
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public List<String> findRepeatedDnaSequences(String s) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        List<String> res = new ArrayList<String>();
        // Last index at which a full window still fits; negative for short inputs,
        // in which case the loop body never runs.
        int lastStart = s.length() - SEQUENCE_LENGTH;
        for (int i = 0; i <= lastStart; i++) {
            String window = s.substring(i, i + SEQUENCE_LENGTH);
            Integer seen = counts.get(window);
            if (seen == null) {
                counts.put(window, 1);
            } else {
                // Record the sequence only on the transition from one to two
                // occurrences so that it is never added twice.
                if (seen == 1) {
                    res.add(window);
                }
                counts.put(window, seen + 1);
            }
        }
        return res;
    }
}
import java.util.Hashtable;
import java.util.Enumeration;
/**
 * Finds every 10-letter substring that occurs more than once in a DNA string,
 * counting occurrences in a {@link Hashtable}.
 *
 * <p>{@code Hashtable} is kept here because this variant exists to demonstrate it;
 * it rejects {@code null} keys and values, which this code never stores.
 */
public class Solution {
    /** Length of the sequences (substrings) being compared. */
    private static final int SEQUENCE_LENGTH = 10;

    /**
     * Returns each 10-letter sequence that appears at least twice in {@code s}.
     *
     * <p>Overlapping occurrences count. Every repeated sequence is reported exactly
     * once, in the order in which its second occurrence is encountered while scanning
     * left to right, so the result order is deterministic for a given input.
     *
     * @param s the DNA string to scan; must not be {@code null}
     * @return the repeated sequences; empty when {@code s} is shorter than 11
     *         characters or contains no repeats
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public List<String> findRepeatedDnaSequences(String s) {
        Hashtable<String, Integer> ht = new Hashtable<String, Integer>();
        List<String> res = new ArrayList<String>();
        // Last index at which a full window still fits; negative for short inputs,
        // in which case the loop body never runs.
        int lastStart = s.length() - SEQUENCE_LENGTH;
        for (int i = 0; i <= lastStart; i++) {
            String window = s.substring(i, i + SEQUENCE_LENGTH);
            // get() returns null for an unseen key, so one lookup replaces the
            // containsKey()/get() pair.
            Integer seen = ht.get(window);
            if (seen == null) {
                ht.put(window, 1);
            } else {
                // Record the sequence only on the transition from one to two
                // occurrences so that it is never added twice.
                if (seen == 1) {
                    res.add(window);
                }
                ht.put(window, seen + 1);
            }
        }
        return res;
    }
}