All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return ["AAAAACCCCC", "CCCCCAAAAA"].
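Approach: use a HashMap<String, Integer> that maps each 10-letter substring to the number of times it occurs, then report every substring whose count is greater than 1.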
/**
 * Finds every 10-letter substring that occurs more than once in a DNA string.
 *
 * <p>Each window of {@code SEQUENCE_LENGTH} consecutive characters is counted in a map;
 * the windows whose count exceeds one are then reported.
 */
public class Solution {

    /** Length of the substrings that are compared against each other. */
    private static final int SEQUENCE_LENGTH = 10;

    /**
     * Returns the distinct 10-letter substrings of {@code s} that occur at least twice.
     * Overlapping occurrences are counted, and each repeated substring is reported once.
     *
     * @param s the string to scan; may be {@code null}
     * @return a new list of the repeated substrings, ordered by where each one first
     *         appears in {@code s}; empty when {@code s} is {@code null} or shorter
     *         than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        List<String> result = new ArrayList<String>();
        if (s == null || s.length() < SEQUENCE_LENGTH) {
            return result;
        }

        // A LinkedHashMap iterates in insertion order, so the result is reported in
        // first-occurrence order instead of arbitrary hash-bucket order. It is named
        // by its fully qualified name so that no additional import is required.
        HashMap<String, Integer> counts = new java.util.LinkedHashMap<String, Integer>();

        int lastStart = s.length() - SEQUENCE_LENGTH;
        for (int start = 0; start <= lastStart; start++) {
            String window = s.substring(start, start + SEQUENCE_LENGTH);
            // Single lookup: a null result means this window has not been seen before.
            Integer seen = counts.get(window);
            counts.put(window, seen == null ? 1 : seen + 1);
        }

        findRepeatedSeq(result, counts);
        return result;
    }

    /**
     * Appends to {@code result} every key of {@code strList} whose count is greater than one.
     * Keys are appended in the iteration order of {@code strList}.
     *
     * @param result  the list that receives the repeated substrings
     * @param strList map from substring to the number of times it was seen
     */
    public void findRepeatedSeq(List<String> result, HashMap<String, Integer> strList) {
        for (Map.Entry<String, Integer> entry : strList.entrySet()) {
            if (entry.getValue() > 1) {
                result.add(entry.getKey());
            }
        }
    }
}