# 187. Repeated DNA Sequences

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

For example,

Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",

Return:
["AAAAACCCCC", "CCCCCAAAAA"].

DNA序列中只存在A、C、G、T四种字符，四种字符所对应的ASCII码分别为：65、67、71、84，二进制表示为 1000001、1000011、1000111、1010100，八进制表示为0101、0103、0107、0124，可以看到只需要最低的3位（001、011、111、100）就可以区分这四个字母。则10个字符只需要30位来表示即可，之后将这30位转化为int型，作为key存储在map中。map的值为该10字符长的子串出现的次数。之后对map的值（出现的次数）进行判断即可得出重复的10字符子串。

class Solution {
public:
    /// Returns every 10-letter substring of `s` that occurs more than once.
    ///
    /// Each character is reduced to its low three bits (A=1, C=3, G=7, T=4 in
    /// ASCII), so a 10-character window packs into a 30-bit integer key. The
    /// key is updated incrementally as the window slides one character at a
    /// time, and a hash map counts how often each key has been seen.
    ///
    /// @param s the DNA string to scan.
    /// @return the repeated substrings, each reported once, in the order in
    ///         which their second occurrence is encountered. Empty when `s`
    ///         has fewer than 10 characters.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::size_t kWindow = 10;         // length of the substrings we look for
        const unsigned int kMask = 0x3FFFFFFFu; // keeps the low 30 bits (10 chars * 3 bits)

        std::vector<std::string> result;
        // Inputs shorter than one window cannot contain any repeat; returning
        // early also avoids indexing past the end of the string.
        if (s.size() < kWindow) {
            return result;
        }

        std::unordered_map<unsigned int, int> occurrences;
        // Unsigned key: shifting a 30-bit value left by 3 would overflow a
        // signed int before the mask is applied.
        unsigned int key = 0;
        for (std::size_t i = 0; i < s.size(); ++i) {
            key = ((key << 3) & kMask) | (static_cast<unsigned char>(s[i]) & 0x7u);
            if (i + 1 < kWindow) {
                continue; // the first full window is only available at index 9
            }
            // Report a window exactly once: when its count goes from 1 to 2.
            if (occurrences[key]++ == 1) {
                result.push_back(s.substr(i + 1 - kWindow, kWindow));
            }
        }
        return result;
    }
};

java版：67ms

public class Solution {
    /**
     * Finds every 10-letter substring that occurs more than once in {@code s}.
     *
     * Each character is reduced to its low three bits, so a 10-character
     * window fits in a 30-bit int key that is updated as the window slides.
     *
     * @param s the DNA string to scan
     * @return the repeated substrings, each listed once, in the order in which
     *         their second occurrence is found; empty if {@code s} has fewer
     *         than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {

        List<String> res = new ArrayList<String>();
        Map<Integer, Integer> map = new HashMap<Integer, Integer>();
        int substr = 0;
        for (int i = 0; i < s.length(); i++) { // visit every character once
            // Shift in the new character's 3-bit code and keep only the last 10 characters (30 bits).
            substr = ((substr << 3) | (s.charAt(i) & 0x7)) & 0x3fffffff;
            if (i < 9) continue; // no complete 10-character window yet
            Integer count = map.get(substr);
            if (count == null) { // first time this window is seen
                map.put(substr, 1);
            } else if (count == 1) { // second occurrence: report it exactly once
                res.add(s.substring(i - 9, i + 1));
                map.put(substr, 2);
            }
        }
        return res;
    }
}


public class Solution {
public List<String> findRepeatedDnaSequences(String s) {

Set<Integer> words = new HashSet<>();
Set<Integer> doubleWords = new HashSet<>();
List<String> res = new ArrayList<>();
char[] map = new char[26];
//map['A'-'A'] = 0; 将字符转换为数字映射
map['C'-'A'] = 1;
map['G'-'A'] = 2;
map['T'-'A'] = 3;

for(int i=0; i<s.length()-9; i++){
int str = 0;
for(int j=i; j<i+10; j++){  //找到当前十个字符的子串
str <<= 2;
str |= map[s.charAt(j)-'A'];
}