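/**
 * Algorithm:
 * Walk over every substring of length 10, adding each one to a hash table as we go. If the
 * substring is already in the table and its "added to the result" flag is not yet set,
 * append it to the result (cnt marks whether it has already been added to the result);
 * if the substring is not in the table yet, insert it into the table.
 * The hash value should spread the entries as evenly as possible across the table;
 * otherwise the solution easily times out.
 */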
/* Number of buckets in the hash table (0x1ffff = 131071); hash_insert and
 * hash_find reduce a key modulo this value to pick a bucket. */
#define L 0x1ffff
/**
 * Map a nucleotide letter to a small integer code.
 *
 * @param c character to classify
 * @return 0 for 'A', 1 for 'C', 2 for 'G', and 3 for every other character
 */
int get(char c){
    switch (c) {
    case 'A':
        return 0;
    case 'C':
        return 1;
    case 'G':
        return 2;
    default:
        return 3;
    }
}
/* One entry of a separately-chained hash table of 10-character windows. */
typedef struct hnode{
/* Hash value the entry was inserted with (the bucket is this value modulo L). */
int key;
/* Start of the 10-character window; the pointer is stored as given, not copied. */
char *s;
/* Starts at 0; findRepeatedDnaSequences sets it to 1 once the window has been added to the result. */
int cnt;
/* Next entry in the same bucket, or NULL at the end of the chain. */
struct hnode *next;
} HNode;
/**
 * Allocate a bucket array of `len` empty chains.
 *
 * @param htb  receives the newly allocated bucket array; set to NULL when
 *             `len` is negative or the allocation fails, so the caller can
 *             detect the failure instead of crashing here
 * @param len  number of buckets to allocate
 */
void hash_init(HNode ***htb, int len){
    HNode **buckets;
    int i;

    *htb = NULL;
    if (len < 0) {
        /* A negative count would wrap to a huge size_t in the size computation. */
        return;
    }

    buckets = malloc(sizeof *buckets * (size_t)len);
    if (buckets == NULL) {
        return;
    }
    for (i = 0; i < len; i++) {
        buckets[i] = NULL;
    }
    *htb = buckets;
}
/**
 * Free every node stored in the table and print a usage summary.
 *
 * Only the chained nodes are released; the bucket array `htb` itself is not
 * freed here, and its slots are left holding their old (now dangling) values.
 * Prints "len = <len>, cnt = <number of non-empty buckets>" to stdout.
 *
 * @param htb  bucket array
 * @param len  number of buckets in `htb`
 */
void hash_exit(HNode **htb, int len){
    int used = 0;

    for (int bucket = 0; bucket < len; bucket++) {
        HNode *cur = htb[bucket];

        if (cur != NULL) {
            used++;
        }
        while (cur != NULL) {
            /* Read the link before releasing the node that holds it. */
            HNode *following = cur->next;
            free(cur);
            cur = following;
        }
    }
    printf("len = %d, cnt = %d\n", len, used);
}
/**
 * Insert a new entry at the head of its bucket chain.
 *
 * The bucket is `key` modulo L. No duplicate check is done, and the pointer
 * `s` is stored as-is (not copied), so the memory it refers to must stay
 * valid for as long as the table is used.
 *
 * If the node cannot be allocated, the table is left unchanged.
 *
 * @param hash_table  bucket array with at least L buckets
 * @param key         hash value of the entry
 * @param s           start of the 10-character window to remember
 */
void hash_insert(HNode **hash_table, int key, char *s){
    int index = key % L;
    HNode *node;

    if (index < 0) {
        /* In C the result of % takes the sign of the dividend; fold a
         * negative remainder back into [0, L) so it is a valid bucket. */
        index += L;
    }

    node = malloc(sizeof *node);
    if (node == NULL) {
        return;
    }
    node->key = key;
    node->s = s;
    node->cnt = 0;
    node->next = hash_table[index];
    hash_table[index] = node;
}
/**
 * Look up a 10-character window in the table.
 *
 * Scans the chain of bucket `key` modulo L and compares the first 10 bytes of
 * each stored window with the first 10 bytes of `s`; the stored key itself is
 * not compared.
 *
 * @param table  bucket array with at least L buckets
 * @param key    hash value of the window being looked up
 * @param s      window to search for; at least 10 bytes must be readable
 * @return the first matching node in the chain, or NULL if there is none
 */
HNode *hash_find(HNode **table, int key, char *s) {
    int index = key % L;
    HNode *p;

    if (index < 0) {
        /* In C the result of % takes the sign of the dividend; fold a
         * negative remainder back into [0, L) so it is a valid bucket. */
        index += L;
    }

    for (p = table[index]; p != NULL; p = p->next) {
        if (memcmp(p->s, s, 10) == 0) {
            return p;
        }
    }
    return NULL;
}
/* Hash of the 10-character window starting at `w`: a sum of pairwise
 * products of its characters, used to pick a hash-table bucket. */
static int dna_window_key(const char *w)
{
    int key = w[0] * w[9] + w[1] * w[8] + w[2] * w[7] + w[3] * w[6] + w[4] * w[5];

    key += w[0] * w[8] + w[2] * w[9] + w[2] * w[6] + w[3] * w[5] + w[4] * w[7];
    return key;
}

/**
 * Find every 10-character substring that occurs more than once in `s`.
 *
 * Each window is looked up in a hash table: the first time it is seen it is
 * inserted; the first time it is seen again it is copied into the result.
 * Later occurrences are ignored, so each repeated substring is reported once,
 * in the order in which its second occurrence appears.
 *
 * Diagnostic lines are printed to stdout (the input length here, plus the
 * summary printed by hash_exit).
 *
 * @param s           input string
 * @param returnSize  receives the number of strings in the returned array
 * @return NULL when `s` is NULL, shorter than 10 characters, or an allocation
 *         fails (*returnSize is 0 in all of these cases). Otherwise a
 *         malloc'ed array of *returnSize NUL-terminated 10-character strings
 *         (non-NULL even when *returnSize is 0); the caller frees each string
 *         and then the array.
 */
char** findRepeatedDnaSequences(char* s, int* returnSize) {
    HNode **htable = NULL;
    char **ret = NULL;
    size_t count = 0;
    size_t cap = 16;
    int len;

    *returnSize = 0;
    if (s == NULL) {
        return NULL;
    }
    len = strlen(s);
    /* Reject short input before allocating anything so nothing leaks. */
    if (len <= 9) {
        return NULL;
    }

    hash_init(&htable, L);
    if (htable == NULL) {
        return NULL;
    }

    ret = malloc(cap * sizeof *ret);
    if (ret == NULL) {
        goto fail;
    }

    printf("len=%d\n", len);
    for (int i = 0; i < len - 9; i++) {
        char *window = &s[i];
        int key = dna_window_key(window);
        HNode *hit = hash_find(htable, key, window);
        char *copy;

        if (hit == NULL) {
            /* First sighting: remember the window. */
            hash_insert(htable, key, window);
            continue;
        }
        if (hit->cnt) {
            /* Already reported. */
            continue;
        }
        hit->cnt = 1;

        /* Grow the result array on demand; the number of repeated
         * sequences is not bounded by any fixed constant. */
        if (count == cap) {
            char **grown = realloc(ret, 2 * cap * sizeof *ret);

            if (grown == NULL) {
                goto fail;
            }
            ret = grown;
            cap *= 2;
        }

        copy = malloc(11);
        if (copy == NULL) {
            goto fail;
        }
        memcpy(copy, hit->s, 10);
        copy[10] = '\0';
        ret[count++] = copy;
    }

    hash_exit(htable, L);
    /* hash_exit releases only the chained nodes; the bucket array is ours. */
    free(htable);
    *returnSize = (int)count;
    return ret;

fail:
    while (count > 0) {
        free(ret[--count]);
    }
    free(ret);
    hash_exit(htable, L);
    free(htable);
    return NULL;
}
/*
 * LeetCode 187. Repeated DNA Sequences - C
 * (Blog page residue: "latest recommended article published 2022-03-15 16:02:48".)
 */