/*文件命名:match.c
*/
/* NOTE(review): this line originally carried five #include directives whose
 * header names were lost. The four headers below cover every library call in
 * this file; confirm whether a fifth header is still required. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define ASCII_SIZE 256
#define SUCCESS 0
#define ERROR -1
#define RABIN_KARP_BASE 2
/*
 * Node of a singly linked list used by the Boyer-Moore bad-character table.
 * bad_character_BM() keeps one list per byte value and stores one node for
 * every place that byte appears in the pattern.
 */
struct BM_bad_character_node {
    int pos;                             /* index of the byte inside the pattern */
    struct BM_bad_character_node *next;  /* following node, or NULL at the end   */
};
/*
 * brute_force - naive search for the first occurrence of P in T.
 *
 * T: NUL-terminated text to search.
 * P: NUL-terminated pattern.
 *
 * Prints either the 0-based position of the first match or a "not found"
 * line, together with the number of steps taken. Does nothing when either
 * argument is NULL.
 *
 * An empty pattern matches at position 0 with zero steps. (The previous
 * implementation reported position 1 for it, a side effect of its j = -1
 * reset step.)
 */
void brute_force(const char *T, const char *P) {
    int start = 0;   /* text index where the current alignment begins */
    int j = 0;       /* number of pattern bytes matched so far        */
    int total = 0;

    if (T == NULL || P == NULL)
        return;

    if (P[0] == '\0') {
        printf("Brute-Force 在位置 %d 找到 %s, 总比对次数=%d\n", 0, P, total);
        return;
    }

    while (T[start + j] != '\0') {
        total++;
        if (T[start + j] == P[j]) {
            j++;
            if (P[j] == '\0') { /* whole pattern matched */
                printf("Brute-Force 在位置 %d 找到 %s, 总比对次数=%d\n", start, P, total);
                return;
            }
        } else {
            /* Moving to the next alignment is charged as one extra step,
             * exactly as before, so reported totals stay unchanged. */
            total++;
            start++;
            j = 0;
        }
    }
    printf("Brute-Force 找不到 %s, 总比对次数=%d\n", P, total);
} // end of brute_force()
/*
 * preprocess_KMP - build the KMP failure table for pattern P.
 *
 * P:    NUL-terminated pattern.
 * next: output array with room for strlen(P) + 1 ints.
 *
 * On return next[0] is -1 and, for every i in [0, strlen(P)), next[i + 1] is
 * the length of the longest proper prefix of P[0..i] that is also a suffix
 * of it. Does nothing when either argument is NULL.
 */
void preprocess_KMP(const char *P, int *next) {
    int nLen, i, border;

    if (P == NULL || next == NULL)
        return;

    nLen = (int)strlen(P);
    next[0] = -1;
    for (i = 0; i < nLen; i++) {
        /* Try to extend the border of the previous prefix by one byte ... */
        border = next[i] + 1;
        /* ... and fall back to shorter borders until P[i] fits. */
        while (border > 0 && P[border - 1] != P[i])
            border = next[border - 1] + 1;
        next[i + 1] = border;
    }
} // end of preprocess_KMP()
/*
 * kmp - Knuth-Morris-Pratt search for the first occurrence of P in T.
 *
 * T:    NUL-terminated text to search.
 * P:    NUL-terminated pattern.
 * next: failure table for P with next[0] == -1 (the layout written by
 *       preprocess_KMP()).
 *
 * Prints either the 0-based position of the first match or a "not found"
 * line, together with the number of loop steps taken. Does nothing when any
 * argument is NULL.
 *
 * An empty pattern matches at position 0 with zero steps. (The previous
 * implementation reported position 1 for it, because the first step fell
 * back to j = -1 and then advanced the text index.)
 */
void kmp(const char *T, const char *P, const int *next) {
    int i, j, total = 0;

    if (T == NULL || P == NULL || next == NULL)
        return;

    if (P[0] == '\0') {
        printf("KMP 在位置 %d 找到 %s, 总比对次数=%d\n", 0, P, total);
        return;
    }

    i = 0;
    j = 0;
    while (T[i] != '\0') {
        total++;
        /* j < 0 means even P[0] failed: step to the next text byte. */
        if (j < 0 || T[i] == P[j]) {
            ++i;
            ++j;
            if (P[j] == '\0') { /* whole pattern matched */
                printf("KMP 在位置 %d 找到 %s, 总比对次数=%d\n", i - j, P, total);
                return;
            }
        } else {
            /* Keep i, realign the pattern using the failure table. */
            j = next[j];
        }
    }
    printf("KMP 找不到 %s, 总比对次数=%d\n", P, total);
} // end of kmp
/*
 * bad_character_BM - record where every byte of the pattern occurs.
 *
 * p:       NUL-terminated pattern.
 * BM_head: table of list heads indexed by byte value; each new node is
 *          pushed in front of whatever the slot already holds.
 *
 * Because positions are visited left to right and each node is prepended,
 * every list ends up ordered from the largest position to the smallest.
 *
 * Returns SUCCESS, or ERROR when p is NULL or an allocation fails. After an
 * allocation failure the nodes linked so far are left in BM_head; this
 * function does not release them.
 */
int bad_character_BM(char *p, struct BM_bad_character_node *BM_head[ASCII_SIZE]) {
    struct BM_bad_character_node *node;
    int idx;

    if (p == NULL)
        return ERROR;

    for (idx = 0; p[idx] != '\0'; idx++) {
        /* Index by unsigned value so bytes >= 0x80 cannot go negative. */
        unsigned char slot = (unsigned char)p[idx];

        node = malloc(sizeof *node);
        if (node == NULL)
            return ERROR;

        node->pos = idx;
        node->next = BM_head[slot];
        BM_head[slot] = node;
    }
    return SUCCESS;
} // end of bad_character_BM()
/*
 * Helper: report whether sliding the pattern right by `shift` keeps it
 * consistent with the already matched suffix p[j+1 .. len-1]. Positions that
 * would fall before the start of the pattern impose no constraint.
 */
static int gs_shift_fits(const char *p, int len, int j, int shift) {
    int k;

    for (k = len - 1; k > j && k - shift >= 0; k--) {
        if (p[k - shift] != p[k])
            return 0;
    }
    return 1;
}

/*
 * good_suffix_BM - build the Boyer-Moore good-suffix shift table.
 *
 * p:           NUL-terminated pattern.
 * good_suffix: output array with room for strlen(p) ints.
 *
 * For a mismatch at pattern index j, good_suffix[j] is the smallest shift
 * (1 .. strlen(p)) that either re-aligns an earlier copy of the matched
 * suffix p[j+1..] or aligns a prefix of the pattern with the tail of that
 * suffix. The previous implementation only looked for a complete earlier
 * copy and otherwise shifted by the whole pattern length, which could jump
 * past a valid match (e.g. it produced 5 instead of 3 for "abcab", j = 1).
 *
 * The last entry (mismatch on the final byte, nothing matched yet) is set to
 * 1, the minimal safe shift; the previous implementation left it unwritten.
 *
 * Does nothing when an argument is NULL or the pattern is empty. The search
 * is a plain nested scan, intended for short patterns.
 */
void good_suffix_BM(const char *p, int *good_suffix) {
    int nLen, j, shift;

    if (p == NULL || good_suffix == NULL)
        return;

    nLen = (int)strlen(p);
    if (nLen == 0)
        return;

    good_suffix[nLen - 1] = 1;
    for (j = nLen - 2; j >= 0; j--) {
        shift = 1;
        /* A shift of nLen always fits, so the loop is bounded. */
        while (shift < nLen && !gs_shift_fits(p, nLen, j, shift))
            shift++;
        good_suffix[j] = shift;
    }
} // end of good_suffix_BM()
void bm(char *T, char *P, struct BM_bad_character_node *BM_head[ASCII_SIZE], int *good_suffix) {
int i,j,nLen=0,nTextLen=0;
int total = 0;
int nBadCharMove=0, nGoodSuffixMove=0, nMove=0;
unsigned char uc;
struct BM_bad_character_node *ptr=NULL;
if (T == NULL || P == NULL || good_suffix == NULL)
return;
nLen = strlen(P);
nTextLen = strlen(T);
i = j = nLen-1;
while(i < nTextLen) {
while(T[i] == P[j]) {
total ++;
if (j == 0) {
printf("BM 在位置 %d 找到 %s, 总比对次数=%d\n",i,P,total);
return;
}
i--; j--;
}
total ++;
// 计算 bad_character_rule 位移
uc = (unsigned char)T[i];
ptr = BM_head[uc];
while (ptr) {
if (ptr->pos < j)
break;
ptr = ptr->next;
}
if (ptr == NULL)
nBadCharMove = nLen;
else
nBadCharMove = j-ptr->pos;
// 计算 good_suffix_rule 位移
nGoodSuffixMove = good_suffix[j];
// 比较 move 格数
if (nGoodSuffixMove > nBadCharMove)
nMove = nGoodSuffixMove;
else
nMove = nBadCharMove;
// 移动箭头
i = i + nMove + (nLen-1-j);
j = nLen-1;
}
printf("BM 找不到 %s, 总比对次数=%d\n",P,total);
} // end of bm()
/*
 * Rabin_Karp - rolling-hash search for the first occurrence of P in T.
 *
 * T: NUL-terminated text to search.
 * P: NUL-terminated pattern.
 *
 * The hash is a polynomial in RABIN_KARP_BASE over unsigned int; wrap-around
 * is intentional and harmless because every hash hit is confirmed byte by
 * byte. Prints either the 0-based match position or a "not found" line, plus
 * the number of steps (one per window hash test and one per confirming byte
 * comparison). Does nothing when either argument is NULL.
 *
 * Fixes relative to the previous version:
 *  - the last window of the text is now tested (a match ending at the final
 *    byte used to be missed);
 *  - a pattern longer than the text is rejected up front instead of hashing
 *    bytes past the end of T;
 *  - the unused llsum local is removed.
 */
void Rabin_Karp(const char *T, const char *P) {
    unsigned int sumText = 0, sumPattern = 0, maxFactor = 0;
    int nLen, nTextLen, i, j, total = 0;

    if (T == NULL || P == NULL)
        return;

    nLen = (int)strlen(P);
    nTextLen = (int)strlen(T);
    if (nLen > nTextLen) {
        printf("Rabin-Karp 找不到 %s, 总比对次数=%d\n", P, total);
        return;
    }

    /* Hash the pattern and the first window; maxFactor becomes
     * RABIN_KARP_BASE^(nLen-1), the weight of a window's leading byte. */
    for (i = 0; i < nLen; i++) {
        sumPattern = sumPattern * RABIN_KARP_BASE + (unsigned char)P[i];
        sumText = sumText * RABIN_KARP_BASE + (unsigned char)T[i];
        maxFactor = (maxFactor == 0) ? 1 : maxFactor * RABIN_KARP_BASE;
    }

    /* Invariant: sumText is the hash of T[i-nLen .. i-1]. */
    for (;;) {
        total++;
        if (sumText == sumPattern) { /* hash hit: confirm byte by byte */
            for (j = 0; j < nLen; j++) {
                total++;
                if (P[j] != T[i - nLen + j])
                    break;
            }
            if (j == nLen) {
                printf("Rabin-Karp 在位置 %d 找到 %s, 总比对次数=%d\n", i - j, P, total);
                return;
            }
        }
        if (i >= nTextLen) /* that was the last window */
            break;

        /* Roll: drop the leading byte, shift, append the next byte. */
        sumText = sumText - (unsigned char)T[i - nLen] * maxFactor;
        sumText = sumText * RABIN_KARP_BASE;
        sumText = sumText + (unsigned char)T[i];
        i++;
    }
    printf("Rabin-Karp 找不到 %s, 总比对次数=%d\n", P, total);
} // end of Rabin_Karp()
int main(int argc, char *argv[]) {
int *next=NULL; // KMP 使用
char *p=NULL,*t=NULL;
int i,nLen=0;
FILE *fp=NULL;
clock_t begin=0,end=0;
int *good_suffix = NULL; // BM 使用
struct BM_bad_character_node *BM_head[ASCII_SIZE]; // BM 使用
struct BM_bad_character_node *ptr=NULL; // BM 使用
if (argc != 3) {
printf("请使用 string_matching 档案名称 要寻找的文字\n");
return;
}
memset(BM_head,0,sizeof(struct BM_bad_character_node *)*ASCII_SIZE); // BM 使用
nLen = strlen(argv[2]);
if ((next = (int *)malloc(sizeof(int)*(nLen+1))) == NULL)
goto errexit;
/
// 读入 Pattern 文字
/
if ((p = (char *)malloc(sizeof(char)*(nLen+1))) == NULL)
goto errexit;
strncpy(p,argv[2],nLen);
p[nLen] = '\0';
if ((fp = fopen(argv[1],"r")) == NULL)
goto errexit;
/
// 读入 Text 档案
/
fseek(fp,0,SEEK_END);
nLen = ftell(fp);
rewind(fp);
if ((t = (char *)malloc(sizeof(char)*(nLen+1))) == NULL)
goto errexit;
nLen = fread(t,1,nLen,fp);
t[nLen] = '\0';
/
// Brute-Force search
/
begin = clock();
brute_force(t,p);
end = clock();
printf("Time of Brute-Force is %f\n",(float)(end-begin)/CLOCKS_PER_SEC);
/
// KMP search
/
preprocess_KMP(p,next);
begin = clock();
kmp(t,p,next);
end = clock();
printf("Time of KMP is %f\n",(float)(end-begin)/CLOCKS_PER_SEC);
/
// BM search
/
nLen = strlen(p);
if ((good_suffix = (int *)malloc(sizeof(int)*nLen)) == NULL)
goto errexit;
memset(good_suffix,0,sizeof(int)*nLen);
if (bad_character_BM(p,BM_head) != SUCCESS)
goto errexit;
good_suffix_BM(p,good_suffix);
begin = clock();
bm(t,p,BM_head,good_suffix);
end = clock();
printf("Time of BM is %f\n",(float)(end-begin)/CLOCKS_PER_SEC);
/
// Rabin-Karp search
/
begin = clock();
Rabin_Karp(t,p);
end = clock();
printf("Time of Rabin-Karp is %f\n",(float)(end-begin)/CLOCKS_PER_SEC);
errexit:
if (fp) {
fclose(fp);
fp = NULL;
}
if (p) {
free(p);
p = NULL;
}
if (t) {
free(t);
t = NULL;
}
if (next) {
free(next);
next = NULL;
}
if (good_suffix) {
free(good_suffix);
good_suffix = NULL;
}
for (i=0;iptr = BM_head[i];
while(ptr) {
BM_head[i] = BM_head[i]->next;
free(ptr);
ptr = BM_head[i];
}
ptr = NULL;
}
} // end of main()