C语言字符串匹配
- 暴力BF
- 哈希RK
- KMP
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
// NOTE(review): MAXLEN is not referenced anywhere in the visible part of this file.
#define MAXLEN 255
// Pattern matching on strings.
// Sequential string: a character pointer paired with an explicit length.
typedef struct {
// Characters of the string; the init functions below point this at string literals.
char* ch;
// Number of characters the matchers treat as valid. Their loops are bounded by
// this field rather than by a NUL terminator, so it must not exceed the real
// number of characters behind ch.
int length;
}SString;
// Initializes the main (text) string used by the demo.
//
// The length is derived from the sample text itself so the two cannot drift
// apart. The previous hard-coded 21 was the length of the alternative sample
// "GTGTGAGCTGGTGTGTGCFAA" and made the matchers read past the end of the
// 12-character "aabaabaabaaf".
//
// s: string object to fill in; it ends up pointing at static storage, so the
//    caller must not free s->ch.
void initStringa(SString* s) {
    static char text[] = "aabaabaabaaf";

    s->ch = text;
    s->length = (int)(sizeof text - 1);  // exclude the terminating NUL
}
// Initializes the pattern string used by the demo.
//
// The length is derived from the sample text itself. The previous hard-coded 7
// was the length of the alternative sample "GTGTGCF", one more than the
// 6-character "aabaaf", so the matchers compared the terminating NUL as if it
// were part of the pattern.
//
// s: string object to fill in; it ends up pointing at static storage, so the
//    caller must not free s->ch.
void initStringb(SString* s) {
    static char text[] = "aabaaf";

    s->ch = text;
    s->length = (int)(sizeof text - 1);  // exclude the terminating NUL
}
// BF (brute force; time complexity on the order of m*n).
//
// Walks the main string while tracking how many pattern characters are
// currently matched. On a mismatch the main-string cursor rewinds to one past
// the start of the failed attempt and the pattern restarts from its beginning.
//
// Returns true as soon as pattern->length consecutive characters have matched,
// false once the main string is exhausted.
bool BF(SString* s, SString* pattern) {
    int pos = 0;      // cursor in the main string
    int matched = 0;  // pattern characters matched in the current attempt

    while (pos < s->length) {
        if (s->ch[pos] != pattern->ch[matched]) {
            // Restart one character after where this attempt began.
            pos = pos - matched + 1;
            matched = 0;
            continue;
        }

        pos++;
        matched++;
        if (matched == pattern->length) {
            return true;
        }
    }

    return false;
}
// ===== RK: hash values are used to pre-filter candidate positions
// (original note: time complexity n) ===== start
//
// Computes the additive hash of a window of s: the sum of (character - 'A')
// over the `length` characters starting at position `index`.
// No bounds checking is performed here; the window must lie inside s->ch.
int getHash(SString* s, int index, int length) {
    int sum = 0;
    int pos = index;

    for (int left = length; left > 0; left--) {
        sum += s->ch[pos] - 'A';
        pos++;
    }

    return sum;
}
// Checks whether the substring of the main string that starts at `index` and
// is pattern->length characters long equals the pattern, character by character.
// Returns true on a full match (including a pattern of length 0), false at the
// first differing character.
bool specificSToPattern(SString* s, int index, SString* pattern) {
    int k = 0;

    // Advance while characters agree; stop early at the first mismatch.
    while (k < pattern->length && s->ch[k + index] == pattern->ch[k]) {
        k++;
    }

    return k >= pattern->length;
}
// Rolls the additive window hash one position to the right.
//
// sHash: hash of the current window.
// head:  character leaving the window (its first character).
// tail:  character entering the window (the one just past its end).
// Returns the hash of the window shifted by one position.
int getNextHash(int sHash, char head, char tail) {
    int leaving = head - 'A';
    int entering = tail - 'A';

    return sHash - leaving + entering;
}
// Hash-filtered search: compares the additive hash of each pattern-sized window
// of the main string with the pattern's hash, and only runs the full
// character-by-character comparison when the hashes agree.
//
// Fixes relative to the previous version:
//  - a pattern longer than the main string is rejected up front instead of
//    hashing past the end of the main string;
//  - the window hash is rolled forward on every step, including after a hash
//    hit whose full comparison failed (previously the stale hash was kept, so
//    every later window was compared in full);
//  - the hash is not rolled past the last window, which used to read
//    s->ch[s->length].
//
// Prints the hashes it computes as trace output. Returns true if the pattern
// occurs in s, false otherwise.
bool RK(SString* s, SString* pattern) {
    int m = pattern->length;
    int last = s->length - m;  // start index of the last full window

    if (last < 0) {
        return false;  // pattern cannot fit into the main string
    }

    // Hash of the pattern.
    int pHash = getHash(pattern, 0, m);
    printf("%d=====模式串的哈希值\n", pHash);
    // Hash of the first window of the main string.
    int sHash = getHash(s, 0, m);
    printf("%d=====主串的哈希值\n", sHash);

    for (int i = 0; i <= last; i++) {
        // Equal hashes only make the window a candidate; confirm it for real.
        if (sHash == pHash && specificSToPattern(s, i, pattern)) {
            printf("匹配成功\n");
            return true;
        }

        // Slide the window by one, but only if another window exists.
        if (i < last) {
            sHash = getNextHash(sHash, s->ch[i], s->ch[i + m]);
            printf("主串的第%d个位置的哈希值======>%d\n", i + 1, sHash);
        }
    }

    return false;
}
// ===== RK (hash pre-filter) ===== end
// ===== KMP: the next array decides where the pattern resumes after a
// mismatch; the main-string cursor never moves backwards
// (original note: time complexity m+n) ===== start
//
// Fills the next array for the pattern: next[i] is the length of the longest
// proper prefix of pattern[0..i] that is also a suffix of it.
//
// next:    output buffer with room for at least pattern->length ints.
// pattern: pattern string to analyse.
//
// An empty pattern leaves next untouched; the previous version wrote next[0]
// unconditionally, which overruns a zero-sized buffer.
void getNext(int* next, SString* pattern) {
    if (pattern->length <= 0) {
        return;
    }

    int border = 0;  // length of the current matched prefix/suffix
    next[0] = 0;
    for (int i = 1; i < pattern->length; i++) {
        // Fall back to shorter borders until one can be extended by ch[i].
        while (border > 0 && pattern->ch[i] != pattern->ch[border]) {
            border = next[border - 1];
        }
        if (pattern->ch[i] == pattern->ch[border]) {
            border++;
        }
        next[i] = border;
    }
}
// KMP search: uses the next array of the pattern so that, after a mismatch,
// only the pattern index falls back while the main-string index never moves
// backwards.
//
// Fixes relative to the previous version:
//  - the loop condition was `i < s->length, j < pattern->length`; the comma
//    operator discards the first test, so the scan ran past the end of the
//    main string whenever no match existed. Both bounds are now enforced;
//  - the next array is released on every path (it used to leak);
//  - an allocation failure is detected instead of dereferencing NULL;
//  - an empty pattern returns false without allocating (the old code reached
//    the same result, but only after writing into a zero-sized buffer).
//
// Prints each comparison as trace output. Returns true if the pattern occurs
// in s; false if it does not, or if the next array cannot be allocated.
bool KMP(SString* s, SString* pattern) {
    if (pattern->length <= 0) {
        return false;
    }

    int* next = (int*)malloc(sizeof *next * (size_t)pattern->length);
    if (next == NULL) {
        return false;  // cannot build the fallback table
    }
    getNext(next, pattern);

    bool found = false;
    int i = 0;  // index into the main string
    int j = 0;  // index into the pattern
    while (i < s->length && j < pattern->length) {
        if (s->ch[i] == pattern->ch[j]) {
            printf("%d==相等==%d==j==>%d===i===>%d\n", s->ch[i], pattern->ch[j],j,i);
            j++;
            i++;
            if (j == pattern->length) {
                printf("匹配成功\n");
                found = true;  // j == length also terminates the loop
            }
        } else {
            printf("%d==不相等==%d===j=》%d==i==>%d\n", s->ch[i], pattern->ch[j], j,i);
            if (j > 0) {
                // Resume from the longest border of the matched prefix.
                j = next[j - 1];
            } else {
                i++;
            }
        }
    }

    free(next);
    return found;
}
// ===== KMP ===== end
// Prints the verdict line shared by the three matcher demos.
static void printVerdict(bool matched) {
    printf("%s\n", matched ? "匹配成功" : "匹配失败");
}

// Demo driver: builds the sample main string and pattern, then runs the BF,
// RK and KMP matchers in turn, printing a start marker, the verdict and an
// end marker for each.
//
// Declared as int main(void) and returning 0: standard C requires main to
// return int, and void main() is only accepted by some compilers.
int main(void) {
    SString s;
    SString pattern;
    initStringa(&s);
    initStringb(&pattern);

    printf("测试BF========start\n");
    printVerdict(BF(&s, &pattern));
    printf("测试BF========end\n");

    printf("测试RK========start\n");
    printVerdict(RK(&s, &pattern));
    printf("测试RK========end\n");

    printf("测试KMP========start\n");
    printVerdict(KMP(&s, &pattern));
    printf("测试KMP========end\n");

    return 0;
}