KMP算法代码实现

KMP算法,看了好久,才写出代码,理解起来有点难度,后来在捋顺了关系后,再回顾就没有之前那样困难了

KMP算法的关键在于求next,而next的计算则在于前缀字符串的计算,将问题分解,先求出f(最大前缀字符串),之后再求next

KMP是一个单模式匹配算法,时间复杂度为O(m+n),其中m是待搜索目标串的长度,n是模式串的长度(预处理模式串求next数组需要O(n),匹配过程需要O(m))。


写出KMP算法代码,主要参考了2篇文章,在本处附上:

https://wenku.baidu.com/view/8f0bba4bb9d528ea80c7790a.html?from_page=view&from_mod=download

http://blog.csdn.net/joylnwang/article/details/6778316/

原理可参考上面的两篇文章,这里附上我的实现代码,希望对理解该算法有帮助。

#include<iostream>  
#include<vector>  
/**
 * Builds the optimized KMP fallback table ("next") for `pattern`.
 *
 * Every table here is 1-based so that it lines up with the matcher:
 * position j denotes the character pattern[j - 1], and slot 0 of each
 * vector is unused padding that stays 0.
 *
 *   f[j]    = (length of the longest proper prefix of pattern[0 .. j-2]
 *             that is also a suffix of it) + 1, with f[1] = 0. It is the
 *             1-based pattern position to retry after a mismatch at j.
 *   next[j] = f[j] when the character at position f[j] differs from the
 *             one at position j; otherwise retrying f[j] would fail on the
 *             very same text character, so next[f[j]] is used instead.
 *
 * A value of 0 means there is no position left to retry and the matcher
 * has to move on to the next text character.
 *
 * @param pattern  the pattern to preprocess (may be empty)
 * @return the `next` table, of size pattern.size() + 1; {0} for an empty
 *         pattern
 */
std::vector<int> getNext(const std::string& pattern)
{
    const int n = static_cast<int>(pattern.size());
    std::vector<int> f(n + 1, 0);     // f[0] and f[1] stay 0
    std::vector<int> next(n + 1, 0);  // next[0] and next[1] stay 0

    for (int i = 2; i <= n; ++i)
    {
        // t is the candidate position whose character must equal the one
        // at position i - 1 for the border to grow by one. The candidates
        // are f[i-1], f[f[i-1]], ... in decreasing order.
        int t = f[i - 1];
        while (t > 0 && pattern[t - 1] != pattern[i - 2])
        {
            t = f[t];
        }
        // t == 0 means no border could be extended, which yields f[i] = 1.
        f[i] = t + 1;
    }

    for (int i = 2; i <= n; ++i)
    {
        const int k = f[i];  // always >= 1 for i >= 2
        // k < i, so next[k] has already been finalized when it is read.
        next[i] = (pattern[i - 1] != pattern[k - 1]) ? k : next[k];
    }

    return next;
}
  
/**
 * Searches `target` for `pattern` with KMP and appends the 0-based start
 * offset of every occurrence found to `matches`.
 *
 * After a complete match the comparison restarts at the first pattern
 * character on the following text character, so an occurrence that
 * overlaps a previously reported one is not reported.
 *
 * @param target   text to search in
 * @param pattern  text to search for; an empty pattern reports nothing
 * @param matches  output list; results are appended, existing entries are
 *                 left untouched
 */
void kmp(const std::string& target, const std::string& pattern, std::vector<int>& matches)
{
    // Without this guard an empty pattern would index past the end of both
    // the pattern and its one-element fallback table.
    if (pattern.empty())
    {
        return;
    }

    const std::vector<int> next = getNext(pattern);
    const int targetLen = static_cast<int>(target.size());
    const int patternLen = static_cast<int>(pattern.size());

    int i = 0;  // 0-based position in target
    int j = 1;  // 1-based position in pattern
    while (i < targetLen)
    {
        if (target[i] == pattern[j - 1])
        {
            if (j == patternLen)
            {
                matches.push_back(i - patternLen + 1);
                j = 1;  // start over from the first pattern character
            }
            else
            {
                ++j;
            }
            ++i;
        }
        else
        {
            j = next[j];
            if (j == 0)
            {
                // No pattern position can line up with target[i]: skip it.
                j = 1;
                ++i;
            }
        }
    }
}
  
/**
 * Demo driver: searches a fixed target string for a fixed pattern and
 * prints one "offset:matched-text" line per hit, or "not match" when the
 * search found nothing.
 */
int main(int argc, char *argv[])
{
    const std::string pattern = "accabcacac";
    const std::string target  = "abcacbdefaccabcacacacadfabcacababcacbdefacaccabcacaccabcacacadf";

    std::vector<int> matches;
    kmp(target, pattern, matches);

    if (matches.empty())
    {
        std::cout << "not match" << std::endl;
        return 0;
    }

    for (const int start : matches)
    {
        // Echo the matched slice so each hit can be checked by eye.
        std::cout << start << ":" << target.substr(start, pattern.size()) << std::endl;
    }

    return 0;
}


如果pattern的后缀与前缀有重复(比如abcdefab),那么在一次匹配完成之后,下一次匹配不必从pattern的开头重新比较,而可以直接从c的位置继续,这样就能找出相互重叠的匹配。请对比下面这份代码与上面代码的结果有什么不同:

#include<iostream>
#include<vector>
/**
 * Fills the KMP prefix table `f` and the optimized fallback table `next`
 * for `pattern`.
 *
 * Both tables are 1-based: position j denotes pattern[j - 1], and slot 0
 * is unused padding that is set to 0.
 *
 *   f[j]    = (length of the longest proper prefix of pattern[0 .. j-2]
 *             that is also a suffix of it) + 1, with f[1] = 0.
 *   next[j] = f[j] when the character at position f[j] differs from the
 *             one at position j; otherwise next[f[j]], because retrying
 *             f[j] would fail on the same text character again.
 *
 * @param pattern  the pattern to preprocess (may be empty)
 * @param f        output; resized to pattern.size() + 1 and overwritten
 * @param next     output; resized to pattern.size() + 1 and overwritten
 */
void getNext(const std::string& pattern, std::vector<int>& f, std::vector<int>& next)
{
	const int n = static_cast<int>(pattern.size());

	// Size and zero both tables here so the result does not depend on what
	// the caller passed in.
	f.assign(n + 1, 0);
	next.assign(n + 1, 0);

	for (int i = 2; i <= n; ++i)
	{
		// Try the candidates f[i-1], f[f[i-1]], ... until one of them can be
		// extended by the character at position i - 1.
		int t = f[i - 1];
		while (t > 0 && pattern[t - 1] != pattern[i - 2])
		{
			t = f[t];
		}
		// t == 0 means nothing could be extended, which yields f[i] = 1.
		f[i] = t + 1;
	}

	for (int i = 2; i <= n; ++i)
	{
		const int k = f[i];  // always >= 1 for i >= 2
		// k < i, so next[k] has already been finalized when it is read.
		next[i] = (pattern[i - 1] != pattern[k - 1]) ? k : next[k];
	}
}

/**
 * Searches `target` for `pattern` with KMP and appends the 0-based start
 * offset of every occurrence found to `matches`.
 *
 * Unlike a matcher that restarts from the first pattern character after a
 * hit, this one resumes inside the pattern whenever a prefix of the
 * pattern is also a suffix of it, so occurrences that overlap an earlier
 * one can be reported as well.
 *
 * @param target   text to search in
 * @param pattern  text to search for; an empty pattern reports nothing
 * @param matches  output list; results are appended, existing entries are
 *                 left untouched
 */
void kmp(const std::string& target, const std::string& pattern, std::vector<int>& matches)
{
	// Without this guard an empty pattern would index past the end of both
	// the pattern and its one-element tables.
	if (pattern.empty())
	{
		return;
	}

	const int targetLen = static_cast<int>(target.size());
	const int patternLen = static_cast<int>(pattern.size());

	std::vector<int> f(patternLen + 1, 0);
	std::vector<int> next(patternLen + 1, 0);
	getNext(pattern, f, next);

	int i = 0;  // 0-based position in target
	int j = 1;  // 1-based position in pattern
	while (i < targetLen)
	{
		if (target[i] == pattern[j - 1])
		{
			if (j == patternLen)
			{
				matches.push_back(i - patternLen + 1);

				// Key step: find the longest prefix of the pattern that is
				// also a suffix of the whole pattern and resume right after
				// it. Walking the f chain tries every candidate length, and
				// the t > 0 test keeps a one-character pattern (f[1] == 0)
				// from indexing pattern[-1].
				int t = f[j];
				while (t > 0 && pattern[t - 1] != pattern[j - 1])
				{
					t = f[t];
				}
				j = t + 1;
				++i;
			}
			else
			{
				++i;
				++j;
			}
		}
		else
		{
			j = next[j];
			if (j == 0)
			{
				// No pattern position can line up with target[i]: skip it.
				j = 1;
				++i;
			}
		}
	}
}

/**
 * Demo driver for the overlap-aware matcher: runs the search on a fixed
 * target/pattern pair and prints one "offset:matched-text" line per hit,
 * or "not match" when the search found nothing.
 */
int main(int argc, char *argv[])
{
	const std::string pattern = "accabcacac";
	const std::string target  = "abcacbdefaccabcacacacadfabcacababcacbdefacaccabcacaccabcacacadf";

	std::vector<int> matches;
	kmp(target, pattern, matches);

	if (!matches.empty())
	{
		// Compare this listing with the output of the restart-from-scratch version.
		for (std::vector<int>::const_iterator it = matches.begin(); it != matches.end(); ++it)
		{
			std::cout << *it << ":" << target.substr(*it, pattern.size()) << std::endl;
		}
	}
	else
	{
		std::cout << "not match" << std::endl;
	}

	return 0;
}



编译:g++ kmp.cpp -g -std=c++11 -o kmp

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值