第四章数据结构与算法- 串

最新推荐文章于 2023-01-29 16:33:57 发布

feiyangxiaomi

最新推荐文章于 2023-01-29 16:33:57 发布

阅读量1k

点赞数 1

分类专栏：密码学文章标签： BF KMP 求最大字串兄弟字符

本文链接：https://blog.csdn.net/feiyangxiaomi/article/details/12953609

版权

密码学专栏收录该内容

4 篇文章 0 订阅

订阅专栏

字符串数据是计算机非数值处理的主要对象之一。在早期字符串作为输入输出的变量出现，现已发展成为字符串类型，可以进行一系列的操作。

字符串一般简称为串。在汇编和编译程序中，源程序是字符串数据。在事务处理程序中，顾客的姓名和地址以及货物的名称、产地等也是字符串。此外，如信息检索系统、文字编辑程序、自然语言翻译系统等都是以字符串为处理对象。

然而，目前的计算机硬件结构主要是面向数值计算的需要，基本上没有提供处理字符串数据操作的指令，需要用软件来实现字符串类型。由于各种应用具有不同的特点，要有效地实现字符串处理，必须根据具体情况使用合适的存储结构。

1、********************************************************************************************

****这里有两种基本算法：BF算法（Brute-Force，最基本的字符串匹配算法）、

****KMP算法。

BF算法：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/***********************************************************************
****这里A是主串，B是模式串.判断B是否为A的字串。 
****
*/
/* Number of character comparisons performed by StrBF (incremented as a side effect). */
int count=0;

/* Declared before use because StrBF is defined after main. */
int StrBF(char A[], char B[]);

/*
 * Demo driver for the brute-force matcher: checks whether pattern B occurs
 * in text A, reports the outcome and prints the comparison counter.
 */
int main(int argc, char *argv[]) {
	char A[]="ababcdeec", B[]="abcdee";

	(void)argc;
	(void)argv;

	/* StrBF takes exactly two arguments; the counter is the global above. */
	if(1 == StrBF(A, B)){
		printf("successed !\n");
	}else{
		printf("failed !");
	}
	printf("执行了%d次\n", count);
	return 0;
}

/*
 * Brute-force (BF) substring search.
 *
 * A: text to search in (NUL-terminated).
 * B: pattern to search for (NUL-terminated).
 *
 * Returns 1 when B occurs somewhere in A (an empty B always matches),
 * and -1 otherwise. Every character comparison increments the global
 * counter `count`.
 */
int StrBF(char A[], char B[]){
	int aLen = (int)strlen(A);
	int bLen = (int)strlen(B);
	int i, j = 0;

	/* An empty pattern is contained in every string. */
	if(bLen == 0)
		return 1;

	for(i=0; i<aLen; i++){
		count++;
		if(A[i] == B[j]){
			j++;
			if(j == bLen)
				return 1;
		}else{
			/*
			 * The current attempt started at i-j. Rewind to that start so
			 * the loop's own i++ moves to the very next candidate start;
			 * rewinding to i-j+1 would skip one position.
			 */
			i = i-j;
			j = 0;
		}
	}
	return -1;
}

KMP算法：

在KMP算法中，如果有部分字符已经匹配，即当前S[i] = T[j]，例S=cababc T=ababd 可以说i=1，j=0，此时S[1]=T[0] ，那么现在i++，j++，继续匹配，直到S[i] !=T[j]的时候，那么此时i=5，j=4。这时候i不动，保持不变，并且让j=next[j]，这里可能大家就迷惑了，next[j]哪里来的，为什么要让j=next[j] ？这就是KMP算法的难点，也是算法的核心。

这里我具体描述一下：已匹配部分abab的前缀ab与后缀ab相同（即T[0、1] = T[2、3] = ab），而我们又知道S[3、4] = T[2、3] = ab，所以S[3、4]必然等于T[0、1]。于是令 j = next[j] = 2（这里下标从0开始），直接让 S[5] 和 T[2] 进行比较，就不用回溯到初始位置。这就是KMP的理念：寻找next[j]。

KMP算法分为两部分，首先计算next()函数，然后进行串匹配。

例如：T = ababd

j	1	2	3	4	5
T[j]	a	b	a	b	d
next[j]	0	1	1	2	3
（注：表中下标从1开始；在位置5失配时，已匹配的abab的最长相同前后缀是ab，长度为2，所以next[5]=2+1=3，对应前文从0开始计数时的 j=2。）

现在KMP算法已经讲述的很清楚了，下面我们程序实现。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/***********************************************************************
****这里A是主串，B是模式串.判断B是否为A的字串。 
****char A[]="ababcdeec", B[]="abab";
****只有形参为指针的时候才能改变实参的值 
*/
int *StrKmpNext(char B[]);

/*
 * Demo driver: builds the next table for pattern B and prints next[1]
 * through next[strlen(B)], stopping at the -1 sentinel that StrKmpNext
 * stores after the last entry, then prints the pattern length.
 */
int main(int argc, char *argv[]) {
	char B[]="abab";
	int *next;
	int *p;
	int len;

	(void)argc;
	(void)argv;

	len = strlen(B);
	next = StrKmpNext(B);
	if(next == NULL){
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	/* Keep the base pointer intact so the table can be released afterwards. */
	for(p = next+1; -1 != *p; p++){
		printf("%d\t", *p);
	}
	printf("执行了%d次\n", len);
	free(next);
	return 0;
}
//求取next数组 
/*
 * Build the KMP next (failure) table for pattern B.
 *
 * The table is indexed by 1-based pattern position: for 1 <= k <= strlen(B),
 * next[k] is the 1-based pattern position to resume comparing from after a
 * mismatch at position k, and next[1] is 0. next[0] is unused (set to 0) and
 * next[strlen(B)+1] holds the sentinel -1 that marks the end of the table.
 *
 * Returns a malloc'd array of strlen(B)+2 ints that the caller must free,
 * or NULL if the allocation fails.
 */
int *StrKmpNext(char B[]){
	int bLen = (int)strlen(B);
	int i, j;
	/* Slots 0..bLen plus one slot for the sentinel. */
	int *next = malloc(((size_t)bLen + 2) * sizeof *next);

	if(next == NULL)
		return NULL;

	for(i=0; i<=bLen; i++){
		next[i] = 0;
	}

	i = 1;
	j = 0;
	while(i<bLen){
		/*
		 * i and j are 1-based positions while B is 0-based, hence the -1
		 * offsets. j == 0 is tested first so B[j-1] is never read at -1.
		 */
		if(j == 0 || B[i-1] == B[j-1]){
			i++;
			j++;
			next[i] = j;
		}else{
			j = next[j];
		}
	}
	next[bLen+1] = -1;
	return next;
}

根据已经拿到的next值，使用KMP算法判定B是否为A的子串。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/***********************************************************************
****这里A是主串，B是模式串.判断B是否为A的字串。 
****char A[]="ababcdeec", B[]="abab";
****只有形参为指针的时候才能改变实参的值 
*/
int *StrKmpNext(char B[]);

/*
 * Demo driver: uses the next table from StrKmpNext to decide with KMP
 * whether pattern B occurs in text A, and prints the verdict.
 */
int main(int argc, char *argv[]) {
	char A[]="adababeec", B[]="abab";
	int *next;
	int aLen, len;
	int i = 0, j = 0;	/* i: index into A; j: number of pattern chars matched */
	int found = 0;

	(void)argc;
	(void)argv;

	aLen = (int)strlen(A);
	len = (int)strlen(B);
	next = StrKmpNext(B);
	if(next == NULL){
		fprintf(stderr, "out of memory\n");
		return 1;
	}

	while(i < aLen){
		if(A[i] == B[j]){
			i++;
			j++;
			if(j == len){
				found = 1;
				break;
			}
		}else if(j == 0){
			/* Nothing matched yet: move on to the next text character. */
			i++;
		}else{
			/*
			 * Mismatch at 0-based pattern index j, i.e. 1-based position
			 * j+1. The table is 1-based, so convert the resume position
			 * back to 0-based. i stays put so A[i] is compared again.
			 */
			j = next[j+1] - 1;
		}
	}

	free(next);
	if(found){
		printf("B是A的字串\n");
	}else{
		printf("B不是A的字串\n");
	}
	return 0;
}
//求取next数组 
/*
 * Build the KMP next (failure) table for pattern B.
 *
 * The table is indexed by 1-based pattern position: for 1 <= k <= strlen(B),
 * next[k] is the 1-based pattern position to resume comparing from after a
 * mismatch at position k, and next[1] is 0. next[0] is unused (set to 0) and
 * next[strlen(B)+1] holds the sentinel -1 that marks the end of the table.
 *
 * Returns a malloc'd array of strlen(B)+2 ints that the caller must free,
 * or NULL if the allocation fails.
 */
int *StrKmpNext(char B[]){
	int bLen = (int)strlen(B);
	int i, j;
	/* Slots 0..bLen plus one slot for the sentinel. */
	int *next = malloc(((size_t)bLen + 2) * sizeof *next);

	if(next == NULL)
		return NULL;

	for(i=0; i<=bLen; i++){
		next[i] = 0;
	}

	i = 1;
	j = 0;
	while(i<bLen){
		/*
		 * i and j are 1-based positions while B is 0-based, hence the -1
		 * offsets. j == 0 is tested first so B[j-1] is never read at -1.
		 */
		if(j == 0 || B[i-1] == B[j-1]){
			i++;
			j++;
			next[i] = j;
		}else{
			j = next[j];
		}
	}
	next[bLen+1] = -1;
	return next;
}

KMP算法在判断一个串是否为另一个串的子串时，若模式串内部的重复度特别高，则效率提升会特别明显。

2、/*********************************************
****给定字符串A和B，输出A和B中的最大公共子串。
****比如A="aocdfe" B="pmcdfa" 则输出"cdf"
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*********************************************
****给定字符串A和B,输出A和B中的最大公共子串。
****比如A="aocdfe" B="pmcdfa" 则输出"cdf" 
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
char *commanstring(char shortstring[],char longstring[]);
/*
 * Demo driver: passes the shorter of the two sample strings as the first
 * argument of commanstring and prints the longest common substring.
 */
int main(int argc, char *argv[]) {
	char *str1 = "aocdfe";
	char *str2 = "aocfe";
	char *comman = NULL;

	(void)argc;
	(void)argv;

	if(strlen(str1)>strlen(str2))
		comman= commanstring(str2,str1);
	else
		comman = commanstring(str1,str2);

	/* commanstring returns NULL when nothing is shared; %s with NULL is undefined. */
	if(comman != NULL)
		printf("the longest comman string is:%s\n",comman);
	else
		printf("no common substring found\n");

	/* Release the result only if it is a separate buffer, not one of the inputs. */
	if(comman != str1 && comman != str2)
		free(comman);
	return 0;
}
/*
 * Find the longest substring of shortstring that also occurs in longstring.
 *
 * Candidates are tried from the longest length down to 1 and, within a
 * length, from left to right, so the first longest match is the one returned.
 *
 * shortstring: the string whose substrings are enumerated (callers pass the
 *              shorter of the two strings).
 * longstring:  the string searched with strstr.
 *
 * Returns a newly malloc'd NUL-terminated copy of the match, which the
 * caller must free. An empty shortstring yields an empty string. Returns
 * NULL when the strings share no character or when allocation fails.
 */
char *commanstring(char shortstring[],char longstring[])
{
	size_t lenShort = strlen(shortstring);
	size_t len, start;
	/* Sized from the input so long strings cannot overflow the buffer. */
	char *substring = malloc(lenShort + 1);

	if(substring == NULL)
		return NULL;

	if(lenShort == 0){
		substring[0] = '\0';
		return substring;
	}

	for(len = lenShort; len > 0; len--){
		for(start = 0; start + len <= lenShort; start++){
			memcpy(substring, &shortstring[start], len);
			substring[len] = '\0';
			if(strstr(longstring, substring) != NULL)
				return substring;
		}
	}

	free(substring);
	return NULL;
}

3、/*********************************************
***如果两个字符串的字符一样，但是顺序不一样，被认为是兄弟字符串，

***问如何迅速匹配兄弟字符串（如，bad和adb就是兄弟字符串）。
*/

#include <stdio.h>
#include <stdlib.h>

/* run this program using the console pauser or add your own getch, system("pause") or input loop 
*如果两个字符串的字符一样，但是顺序不一样，被认为是兄弟字符串，问如何迅
速匹配兄弟字符串（如，bad和adb就是兄弟字符串）。思路：一种方法是给每个字符分配一个素数，
判断各自素数乘积是否相等；下面的程序采用的是统计两个串中各字符出现的次数并逐一比较的方法。
更多方法请参见：http://blog.csdn.net/v_JULY_v/article/details/6347454。
*date:2013/11/2 
*@author_hanzhen
*/
/* Declared before use because array is defined after main. */
int array(char A[],char B[]);

/*
 * Demo driver: reports whether the two sample strings are "brother"
 * strings, i.e. array() returns 1 for them.
 */
int main(int argc, char *argv[]) {
	char A[]={"bad"},B[]={"adb"};

	(void)argc;
	(void)argv;

	if(array(A,B) == 1)
		printf("A and B are the brothers!");
	else
		printf("NO,error!");
	return 0;
}

/*
 * Decide whether A and B are "brother" strings: the same characters with
 * the same multiplicities, in any order.
 *
 * A, B: NUL-terminated strings; any byte values are accepted.
 *
 * Returns 1 when the two strings have equal length and identical character
 * counts, and -1 otherwise.
 */
int array(char A[],char B[]){
	/* One counter per possible byte value: +1 for each char of A, -1 for each of B. */
	int balance[256]={0};
	const unsigned char *a = (const unsigned char *)A;
	const unsigned char *b = (const unsigned char *)B;
	int i;

	/* Walk both strings in lock-step; unsigned char keeps every index in 0..255. */
	while(*a != '\0' && *b != '\0'){
		balance[*a]++;
		balance[*b]--;
		a++;
		b++;
	}

	/* If one string has characters left, the lengths differ. */
	if(*a != '\0' || *b != '\0')
		return -1;

	for(i=0;i<256;i++){
		if(balance[i] != 0)
			return -1;
	}
	return 1;
}