1月9号学习总结

陌言不会python

已于 2023-01-10 10:59:29 修改

阅读量123

点赞数

文章标签：学习

于 2023-01-09 22:21:30 首次发布

本文链接：https://blog.csdn.net/weixin_72090484/article/details/128615901

版权

自习

KMP算法

作用：

在一个已知字符串中查找子串的位置,也叫做串的模式匹配

与之作用相同的是朴素的模式匹配算法，但其最坏时间复杂度为 O(n·m)（n 为主串长度，m 为模式串长度），远高于KMP算法的 O(n+m)

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * Build the KMP failure table for the NUL-terminated string str.
 *
 * str:  string to analyse (the pattern).
 * next: output array with room for at least strlen(str) ints (and at least
 *       one, because next[0] is always written).
 *
 * After the call, next[i] is the index of the last character of the longest
 * proper prefix of str[0..i] that is also a suffix of str[0..i], i.e. the
 * border length minus one; -1 means there is no such border. It is also the
 * value the matcher falls back to on a mismatch after index i.
 */
void next_cre(char *str, int *next)
{
	int len = strlen(str);
	int border = -1; /* last index of the border currently being extended */
	int pos = 0;

	next[0] = border; /* a single character has no proper border */
	while (++pos < len) {
		/* Fall back to shorter borders until one can be extended by str[pos]. */
		for (; border >= 0 && str[border + 1] != str[pos]; border = next[border]) {
		}
		if (str[pos] == str[border + 1]) {
			border += 1;
		}
		next[pos] = border;
	}
}
/*
 * Find the first occurrence of ptr inside str with the KMP algorithm.
 *
 * str: NUL-terminated text to search in.
 * ptr: NUL-terminated pattern to search for.
 *
 * Returns the 0-based index in str at which the first match starts, 0 when
 * ptr is empty, and -1 when str does not contain ptr or the failure table
 * cannot be allocated.
 */
int KMP(char *str,  char *ptr) {
	int slen = strlen(str), plen = strlen(ptr);
	if (plen == 0) {
		return 0; /* the empty pattern matches at the very start */
	}

	/* One table entry per pattern character (not sizeof(plen) bytes). */
	int *next = (int *)malloc((size_t)plen * sizeof *next);
	if (next == NULL) {
		return -1;
	}
	next_cre(ptr, next); /* fill the failure table */

	int found = -1;
	int k = -1; /* index of the last pattern character matched so far */
	for (int i = 0; i < slen; i++) {
		/* Mismatch after a partial match: fall back to the next shorter border. */
		while (k > -1 && ptr[k + 1] != str[i]) {
			k = next[k];
		}
		if (ptr[k + 1] == str[i]) {
			k++;
		}
		if (k == plen - 1) {
			/*
			 * The whole pattern is matched and ends at str[i]. To report
			 * every occurrence instead of the first one, record the position
			 * here and continue with k = next[k] rather than stopping.
			 */
			found = i - plen + 1;
			break;
		}
	}

	free(next);
	return found;
}
/* Demo driver: search a fixed pattern in a fixed text and print the result of KMP. */
int main() {
	char text[27] = "HMABCDABGHABCH";
	char pattern[13] = "ABCDABH";
	int position = KMP(text, pattern);

	printf("%d", position);
	return 0;
}

其中的next数组作用有两个：

一是之前提到的，

在上面的代码中，next[i] 的值是子串 str[0..i]（含下标 i）的最长相等前后缀的长度减 1，也就是该前缀最后一个字符的下标；-1 表示不存在相等的前后缀。

二是：

表示该处字符不匹配时应该回溯到的字符的下标

该算法的核心逻辑：回溯

例题：KMP字符串匹配

【模板】KMP字符串匹配 - 洛谷

题目描述

给出两个字符串 $s_1$ 和 $s_2$，若 $s_1$ 的区间 $[l, r]$ 子串与 $s_2$ 完全相同，则称 $s_2$ 在 $s_1$ 中出现了，其出现位置为 $l$。
现在请你求出 $s_2$ 在 $s_1$ 中所有出现的位置。

定义一个字符串 $s$ 的 border 为 $s$ 的一个非 $s$ 本身的子串 $t$，满足 $t$ 既是 $s$ 的前缀，又是 $s$ 的后缀。
对于 $s_2$，你还需要求出对于其每个前缀 $s'$ 的最长 border $t'$ 的长度。

输入格式

第一行为一个字符串，即为 $s_1$。
第二行为一个字符串，即为 $s_2$。

输出格式

首先输出若干行，每行一个整数，按从小到大的顺序输出 $s_2$ 在 $s_1$ 中出现的位置。
最后一行输出 $|s_2|$ 个整数，第 $i$ 个整数表示 $s_2$ 的长度为 $i$ 的前缀的最长 border 长度。

输入输出样例

输入 #1
ABABABC
ABA
输出 #1
1
3
0 0 1 
说明/提示

样例 1 解释

。

对于 $s_2$ 长度为 $3$ 的前缀 ABA，字符串 A 既是其后缀也是其前缀，且是最长的，因此最长 border 长度为 $1$。

数据规模与约定

本题采用多测试点捆绑测试，共有 3 个子任务。

Subtask 1（30 points）：$|s_1| \le 15$，$|s_2| \le 5$。
Subtask 2（40 points）：$|s_1| \le 10^4$，$|s_2| \le 10^2$。
Subtask 3（30 points）：无特殊约定。

对于全部的测试点，保证 $1 \le |s_1|, |s_2| \le 10^6$，$s_1, s_2$ 中均只含大写英文字母。

算法：KMP

思路：略

代码：

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity of each input buffer; the problem bounds both strings by 10^6 characters. */
#define JU 1100000
/* s receives the text (s1) and t the pattern (s2); both start out as empty strings. */
char s[JU] = "";
char t[JU] = "";
/*
 * Build the KMP failure table for the first stlen characters of str.
 *
 * str:   string to analyse (the pattern).
 * next:  output array; next[0] is always written, then next[1..stlen-1].
 * stlen: number of characters of str to process.
 *
 * next[i] ends up as the index of the last character of the longest proper
 * prefix of str[0..i] that is also a suffix of it (border length minus one),
 * or -1 when there is none. The matcher uses the same value as the position
 * to fall back to after a mismatch.
 */
void cal_next(char *str, int *next, int stlen)
{
    int matched = -1; /* last index of the border being extended */

    next[0] = -1; /* a single character has no proper border */
    for (int idx = 1; idx < stlen; ++idx)
    {
        char cur = str[idx];

        /* Try successively shorter borders until cur extends one of them. */
        while (matched >= 0 && str[matched + 1] != cur)
            matched = next[matched];

        if (str[matched + 1] == cur)
            ++matched;

        next[idx] = matched;
    }
}
/*
 * Print every occurrence of ptr in str, then the border lengths of ptr.
 *
 * str:  text to search in.
 * slen: number of characters of str to scan.
 * ptr:  pattern to search for.
 * plen: length of the pattern.
 *
 * Output: one line per occurrence holding its 1-based start position, in
 * increasing order (overlapping occurrences included), followed by plen
 * integers, each followed by a space, where the i-th is the longest border
 * length of the length-i prefix of ptr.
 *
 * Nothing is printed when plen <= 0. If the failure table cannot be
 * allocated, a message is written to stderr and nothing else is printed.
 */
void KMP(char *str, int slen, char *ptr, int plen)
{
    if (plen <= 0)
        return;

    int *next = (int *)malloc((size_t)plen * sizeof *next);
    if (next == NULL)
    {
        fputs("KMP: out of memory\n", stderr);
        return;
    }
    cal_next(ptr, next, plen); /* fill the failure table */

    int k = -1; /* index of the last pattern character matched so far */
    for (int i = 0; i < slen; i++)
    {
        /* Mismatch after a partial match: fall back to the next shorter border. */
        while (k > -1 && ptr[k + 1] != str[i])
            k = next[k];
        if (ptr[k + 1] == str[i])
            k = k + 1;
        if (k == plen - 1) /* whole pattern matched, ending at str[i] */
        {
            printf("%d\n", i - plen + 2);
            /*
             * Continue from the longest border of the pattern instead of
             * rewinding i: overlapping matches are still found and the scan
             * stays linear in slen.
             */
            k = next[k];
        }
    }

    /* next[] stores border length minus one, so add one back for output. */
    for (int i = 0; i < plen; i++)
    {
        printf("%d ", next[i] + 1);
    }

    free(next);
}
/*
 * Read the text and the pattern from standard input and run KMP on them.
 * Returns 0 on success and 1 when either string cannot be read.
 */
int main(){
	/*
	 * %s skips leading whitespace, so the newline between the two input
	 * lines is never stored (the reason gets was avoided). The field width
	 * 1099999 is JU - 1 and must be kept in sync with the buffer size so a
	 * long line cannot overflow s or t. The arrays are passed as s / t
	 * (char *), not &s / &t, to match what %s expects.
	 */
	if (scanf("%1099999s", s) != 1 || scanf("%1099999s", t) != 1) {
		return 1;
	}
	KMP(s, (int)strlen(s), t, (int)strlen(t));
	return 0;
}

例题：Barn Echoes G

[USACO09OCT]Barn Echoes G - 洛谷

题目描述

The cows enjoy mooing at the barn because their moos echo back, although sometimes not completely. Bessie, ever the excellent secretary, has been recording the exact wording of the moo as it goes out and returns. She is curious as to just how much overlap there is.

Given two lines of input (letters from the set a..z, total length in the range 1..80), each of which has the wording of a moo on it, determine the greatest number of characters of overlap between one string and the other. A string is an overlap between two other strings if it is a prefix of one string and a suffix of the other string.

By way of example, consider two moos:

moyooyoxyzooo

yzoooqyasdfljkamo

The last part of the first string overlaps 'yzooo' with the first part of the second string. The last part of the second string overlaps 'mo' with the first part of the first string. The largest overlap is 'yzooo' whose length is 5.

POINTS: 50

奶牛们非常享受在牛栏中哞叫，因为她们可以听到她们哞声的回音。虽然有时候并不能完全听到完整的回音。Bessie曾经是一个出色的秘书，所以她精确地纪录了所有的哞叫声及其回声。她很好奇到底两个声音的重复部份有多长。

输入两个字符串（长度为1到80个字母），表示两个哞叫声。你要确定最长的重复部份的长度。两个字符串的重复部份指的是同时是一个字符串的前缀和另一个字符串的后缀的字符串。

我们通过一个例子来理解题目。考虑下面的两个哞声：

moyooyoxyzooo

yzoooqyasdfljkamo

第一个串的最后的部份"yzooo"跟第二个串的第一部份重复。第二个串的最后的部份"mo"跟第一个串的第一部份重复。所以"yzooo"跟"mo"都是这2个串的重复部份。其中，"yzooo"比较长，所以最长的重复部份的长度就是5。

输入格式

* Lines 1..2: Each line has the text of a moo or its echo

输出格式

* Line 1: A single line with a single integer that is the length of the longest overlap between the front of one string and end of the other.

输入输出样例

输入 #1

abcxxxxabcxabcd
abcdxabcxxxxabcx

输出 #1

11

说明/提示

'abcxxxxabcx' is a prefix of the first string and a suffix of the second string.

算法：KMP，暴力

思路：

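找最长重复子串，还不会hash，数据较小，可以直接暴力求解：

找出两个字符串中较短的一个，从较短的字符串中取出子串到较长的字符串中匹配，只要找出匹配上的子串的最长长度即可。

注意：按题意，重复部分必须同时是一个串的前缀和另一个串的后缀；上述做法求的是最长公共子串，并不总与题目要求的答案一致（例如 xabcx 与 yabcy 的公共子串 abc 长度为 3，但两者的前后缀重叠长度为 0）。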

代码：

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * Build the KMP failure table for the NUL-terminated string str.
 *
 * str:  string to analyse (the pattern).
 * next: output array with room for at least strlen(str) ints (and at least
 *       one, because next[0] is always written).
 *
 * next[i] becomes the index of the last character of the longest proper
 * prefix of str[0..i] that is also its suffix (border length minus one), or
 * -1 when no such border exists. The matcher falls back to this value after
 * a mismatch.
 */
void cal_next(char *str, int *next)
{
	int n = strlen(str);
	int tail = -1; /* last index of the border being extended */

	next[0] = -1; /* a single character has no proper border */
	for (int cur = 1; cur < n; cur++) {
		/* Shrink to shorter borders until str[cur] extends one of them. */
		while (tail >= 0 && str[cur] != str[tail + 1]) {
			tail = next[tail];
		}
		tail = (str[cur] == str[tail + 1]) ? tail + 1 : tail;
		next[cur] = tail;
	}
}
/*
 * Find the first occurrence of ptr inside str with the KMP algorithm.
 *
 * str:  NUL-terminated text to search in.
 * ptr:  NUL-terminated pattern to search for.
 * plen: length of the pattern; expected to equal strlen(ptr).
 *
 * Returns the 0-based index in str at which the first match starts, 0 when
 * plen is 0, and -1 when there is no match, plen is negative, or the
 * failure table cannot be allocated.
 */
int KMP(char *str,  char *ptr, int plen) {
	if (plen <= 0) {
		return plen == 0 ? 0 : -1;
	}

	int slen = strlen(str);

	/*
	 * cal_next fills strlen(ptr) entries regardless of plen, so size the
	 * table for whichever of the two lengths is larger.
	 */
	size_t entries = strlen(ptr);
	if (entries < (size_t)plen) {
		entries = (size_t)plen;
	}
	int *next = (int *)malloc(entries * sizeof *next);
	if (next == NULL) {
		return -1;
	}
	cal_next(ptr, next); /* fill the failure table */

	int found = -1;
	int k = -1; /* index of the last pattern character matched so far */
	for (int i = 0; i < slen; i++) {
		/* Mismatch after a partial match: fall back to the next shorter border. */
		while (k > -1 && ptr[k + 1] != str[i]) {
			k = next[k];
		}
		if (ptr[k + 1] == str[i]) {
			k = k + 1;
		}
		if (k == plen - 1) { /* whole pattern matched, ending at str[i] */
			found = i - plen + 1;
			break;
		}
	}

	free(next); /* released on every path; this function is called in a loop */
	return found;
}
/* Input buffers for the two moos; both start out as empty strings. */
char a[100] = "";
char b[100] = "";
/*
 * Length of the longest string that is both a prefix of head and a suffix
 * of tail; 0 when they share none. Candidates are tried from longest to
 * shortest, so the first hit is the answer.
 */
static int overlap_len(const char *head, int headlen, const char *tail, int taillen)
{
	int limit = headlen < taillen ? headlen : taillen;

	for (int len = limit; len > 0; len--) {
		if (memcmp(head, tail + (taillen - len), (size_t)len) == 0) {
			return len;
		}
	}
	return 0;
}

/*
 * Read two words and print the length of their longest overlap, where an
 * overlap is a string that is a prefix of one word and a suffix of the
 * other. Both directions are checked and the larger result is printed.
 * Returns 0 on success and 1 when the two words cannot be read.
 */
int main() {
	/* Width 99 keeps each word inside its 100-byte buffer; a and b decay to char *. */
	if (scanf("%99s%99s", a, b) != 2) {
		return 1;
	}
	int alen = strlen(a), blen = strlen(b);

	int a_then_b = overlap_len(b, blen, a, alen); /* suffix of a == prefix of b */
	int b_then_a = overlap_len(a, alen, b, blen); /* suffix of b == prefix of a */

	printf("%d", a_then_b > b_then_a ? a_then_b : b_then_a);
	return 0;
}