利用KMP算法求解输入一个子串输出其在英文段落中哪个单词的第几个位置出现(C语言)

最新推荐文章于 2023-04-06 21:42:42 发布

水獭先生

最新推荐文章于 2023-04-06 21:42:42 发布

阅读量310

点赞数

分类专栏：算法题文章标签：算法字符串 c语言

本文链接：https://blog.csdn.net/qq_39414417/article/details/104940235

版权

算法题专栏收录该内容

5 篇文章 0 订阅

订阅专栏

如下给出一个read.txt文件，里面是一个简单的英文段落：
    hello!everyone,I am a good boy in Tsinghua University,I am good at english and I
like Peking!
    I am hard-working，especially do the things I am interested in.
当我们查找子串 ng 时，我们需要输出：
在这里插入图片描述
    下面我给出代码，先给出main函数和头文件：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD 20 //每个单词的最长长度
#define MAX_LINE 1024 //暂存从文件读取的段落大小

/* Prototypes for the helpers defined later in this file, so that main() does
 * not rely on implicit function declarations (invalid since C99). */
void getNext(char *substr, int *next, int len);
void getMatch(char *substr, char **words, int count, int *next);

/*
 * Program entry point.
 *
 * Loads the paragraph stored in "read.txt", hands it to getWords() to be
 * split into words, reads a search pattern from standard input, builds the
 * KMP next table for it and lets getMatch() scan the words.
 *
 * Returns 0 on success, EXIT_FAILURE when the file cannot be opened or no
 * pattern could be read.
 */
int main() {
	int count = 0;          /* number of words */
	char substr[MAX_WORD];  /* pattern entered by the user */
	char buf[MAX_LINE];     /* holds the paragraph read from the file */
	char *words[MAX_LINE];  /* start address of every word string */

	/* The file is only read, so "r" is enough; "r+" would also require
	 * write permission on read.txt. */
	FILE *fp = fopen("read.txt", "r");
	if (fp == NULL) {
		printf("Can not open this file");
		return EXIT_FAILURE;
	}

	/* Bounded read that can never overflow buf and always leaves it
	 * NUL-terminated, even for an empty file. The text is then cut at the
	 * first '\r', which is where the former "%[^\r]" scan stopped. */
	size_t got = fread(buf, 1, sizeof buf - 1, fp);
	buf[got] = '\0';
	buf[strcspn(buf, "\r")] = '\0';
	fclose(fp);

	/* Store every word of the paragraph in words (getWords is defined
	 * outside this listing).
	 * NOTE(review): count is passed by value, so unless getWords is a macro
	 * it cannot update count here and getMatch() below would scan zero
	 * words -- confirm how getWords reports the number of words. */
	getWords(buf, words, count);

	/* Limit the token to MAX_WORD - 1 characters so it always fits substr. */
	char fmt[16];
	snprintf(fmt, sizeof fmt, "%%%ds", MAX_WORD - 1);
	if (scanf(fmt, substr) != 1) {
		return EXIT_FAILURE;
	}

	/* A successful %s conversion stores at least one character, so len >= 1
	 * and the variable-length array below never has size zero. */
	int len = (int)strlen(substr);
	int next[len];                 /* KMP next table of the pattern */
	getNext(substr, next, len);    /* fill the next table */
	getMatch(substr, words, count, next);
	return 0;
}

在getWords函数执行后，words中装的都是每个单词，所以我们再对每个单词进行分别匹配：（因为我们要用到KMP算法，所以把next数组也传入）

/*
 * Runs strMatch() once for each of the first `count` entries of `words`.
 *
 * substr: pattern to look for.
 * words:  array of word strings; entries 0 .. count-1 are visited in order.
 * count:  number of entries to visit; nothing happens when it is <= 0.
 * next:   next table of the pattern, passed through to strMatch() unchanged.
 *
 * The word number handed to strMatch() is 1-based (array index + 1).
 */
void getMatch(char *substr, char **words, int count, int *next) {
	char **cursor = words;
	int remaining;
	for(remaining = count; remaining > 0; --remaining, ++cursor) {
		strMatch(substr, *cursor, count - remaining + 1, next);
	}
}

在调用getMatch之前先初始化next数组：

/*
 * Fills the KMP "next" (failure) table for a pattern, in its optimized form:
 * when the character at the fallback position equals the character that has
 * just been reached, the fallback of the fallback is stored instead, so a
 * mismatch never retries the same character.
 *
 * substr: the pattern; at least `len` characters are read.
 * next:   output array with room for `len` ints; next[0] is always -1.
 * len:    pattern length. When len <= 0 nothing is written, because `next`
 *         then has no element that could legally be stored into.
 */
void getNext(char *substr, int *next, int len) {
	if(len <= 0) {
		return;
	}
	int j = 0;   /* last pattern position whose successor is being filled */
	int t = -1;  /* prefix position compared with j; -1 means "no prefix" */
	next[0] = -1;
	while(j < len - 1) {
		if(t != -1 && substr[j] != substr[t]) {
			/* Mismatch: fall back to a shorter candidate prefix. */
			t = next[t];
			continue;
		}
		++t;
		++j;
		/* Equal characters would fail in exactly the same way, so reuse
		 * the fallback already computed for position t. */
		next[j] = (substr[j] != substr[t]) ? t : next[t];
	}
}

回到getMatch函数，在getMatch函数中对每个单词进行匹配操作，每个单词与子串进入strMatch进行匹配:
void strMatch(char *substr, char *word, int word_index, int *next)：其中前两个参数分别是子串和待匹配的单词，第三个参数（调用时传入 i+1）是单词的序号（从 1 开始），最后一个参数是 next 数组。

/*
 * Searches one word for the pattern with the KMP algorithm and prints a line
 * for every occurrence (overlapping occurrences included).
 *
 * substr:     pattern to look for.
 * word:       word that is scanned.
 * word_index: number of the word, printed as given.
 * next:       next table of the pattern as produced by getNext(); it must
 *             have one entry per pattern character.
 *
 * The printed character position is 1-based. Nothing is printed when the
 * pattern is empty or longer than the word.
 */
void strMatch(char *substr, char *word, int word_index, int *next) {
	int sub_len = (int)strlen(substr);
	int word_len = (int)strlen(word);
	/* An empty pattern has no usable next table, and a pattern longer than
	 * the word cannot occur in it. */
	if(sub_len == 0 || sub_len > word_len) {
		return;
	}
	/* KMP comparison; also covers the case where both lengths are equal. */
	int i = 0, j = 0;
	while(i < word_len) {
		if(j == -1 || substr[j] == word[i]) {
			i++;
			j++;
		} else {
			j = next[j];
		}
		if(j == sub_len) {
			int start = i - j;  /* 0-based start of this occurrence */
			printf("%s 在第 %d 单词的第 %d 字符\n", substr, word_index, start + 1);
			/* next has only sub_len entries, so next[sub_len] must not be
			 * read. Restart the comparison one character after the start of
			 * this occurrence, which also finds overlapping occurrences. */
			i = start + 1;
			j = 0;
		}
	}
}