串、数组、广义表总结

奈奈子0207

于 2024-05-06 00:16:49 发布

阅读量764

点赞数 20

分类专栏：数据结构与算法文章标签：数据结构算法

本文链接：https://blog.csdn.net/m0_68673360/article/details/138476731

版权

数据结构与算法专栏收录该内容

17 篇文章

订阅专栏

本文介绍了数据结构中的串、数组和广义表，并详细讨论了KMP算法在检测环状病毒DNA在人类DNA中的应用案例，通过编程示例展示了如何使用KMP算法进行模式匹配以检测病毒感染。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

文章目录

前言
一、总结
二、病毒感染检测案例
总结

前言

T_T此专栏用于记录数据结构及算法的（痛苦）学习历程，便于日后复习（这种事情不要啊）。所用教材为《数据结构 C语言版第2版》严蔚敏。

一、总结

本章介绍了三种数据结构：串、数组和广义表，主要内容如下。
(1)串是内容受限的线性表，它限定了表中的元素为字符。串有两种基本存储结构：顺序存储和链式存储，但多采用顺序存储结构。串的常用算法是模式匹配算法，主要有BF算法和KMP算法。BF算法实现简单，但存在回溯，效率低，时间复杂度为O(m × n)。KMP算法对BF算法进行改进，消除回溯，提高了效率，时间复杂度为O(m + n)。
(2)多维数组可以看成是线性表的推广，其特点是结构中的元素本身可以是具有某种结构的数据，但属于同一数据类型。一个n维数组实质上是n个线性表的组合，其每一维都是一个线性表。数组一般采用顺序存储结构，故存储多维数组时，应先将其确定转换为一维结构，有按“行”转换和按“列”转换两种。科学与工程计算中的矩阵通常用二维数组来表示，为了节省存储空间，对于几种常见形式的特殊矩阵，比如对称矩阵、三角矩阵和对角矩阵，在存储时可进行压缩存储，即为多个值相同的元只分配一个存储空间，对零元不分配空间。
(3)广义表是另外一种线性表的推广形式，表中的元素可以是称为原子的单个元素，也可以是一个子表，所以线性表可以看成广义表的特例。广义表的结构相当灵活，在某种前提下，它可以兼容线性表、数组、树和有向图等各种常用的数据结构。广义表的常用操作有取表头和取表尾。广义表通常采用链式存储结构：头尾链表的存储结构和扩展线性链表的存储结构。

二、病毒感染检测案例

1.案例引入

医学研究者最近发现了某些新病毒，通过对这些病毒的分析，得知它们的 DNA 序列都是环状的。现在研究者已收集了大量的病毒 DNA 和人的DNA 数据，想快速检测出这些人是否感染了相应的病毒。为了方便研究，研究者将人的 DNA 和病毒 DNA 均表示成由一些字母组成的字符串序列，然后检测某种病毒 DNA 序列是否在患者的 DNA 序列中出现过，如果出现过，则此人感染了该病毒，否则没有感染。例如，假设病毒的 DNA 序列为 baa, 患者 1 的 DNA 序列为aaabbba,则感染，患者 2 的 DNA 序列为 babbba, 则未感染。（注意，人的 DNA 序列是线性的，而病毒的DNA 序列是环状的）

2.案例分析

对于每一个待检测的任务，假设病毒 DNA 序列的长度是 m。因为病毒 DNA 序列是环状的，而人的 DNA 序列是线性的，可将存储病毒 DNA 序列的字符串长度扩大为 2m（即把病毒 DNA 序列连续存储两次），成“环”。然后依次取其中每个长度为 m 的子串（共 m 个，对应环状序列的 m 种起点）作为模式串，将人的 DNA 序列作为主串，调用 KMP 算法进行模式匹配；只要有一次匹配成功，即可判定此人感染了该病毒。

3.具体实现

代码如下：

#define  _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <string.h>
using namespace std;

// Capacity of every string buffer, not counting the terminating '\0'.
#define maxlen 255

// Status codes returned by the string routines below.
// `true` and `false` are C++ keywords and are deliberately NOT redefined as
// macros; they convert to the Status values 1 and 0 on their own.
#define ok 1
#define fail 0
#define Not_Exist (-1)   // a required buffer pointer is NULL
#define overflow (-2)    // the result would not fit into maxlen characters


typedef int Status;

// Heap-backed string used by all routines in this file.
typedef struct {
	char *ch;      // character buffer; StrAssign allocates it as new char[maxlen + 1]
	int length;    // number of characters currently stored (StrValueInit sets it from strlen)
}HString;

// Allocate the storage of S and make it the empty string.
// The buffer holds up to maxlen characters plus the terminating '\0'.
// Whatever S.ch pointed to before the call is not released here.
// Always returns ok.
Status StrAssign(HString& S)
{
	S.ch = new char[maxlen + 1];
	// new[] leaves the characters indeterminate; terminate the buffer so that
	// strlen/strcmp/strstr on a freshly created string are well defined.
	S.ch[0] = '\0';
	S.length = 0;
	return ok;
}
// Set the value of S to the C string `chars`.
// S must already own a buffer of maxlen + 1 chars (see StrAssign).
// Returns Not_Exist if S.ch or chars is NULL, overflow if chars is longer
// than maxlen (S is left untouched in both cases), otherwise ok.
Status StrValueInit(HString& S,const char* chars)
{
	if (!S.ch || !chars) return Not_Exist;

	const size_t count = strlen(chars);
	if (count > maxlen) return overflow;

	// memmove rather than strcpy: stays defined even if chars points into S.ch.
	memmove(S.ch, chars, count + 1);
	S.length = (int)count;
	return ok;
}
// Copy the value of string S into string T.
// T must already own a buffer large enough for S (strcpy is unbounded).
// Returns Not_Exist if either buffer pointer is NULL, otherwise ok.
Status StrCopy(HString S, HString& T)
{
	if (!S.ch || !T.ch) return Not_Exist;
	// S is passed by value and may share its buffer with T (StrCopy(x, x));
	// strcpy on overlapping ranges is undefined, and nothing needs copying then.
	if (T.ch != S.ch) strcpy(T.ch, S.ch);
	T.length = (int)strlen(T.ch);
	return ok;
}
// Report whether string S is empty.
// Returns Not_Exist when S has no buffer, otherwise true if S.length is 0
// and false if it is not.
Status StrEmpty(HString S)
{
	if (S.ch == NULL)
	{
		return Not_Exist;
	}
	return (S.length == 0) ? true : false;
}
// Compare the values of two strings.
// Returns the result of strcmp on the two buffers, or Not_Exist when either
// buffer pointer is NULL.
Status StrCompare(HString S, HString T)
{
	if (S.ch && T.ch)
	{
		return strcmp(S.ch, T.ch);
	}
	return Not_Exist;
}
// Return the stored length of string S, or Not_Exist when S has no buffer.
Status StrLength(HString& S)
{
	return S.ch ? S.length : Not_Exist;
}
// Make S the empty string (its buffer is kept).
// Returns Not_Exist when S has no buffer, otherwise ok.
Status ClearString(HString& S)
{
	if (!S.ch) return Not_Exist;
	// Other routines in this file read the buffer with strcmp/strstr/strcpy and
	// ignore `length`, so the text itself has to be truncated as well.
	S.ch[0] = '\0';
	S.length = 0;
	return ok;
}
// Store the concatenation S1 + S2 in T.
// T receives a freshly allocated buffer of maxlen + 1 chars. The previous
// T.ch is not released here (T may be uninitialized, or may share its buffer
// with S1 or S2, which are shallow by-value copies).
// Returns Not_Exist if S1 or S2 has no buffer, overflow if the joined text
// would exceed maxlen characters (T is left untouched), otherwise ok.
Status Concat(HString& T, HString S1, HString S2)
{
	if (!S1.ch || !S2.ch) return Not_Exist;

	const size_t leftLen = strlen(S1.ch);
	const size_t rightLen = strlen(S2.ch);
	if (leftLen + rightLen > maxlen) return overflow;

	char* joined = new char[maxlen + 1];
	memcpy(joined, S1.ch, leftLen);
	memcpy(joined + leftLen, S2.ch, rightLen + 1);   // + 1 copies the terminating '\0'

	T.ch = joined;
	T.length = (int)(leftLen + rightLen);

	return	ok;
}
// Copy into Sub the substring of S that starts at the pos-th character
// (1-based) and is len characters long. Sub must already own a buffer.
// Returns Not_Exist when S has no buffer, fail when pos or len is out of
// range, otherwise ok.
Status SubString(HString& Sub,HString S,int pos,int len)
{
	if (S.ch == NULL)
	{
		return Not_Exist;
	}

	const bool badStart = (pos < 1) || (pos > S.length);
	if (badStart)
	{
		return fail;
	}
	const int available = S.length - pos + 1;   // characters from pos to the end of S
	if (len < 0 || len > available)
	{
		return fail;
	}

	strncpy(Sub.ch, &S.ch[pos - 1], len);
	Sub.ch[len] = '\0';
	Sub.length = len;

	return ok;
}
// Find the first occurrence of T in S, searching from the pos-th character
// (1-based) onwards.
// Returns the 1-based position of the match in S, 0 when there is no match,
// fail (0) when T is empty or pos is out of range, and Not_Exist when either
// string has no buffer.
Status Index(HString S, HString T, int pos)
{
	if (!S.ch || !T.ch) return Not_Exist;
	if (T.length == 0) return fail;
	if (pos < 1 || pos > S.length) return fail;

	const char* hit = strstr(S.ch + pos - 1, T.ch);
	// strstr yields NULL when T does not occur; subtracting S.ch from NULL
	// would be undefined and produce a garbage position.
	if (!hit) return 0;
	return (Status)(hit - S.ch) + 1;
}
// Replace every non-overlapping occurrence of T in S with V, scanning left
// to right.
// T.ch and V.ch must not point into S's buffer: the strcpy calls below write
// into S while still reading T and V.
// Returns Not_Exist if any string has no buffer, fail if T is empty,
// overflow if the result would exceed maxlen characters (S is left
// untouched in that case), otherwise ok.
Status Replace(HString& S, HString T,HString V)
{
	if (!S.ch || !T.ch || !V.ch) return Not_Exist;
	if (!T.length) return fail;

	// First pass: count the matches so the final length is known before any
	// byte of S is modified. The text after a replaced match is unchanged, so
	// this finds exactly the matches the second pass will replace.
	int matches = 0;
	for (const char* scan = strstr(S.ch, T.ch); scan; scan = strstr(scan + T.length, T.ch))
	{
		++matches;
	}
	const int finalLen = (int)strlen(S.ch) + matches * (V.length - T.length);
	if (finalLen > maxlen) return overflow;

	// Second pass: splice V in place of each match, resuming after the
	// inserted text so replacements are never rescanned.
	char* tail = new char[maxlen + 1];
	for (char* hit = strstr(S.ch, T.ch); hit; hit = strstr(hit + V.length, T.ch))
	{
		strcpy(tail, hit + T.length);      // save what follows the match
		strcpy(hit, V.ch);                 // overwrite the match with V
		strcpy(hit + V.length, tail);      // re-append the saved remainder
	}
	S.length = (int)strlen(S.ch);
	delete[] tail;
	return ok;
}
// Insert string T into S so that T starts at the pos-th character (1-based)
// of S; pos == S.length + 1 appends.
// T may share its buffer with S (e.g. StrInsert(s, s.length + 1, s)): T is
// snapshotted before S is modified.
// Returns Not_Exist if either string has no buffer, fail if pos is out of
// range or T.length is negative, overflow if the result would exceed maxlen
// characters (S is left untouched), otherwise ok.
Status StrInsert(HString& S, int pos, HString T)
{
	if (!S.ch || !T.ch) return Not_Exist;
	if (pos < 1 || pos > S.length + 1) return fail;
	if (T.length < 0) return fail;
	if (T.length > maxlen - S.length) return overflow;

	const int insertLen = T.length;
	const int tailLen = S.length - pos + 1;   // characters of S from pos to the end

	// Private copy of the inserted text: T is a shallow by-value copy and may
	// alias S.ch, in which case the shift below would clobber it.
	char* piece = new char[insertLen + 1];
	memcpy(piece, T.ch, insertLen);

	// Open a gap of insertLen characters; source and destination overlap,
	// hence memmove.
	memmove(S.ch + pos - 1 + insertLen, S.ch + pos - 1, tailLen);
	memcpy(S.ch + pos - 1, piece, insertLen);

	S.length += insertLen;
	S.ch[S.length] = '\0';

	delete[] piece;
	return ok;
}
// Delete from S the len characters that start at the pos-th character
// (1-based).
// Returns Not_Exist when S has no buffer, fail when pos or len is out of
// range, otherwise ok.
Status StrDelete(HString& S, int pos, int len)
{
	if (S.ch == NULL)
	{
		return Not_Exist;
	}
	if (pos < 1 || pos > S.length || len < 1 || len > S.length - pos + 1)
	{
		return fail;
	}

	char* gapStart = S.ch + pos - 1;          // first character to remove
	const char* keepFrom = gapStart + len;    // first character after the removed run
	// Slide the remainder (including its '\0') down over the removed run; the
	// ranges overlap, so memmove is used.
	memmove(gapStart, keepFrom, strlen(keepFrom) + 1);

	S.length -= len;
	return ok;
}

// Build the KMP `next` table of pattern S.
// The table is 1-based: entries next[1] .. next[S.length] are written
// (next[1] is written even for an empty pattern), so the array needs at
// least S.length + 1 elements, and never fewer than 2.
// Always returns ok.
Status GetNext(HString S,int next[])
{
	const char* pat = S.ch;
	int cur = 1;      // 1-based position whose successor's entry is being computed
	int border = 0;   // 1-based candidate position to compare against; 0 means "restart"

	next[1] = 0;
	while (cur < S.length)
	{
		if (border != 0 && pat[cur - 1] != pat[border - 1])
		{
			// Mismatch: fall back to the next shorter candidate.
			border = next[border];
			continue;
		}
		++cur;
		++border;
		next[cur] = border;
	}

	return ok;
}
// Build the improved KMP table `nextval` of pattern S.
// The table is 1-based: entries nextval[1] .. nextval[S.length] are written
// (nextval[1] is written even for an empty pattern), so the array needs at
// least S.length + 1 elements, and never fewer than 2.
// Always returns ok.
Status GetNextval(HString S, int nextval[])
{
	const char* pat = S.ch;
	int cur = 1;      // 1-based position whose successor's entry is being computed
	int border = 0;   // 1-based candidate position to compare against; 0 means "restart"

	nextval[1] = 0;
	while (cur < S.length)
	{
		if (border != 0 && pat[cur - 1] != pat[border - 1])
		{
			border = nextval[border];
			continue;
		}
		++cur;
		++border;
		// When both positions hold the same character, jumping to `border`
		// would repeat the same failed comparison, so reuse its own entry.
		if (pat[cur - 1] == pat[border - 1])
		{
			nextval[cur] = nextval[border];
		}
		else
		{
			nextval[cur] = border;
		}
	}

	return ok;
}
// KMP search: locate pattern T in main string S, starting at the pos-th
// character (1-based) of S. `nextval` is the 1-based table of T, indexed
// with values in 1 .. T.length.
// Returns the 1-based position of the first match, 0 when T does not occur,
// fail (0) when pos is out of range, and Not_Exist when either string has no
// buffer.
Status KMP(HString S,HString T,int pos,int nextval[])
{
	if (S.ch == NULL || T.ch == NULL)
	{
		return Not_Exist;
	}
	if (pos < 1 || pos > S.length + 1)
	{
		return fail;
	}

	int textPos = pos;   // 1-based cursor in S; it never moves backwards
	int patPos = 1;      // 1-based cursor in T; 0 means "advance both cursors"
	while (textPos <= S.length && patPos <= T.length)
	{
		if (patPos != 0 && S.ch[textPos - 1] != T.ch[patPos - 1])
		{
			patPos = nextval[patPos];
			continue;
		}
		++textPos;
		++patPos;
	}

	// The whole pattern was consumed: the match starts T.length characters back.
	return (patPos > T.length) ? textPos - T.length : 0;
}

HString s,t,q,sub;
int nextval[maxlen];
int main()
{
	StrAssign(s);
	StrValueInit(s, "abcdef");  //人DNA
	StrInsert(s, s.length + 1, s);  //复制一份接在后面，以满足“环”
	StrAssign(t);
	StrValueInit(t, "defa");   //病毒DNA
	GetNextval(t, nextval);
	cout << KMP(s, t, 1, nextval) << endl;  //获取查找结果
	return 0;
}