随机生成的源字符串序列
随机生成的目的字符串
两种算法的运行结果
算法分析
在本实验中我使用了输出重定向,将由 A—G 组成的随机字符串分别写入两个文件(data.txt、data2.txt)。用两种算法对两个串进行匹配,找到目的串在源串中首次出现的位置,若不成功则返回未找到。设源串 str 的长度为 n,目的串 ptr 的长度为 m。第一种算法是普通迭代(双重 for 循环):从 str 的第一个下标开始,选取和 ptr 长度一样(长度为 m)的子字符串进行比较,如果一样,就返回开始处的下标值;不一样,则选取 str 的下一个下标,同样选取长度为 m 的子字符串进行比较,直到 str 的末尾(实际比较时,下标只需移动到 n-m)。这样最坏情况下的时间复杂度为 $O((n-m)\cdot m)$,量级上即 $O(n \cdot m)$。第二种算法是 KMP 算法:先对目的串做预处理得到部分匹配表,失配时源串的下标不回退,只根据该表移动目的串的下标,因此时间复杂度为 $O(n+m)$。
在本实验中源串的规模是 10000*10000(即 $10^8$ 个字符),目的串的长度为常数(10 个字符),从以上截图可以看出两种算法在时间复杂度上的级别差。
实验代码
#include <time.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Naive (brute-force) substring search.
class Solution {
public:
    // Returns the zero-based index of the first occurrence of `needle` in
    // `haystack`, or -1 if `needle` does not occur. An empty `needle` matches
    // at index 0 (including when `haystack` is empty as well).
    //
    // Both strings are taken by const reference so that a very large haystack
    // is not copied on every call.
    int strStr(const std::string& haystack, const std::string& needle) {
        typedef std::string::size_type size_type;

        if (needle.empty()) return 0;
        // Also guards the unsigned subtraction below against wrapping around.
        if (haystack.size() < needle.size()) return -1;

        const size_type needle_size = needle.size();
        const size_type last_start = haystack.size() - needle_size;
        for (size_type start = 0; start <= last_start; ++start) {
            size_type matched = 0;
            while (matched < needle_size &&
                   haystack[start + matched] == needle[matched]) {
                ++matched;
            }
            if (matched == needle_size) return static_cast<int>(start);
        }
        return -1;
    }
};
// Knuth-Morris-Pratt substring search.
//
// Returns the zero-based index of the first occurrence of `pattern` in
// `target`, or -1 if `pattern` does not occur. An empty `pattern` matches at
// index 0.
int kmp_find(const std::string& target, const std::string& pattern)
{
    const int target_length = static_cast<int>(target.size());
    const int pattern_length = static_cast<int>(pattern.size());

    // The failure table needs at least one slot; handle the empty pattern
    // up front instead of indexing into a zero-length table.
    if (pattern_length == 0)
    {
        return 0;
    }

    // overlay_value[i] is the index of the last character of the longest
    // proper prefix of pattern[0..i] that is also a suffix of pattern[0..i],
    // or -1 when no such prefix exists. The vector owns the storage, so it is
    // released on every return path.
    std::vector<int> overlay_value(pattern.size(), -1);
    for (int i = 1; i < pattern_length; ++i)
    {
        int index = overlay_value[i - 1];
        // Fall back through shorter borders until one can be extended by
        // pattern[i] or none is left.
        while (index >= 0 && pattern[index + 1] != pattern[i])
        {
            index = overlay_value[index];
        }
        if (pattern[index + 1] == pattern[i])
        {
            overlay_value[i] = index + 1;
        }
        // Otherwise the entry keeps its initial value of -1.
    }

    // Matching phase: target_index never moves backwards.
    int pattern_index = 0;
    int target_index = 0;
    while (pattern_index < pattern_length && target_index < target_length)
    {
        if (target[target_index] == pattern[pattern_index])
        {
            ++target_index;
            ++pattern_index;
        }
        else if (pattern_index == 0)
        {
            ++target_index;
        }
        else
        {
            // Reuse the longest border of the part matched so far.
            pattern_index = overlay_value[pattern_index - 1] + 1;
        }
    }

    if (pattern_index == pattern_length)
    {
        return target_index - pattern_index;
    }
    return -1;
}
// Runs the brute-force matcher (Solution::strStr) on the first
// whitespace-delimited token of data.txt (source) and data2.txt (pattern),
// then prints the match position and the elapsed time in milliseconds.
void function1(){
    std::string src;
    std::string tar;

    std::ifstream in;
    in.open("data.txt", std::ios::in);
    if (!in.is_open()) {
        std::cerr << "无法打开文件 data.txt" << std::endl;
        return;
    }
    in >> src;
    in.close();

    std::ifstream tin;
    tin.open("data2.txt", std::ios::in);
    if (!tin.is_open()) {
        std::cerr << "无法打开文件 data2.txt" << std::endl;
        return;
    }
    tin >> tar;
    tin.close();

    // Time only the search itself; console output is kept outside the
    // measured region.
    Solution so;
    const clock_t start = clock();
    const int ret = so.strStr(src, tar);
    const clock_t end = clock();

    if (ret == -1) {
        std::cout << "未找到" << std::endl;
    }
    else {
        std::cout << "从第" << ret << "个字符匹配成功" << std::endl;
    }

    // clock() counts ticks of CLOCKS_PER_SEC per second; convert to
    // milliseconds so the value agrees with the printed label.
    const double elapsed_ms =
        1000.0 * static_cast<double>(end - start) / CLOCKS_PER_SEC;
    std::cout << "匹配时间(毫秒):" << elapsed_ms << std::endl;
}
// Runs the KMP matcher (kmp_find) on the first whitespace-delimited token of
// data.txt (source) and data2.txt (pattern), then prints the match position
// and the elapsed time in milliseconds.
void function2(){
    std::string src;
    std::string tar;

    std::ifstream in;
    in.open("data.txt", std::ios::in);
    if (!in.is_open()) {
        std::cerr << "无法打开文件 data.txt" << std::endl;
        return;
    }
    in >> src;
    in.close();

    std::ifstream tin;
    tin.open("data2.txt", std::ios::in);
    if (!tin.is_open()) {
        std::cerr << "无法打开文件 data2.txt" << std::endl;
        return;
    }
    tin >> tar;
    tin.close();

    // Time only the search itself; console output is kept outside the
    // measured region.
    const clock_t start = clock();
    const int ret = kmp_find(src, tar);
    const clock_t end = clock();

    if (ret == -1) {
        std::cout << "未找到" << std::endl;
    }
    else {
        std::cout << "从第" << ret << "个字符匹配成功" << std::endl;
    }

    // clock() counts ticks of CLOCKS_PER_SEC per second; convert to
    // milliseconds so the value agrees with the printed label.
    const double elapsed_ms =
        1000.0 * static_cast<double>(end - start) / CLOCKS_PER_SEC;
    std::cout << "匹配时间(毫秒):" << elapsed_ms << std::endl;
}
// Entry point: prints a heading for each matcher and runs its benchmark,
// brute force first, then KMP.
int main()
{
    std::cout << "普通迭代法匹配字符串" << std::endl;
    function1();

    std::cout << "KMP法匹配字符串" << std::endl;
    function2();

    return 0;
}
write.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Number of random characters written to data.txt (10^8). Parenthesized so
 * the macro expands safely inside larger expressions. */
#define MAX (10000 * 10000)

/*
 * Redirects stdout to data.txt and writes MAX random letters in the range
 * 'A'..'G' to it. No trailing NUL or newline is written. Exits with a
 * failure status if data.txt cannot be opened for writing.
 */
void write(void){
    int i;

    if (freopen("data.txt", "w", stdout) == NULL) {
        perror("data.txt");
        exit(EXIT_FAILURE);
    }

    /* time() is declared in <time.h>; seed once per run. */
    srand((unsigned int)time(NULL));
    for (i = 0; i < MAX; i++) {
        putchar(rand() % 7 + 'A');
    }

    fclose(stdout);
}
/* Entry point: generates data.txt by calling write(). */
int main()
{
    write();
    return 0;
}
write2.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Number of random characters written to data2.txt. */
#define MAX 10

/*
 * Redirects stdout to data2.txt and writes MAX random letters in the range
 * 'A'..'G' to it. No trailing NUL or newline is written. Exits with a
 * failure status if data2.txt cannot be opened for writing.
 */
void write(void){
    int i;

    /* Salt the time-based seed. With a bare time(NULL) seed, a generator
     * that also seeds with time(NULL) and runs within the same second would
     * emit the same leading characters, so the pattern would trivially match
     * at offset 0 of that generator's output. */
    srand((unsigned int)time(NULL) ^ 0x5bd1e995u);

    if (freopen("data2.txt", "w", stdout) == NULL) {
        perror("data2.txt");
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < MAX; i++) {
        putchar(rand() % 7 + 'A');
    }

    fclose(stdout);
}
/* Entry point: generates data2.txt by calling write(). */
int main()
{
    write();
    return 0;
}