数据结构实验-查找算法实验比较

实验目的

基于教材内容,从顺序查找、二分查找、基于BST的查找和哈希中任选两种查找算法,实现并比较性能。

基本要求

(1)对实现的查找算法进行实验比较,在不同数据规模(N)下执行100次成功查找,以表格形式记录最小、最大和平均查找时间;在不同数据规模(N)下执行100次不成功查找,以表格形式记录最小、最大和平均查找时间。

(2)查找算法要基于教材,测试输入的整数数据文件(5个,文件中数据规模N分别是100,1K,10K,100K和1M),每次查找的比较次数和时间也要输出到文件中。

(3)提交最终实验作业。用附件的形式,提交两个文件:一个压缩包(包含源码和5个用于查找测试的数据文件);一个pdf文档(文档中包含实验日志和一个根据基本要求(1)记录实验结果的表格,然后进行适当的实验结果分析)。

本次实验我选择了哈希查找与二分查找。

首先需要了解二分查找的具体含义:二分查找要求数据采用顺序存储结构,并且查找前待查表必须已经有序,这是它的缺点;优点是每次只需与当前查找区间中间位置的关键字比较,然后把查找区间折半,如此反复,直到查找成功或查找区间为空(查找不成功)。二分查找需要使用顺序结构,这里直接选择数组。

哈希是通过哈希函数把关键字映射为哈希地址来建立哈希表,所以每次查找只需要用哈希函数计算出地址即可直接取得所查记录;但是不同的关键字经过同一哈希函数可能得到相同的哈希地址(即发生冲突),所以选取什么样的哈希函数来建立哈希表就成为一个问题。

1.编写建立测试数据函数

通过C++11的随机数库(头文件 <random>)进行随机数生成,详见上一篇随机数生成。

2.编写查找函数代码

二分查找需要有序顺序表,所以先对数据进行排序,再进行二分查找,二分查找还有许多实现细节,参考

https://blog.csdn.net/xiao_jj_jj/article/details/106018702

哈希查找运用线性哈希,直接写代码。计时功能运用C++11高精度计时库chrono,头文件为 <chrono>。

3.具体代码

#include <algorithm>
#include <chrono>
#include <cmath>
#include <ctime>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
using namespace std;

// Bounds of the key range used by the random generator in this file.
constexpr int minn = 0;
constexpr int max0 = 10000000;
// Presence table indexed by key value; static storage, so it starts all-false.
bool hashtable[10 * max0 + 5] = {};
// Shared working buffer for the data set under test, zero-filled on allocation.
int* arr = new int[1000005]{};

void Init(fstream& in, int A[]) {
    int temp = 0, cnt = 0;
    while (!in.eof() && cnt < max0) {
        in >> temp;
        A[cnt] = temp;
        cnt++;
    }
}
//从文件输入
void Init(fstream& in, int arr[], bool hash[]) {
    int temp=0, cnt=0, key = 0;
    fill(hash, hash + max0, 0);
    while ((!in.eof()) && cnt < max0) {
        in >> temp;
        arr[cnt] = temp;
        key = temp % max0;
        hash[key] = 1;//线性hash
        cnt++;
    }
}
//创建测试数据
void createTestData() {
    static default_random_engine ge;//创建引擎
    ge.seed(time(0));
    static uniform_int_distribution<int> dis(minn, max0);//创建取值范围
    //让每次生成的随机数不同,设置种子(定义前后均可设置种子)
    //default_random_engin ge2(12345);创建引擎设置
    /*default_random_engine ge3;
    ge3.seed(time(0));随机种子*/
    //定义成static多次调用同一对范围和引擎使得每一次生成的数不一样。
    int maxn = max0 / 10;
    //数组引索随机数
    uniform_int_distribution<int> dis1(minn, maxn);
    string files[7] = { "100.txt","1k.txt","10k.txt","100k.txt","1M.txt","success.txt","fail.txt" };
    fstream file;
    //产生1M随机数
    for (int i = 0; i < maxn; i++) {
        arr[i] = dis(ge);
        hashtable[arr[i]] = 1;//已有数
    }
    //生成100个随机数同时将这组数据作为成功查找样例
    file.open(files[0], ios::out);
    int* arr100 = new int[100]();
    for (int i = 0; i < 100; i++) {
        arr100[i] = arr[dis1(ge)];
        file << arr100[i] << endl;
    }
    file.close();
    //success
    file.open(files[5], ios::out);
    int* success = new int[100]();
    for (int i = 0; i < 100; i++) {
        success[i] = arr100[i];
        file << arr100[i] << endl;
    }
    delete[] arr100;
    file.close();
    //1k
    file.open(files[1], ios::out);
    int* arr1k = new int[1000]();
    for (int i = 0; i < 1000; i++) {//平均散布100个样例
        if (i % 10 == 0 && i != 0) {
            arr1k[i] = success[i / 10];
        }
        else arr1k[i] = arr[dis1(ge)];
    }
    for (int i = 0; i < 1000; i++)
        file << arr1k[i] << endl;
    delete[] arr1k;
    file.close();
    //10k
    file.open(files[2], ios::out);
    int* arr10k = new int[10000]();
    for (int i = 0; i < 10000; i++) {
        if (i % 10 == 0 && i != 0) {
            arr10k[i] = success[i / 100];
        }
        else arr10k[i] = arr[dis1(ge)];
    }
    for (int i = 0; i < 10000; i++)
        file << arr10k[i] << endl;
    delete[] arr10k;
    file.close();
    //100k
    file.open(files[3], ios::out);
    int* arr100k = new int[100000]();
    for (int i = 0; i < 100000; i++) {
        if (i % 10 == 0 && i != 0) {
            arr100k[i] = success[i / 1000];
        }
        else arr100k[i] = arr[dis1(ge)];
    }
    for (int i = 0; i < 100000; i++)
        file << arr100k[i] << endl;
    delete[] arr100k;
    file.close();
    //1M
    file.open(files[4], ios::out);
    for (int i = 0; i < 1000000; i++)
        file << arr[i] << endl;
    file.close();
    //失败查找数据文件,即没有产生的数据且在成功数据集范围内.
    file.open(files[6], ios::out);
    int cnt = 0, number = 0;
    while (cnt < 100) {
        number = dis(ge);
        if (hashtable[number] == 0) {
            file << number << endl;
            cnt++;
        }
    }
    file.close();
    cout << "测试数据创建完毕." << endl;
    cout << "------------------------------------------" << endl;
}
// Binary search benchmark.
//
// Sorts arr[0..n) in place, then looks up the 100 keys in `success` followed
// by the 100 keys in `fail`. For every lookup the outcome, the number of
// comparisons and the elapsed time are written to both std::cout and `out`.
// Finally the minimum / maximum / mean time is reported separately for the
// lookups that found their key and for those that did not.
//
// Parameters:
//   out     - open output stream that receives a copy of the report
//   arr, n  - data set and its length; arr is reordered by the sort
//   success - 100 keys expected to be present
//   fail    - 100 keys expected to be absent
void bin_search(std::fstream& out, int arr[], int n, int success[], int fail[]) {
    using namespace std::chrono;

    // Running minimum / maximum / sum of the timings of one outcome class.
    struct Stats {
        int samples = 0;
        double lo = 0.0;
        double hi = 0.0;
        double sum = 0.0;
        void add(double t) {
            lo = (samples == 0) ? t : std::min(lo, t);
            hi = std::max(hi, t);
            sum += t;
            ++samples;
        }
        // Mean over the lookups actually recorded. Lookups are classified by
        // their real outcome, so a class need not contain exactly 100 samples.
        double mean() const { return samples > 0 ? sum / samples : 0.0; }
    };

    if (n < 0) n = 0;  // a negative length would make the sort range invalid
    std::sort(arr, arr + n);

    Stats hit, miss;
    std::ostream* const sinks[2] = { &std::cout, &out };

    for (int i = 0; i < 200; i++) {
        // First the keys expected to succeed, then the ones expected to fail.
        const int num = (i < 100) ? success[i] : fail[i - 100];
        int cnt = 0;
        int left = 0, right = n - 1;
        bool found = false;

        const auto start = steady_clock::now();
        while (left <= right) {
            cnt++;
            const int mid = left + (right - left) / 2;  // overflow-safe midpoint
            if (arr[mid] == num) {
                found = true;
                break;
            }
            if (arr[mid] > num) right = mid - 1;
            else left = mid + 1;
        }
        const auto end = steady_clock::now();
        const double seconds = duration<double>(end - start).count();

        (found ? hit : miss).add(seconds);
        for (std::ostream* sink : sinks) {
            *sink << "二分查找" << num << (found ? "成功 	" : "失败 	") << "查找次数:" << cnt << std::endl;
            *sink << "查找时间:" << seconds << "s" << std::endl;
        }
    }

    std::cout << "--------------------------------" << std::endl;
    for (std::ostream* sink : sinks) {
        *sink << "二分查找成功;" << "最小查找时间=" << hit.lo << "s" << "   " << "最大查找时间" << hit.hi << "s" << "平均查找时间" << hit.mean() << "s" << std::endl;
    }
    for (std::ostream* sink : sinks) {
        *sink << "二分查找失败;" << "最小查找时间=" << miss.lo << "s" << "   " << "最大查找时间" << miss.hi << "s" << "平均查找时间" << miss.mean() << "s" << std::endl;
    }
}
// Hash lookup benchmark.
//
// Looks up the 100 keys in `success` followed by the 100 keys in `fail` by
// indexing the presence table `h` directly with the key. For every lookup the
// outcome, the probe count (always 1) and the elapsed time are written to both
// std::cout and `out`; afterwards the minimum / maximum / mean time is
// reported separately for hits and misses.
//
// Parameters:
//   out     - open output stream that receives a copy of the report
//   h       - presence table; must be large enough to be indexed by every key
//   success - 100 keys expected to be present
//   fail    - 100 keys expected to be absent
void hashsearch(std::fstream& out, bool h[], int success[], int fail[]) {
    using namespace std::chrono;

    // Running minimum / maximum / sum of the timings of one outcome class.
    struct Stats {
        int samples = 0;
        double lo = 0.0;
        double hi = 0.0;
        double sum = 0.0;
        void add(double t) {
            lo = (samples == 0) ? t : std::min(lo, t);
            hi = std::max(hi, t);
            sum += t;
            ++samples;
        }
        // Mean over the lookups actually recorded. Lookups are classified by
        // their real outcome, so a class need not contain exactly 100 samples.
        double mean() const { return samples > 0 ? sum / samples : 0.0; }
    };

    const int cnt = 1;  // a direct table lookup is a single probe
    Stats hit, miss;
    std::ostream* const sinks[2] = { &std::cout, &out };

    for (int i = 0; i < 200; i++) {
        // First the keys expected to succeed, then the ones expected to fail.
        const int test = (i < 100) ? success[i] : fail[i - 100];

        const steady_clock::time_point start_h = steady_clock::now();
        const bool found = h[test];
        const steady_clock::time_point end_h = steady_clock::now();
        const double seconds = duration<double>(end_h - start_h).count();

        (found ? hit : miss).add(seconds);
        for (std::ostream* sink : sinks) {
            if (found) {
                *sink << "哈希查找:" << test << "成功 	" << "查找次数:" << cnt << std::endl;
            }
            else {
                *sink << "哈希查找:" << test << "失败 	" << "次数:" << cnt << std::endl;
            }
            *sink << "查找时间:" << seconds << "s" << std::endl;
        }
    }

    std::cout << "--------------------------------" << std::endl;
    for (std::ostream* sink : sinks) {
        *sink << "哈希查找成功;" << "最小查找时间=" << hit.lo << "s" << "   " << "最大查找时间" << hit.hi << "s" << "平均查找时间" << hit.mean() << "s" << std::endl;
    }
    for (std::ostream* sink : sinks) {
        *sink << "哈希查找失败;" << "最小查找时间=" << miss.lo << "s" << "   " << "最大查找时间" << miss.hi << "s" << "平均查找时间" << miss.mean() << "s" << std::endl;
    }
}

// Entry point: generate the data files, load the success / failure probe
// keys, then for each data size load the file, run the hash benchmark and the
// binary-search benchmark, and write the report to the matching *out.txt file.
int main() {
    createTestData();
    const std::string files[12] = { "100.txt","1k.txt","10k.txt","100k.txt","1M.txt","success.txt","fail.txt","100out.txt","1kout.txt","10kout.txt","100kout.txt","1Mout.txt" };
    // Element counts of files[0..4]. Kept as integers: the former
    // 100 * pow(10, i) was a double, printed the last size as "1e+06" and was
    // narrowed implicitly when passed as the element count.
    const int sizes[5] = { 100, 1000, 10000, 100000, 1000000 };
    int su[105] = {};  // keys expected to be found
    int fa[105] = {};  // keys expected to be missing
    std::fstream file;

    // Opens `name` on the shared stream and reports a failure on std::cerr.
    const auto openOrReport = [&file](const std::string& name, std::ios::openmode mode) -> bool {
        file.open(name, mode);
        if (!file.is_open()) {
            std::cerr << "无法打开文件: " << name << std::endl;
            return false;
        }
        return true;
    };

    // Without the probe keys nothing can be measured, so give up early.
    if (!openOrReport(files[5], std::ios::in)) {
        delete[] arr;
        return 1;
    }
    Init(file, su);
    file.close();
    if (!openOrReport(files[6], std::ios::in)) {
        delete[] arr;
        return 1;
    }
    Init(file, fa);
    file.close();

    for (int i = 0; i < 5; i++) {
        // A missing data or report file only skips that one data size.
        if (!openOrReport(files[i], std::ios::in)) continue;
        Init(file, arr, hashtable);
        file.close();
        if (!openOrReport(files[i + 7], std::ios::out)) continue;
        std::cout << "数据规模为" << sizes[i] << "哈希查找测试数据:" << std::endl;
        file << "数据规模为" << sizes[i] << "哈希查找数据测试:" << std::endl;
        hashsearch(file, hashtable, su, fa);
        std::cout << "数据规模为" << sizes[i] << "二分查找数据测试:" << std::endl;
        file << "数据规模为" << sizes[i] << "二分查找数据测试:" << std::endl;
        bin_search(file, arr, sizes[i], su, fa);
        file.close();
    }
    delete[] arr;
    return 0;
}

本实验代码参考学长代码并进行改良,测试数据更加平均,且计时与随机数生成部分用C++11重写,参考代码:

https://blog.csdn.net/weixin_44307065/article/details/103441948

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值