实验目的
基于教材内容,从顺序查找、二分查找、基于BST的查找和哈希中任选两种查找算法,实现并比较性能。
基本要求
(1)对实现的查找算法进行实验比较,在不同数据规模(N)下执行100次成功查找,以表格形式记录最小、最大和平均查找时间;在不同数据规模(N)下执行100次不成功查找,以表格形式记录最小、最大和平均查找时间。
(2)查找算法要基于教材,测试输入的整数数据文件(5个,文件中数据规模N分别是100,1K,10K,100K和1M),每次查找的比较次数和时间也要输出到文件中。
(3)提交最终实验作业。用附件的形式,提交两个文件:一个压缩包(包含源码和5个用于查找测试的数据文件);一个pdf文档(文档中包含实验日志和一个根据基本要求(1)记录实验结果的表格,然后进行适当的实验结果分析)。
本次实验我选择了哈希查找与二分查找。
首先需要了解二分查找的具体含义:二分查找必须采用顺序存储结构存储数据,并且查找前待查表必须有序,这是它的缺点;优点是每次查找都与当前查找区间中间位置的关键字进行比较,依次二分下去,直到查找成功或查找不成功。二分查找需要使用顺序结构,这里直接选择数组。
哈希查找是通过哈希函数把关键字映射为哈希地址来建立哈希表,所以每次查找只需要用哈希函数计算出地址即可直接取得所查记录;但是不同的关键字经过同一哈希函数可能得到相同的哈希地址(即冲突),所以选取什么哈希函数来建立哈希表成为一个问题。
1.编写建立测试数据函数
通过C++11的<random>库进行随机数生成,详见上一篇随机数生成。
2.编写查找函数代码
二分查找需要有序顺序表,所以先对数据进行排序,再进行二分查找,二分查找还有许多实现细节,参考
https://blog.csdn.net/xiao_jj_jj/article/details/106018702
哈希查找运用线性哈希,直接写代码。计时功能运用C++11高精度计时库chrono,头文件为<chrono>。
3.具体代码
#include <iostream>
#include <algorithm>
#include <ctime>
#include <fstream>
#include <random>
#include <chrono>
using namespace std;
// Inclusive lower bound of the randomly generated keys.
const int minn = 0;
// Upper bound of the randomly generated keys (ten million); also the modulus
// applied to a key when it is recorded in the presence table.
const int max0 = 10000000;
// Presence table indexed by key: a true slot marks a key as stored.
// All slots start out false.
bool hashtable[10 * max0 + 5] = {};
// Shared buffer holding the data set currently under test; zero-initialised.
int* arr = new int[1000005]{};
void Init(fstream& in, int A[]) {
int temp = 0, cnt = 0;
while (!in.eof() && cnt < max0) {
in >> temp;
A[cnt] = temp;
cnt++;
}
}
//从文件输入
void Init(fstream& in, int arr[], bool hash[]) {
int temp=0, cnt=0, key = 0;
fill(hash, hash + max0, 0);
while ((!in.eof()) && cnt < max0) {
in >> temp;
arr[cnt] = temp;
key = temp % max0;
hash[key] = 1;//线性hash
cnt++;
}
}
//创建测试数据
void createTestData() {
static default_random_engine ge;//创建引擎
ge.seed(time(0));
static uniform_int_distribution<int> dis(minn, max0);//创建取值范围
//让每次生成的随机数不同,设置种子(定义前后均可设置种子)
//default_random_engin ge2(12345);创建引擎设置
/*default_random_engine ge3;
ge3.seed(time(0));随机种子*/
//定义成static多次调用同一对范围和引擎使得每一次生成的数不一样。
int maxn = max0 / 10;
//数组引索随机数
uniform_int_distribution<int> dis1(minn, maxn);
string files[7] = { "100.txt","1k.txt","10k.txt","100k.txt","1M.txt","success.txt","fail.txt" };
fstream file;
//产生1M随机数
for (int i = 0; i < maxn; i++) {
arr[i] = dis(ge);
hashtable[arr[i]] = 1;//已有数
}
//生成100个随机数同时将这组数据作为成功查找样例
file.open(files[0], ios::out);
int* arr100 = new int[100]();
for (int i = 0; i < 100; i++) {
arr100[i] = arr[dis1(ge)];
file << arr100[i] << endl;
}
file.close();
//success
file.open(files[5], ios::out);
int* success = new int[100]();
for (int i = 0; i < 100; i++) {
success[i] = arr100[i];
file << arr100[i] << endl;
}
delete[] arr100;
file.close();
//1k
file.open(files[1], ios::out);
int* arr1k = new int[1000]();
for (int i = 0; i < 1000; i++) {//平均散布100个样例
if (i % 10 == 0 && i != 0) {
arr1k[i] = success[i / 10];
}
else arr1k[i] = arr[dis1(ge)];
}
for (int i = 0; i < 1000; i++)
file << arr1k[i] << endl;
delete[] arr1k;
file.close();
//10k
file.open(files[2], ios::out);
int* arr10k = new int[10000]();
for (int i = 0; i < 10000; i++) {
if (i % 10 == 0 && i != 0) {
arr10k[i] = success[i / 100];
}
else arr10k[i] = arr[dis1(ge)];
}
for (int i = 0; i < 10000; i++)
file << arr10k[i] << endl;
delete[] arr10k;
file.close();
//100k
file.open(files[3], ios::out);
int* arr100k = new int[100000]();
for (int i = 0; i < 100000; i++) {
if (i % 10 == 0 && i != 0) {
arr100k[i] = success[i / 1000];
}
else arr100k[i] = arr[dis1(ge)];
}
for (int i = 0; i < 100000; i++)
file << arr100k[i] << endl;
delete[] arr100k;
file.close();
//1M
file.open(files[4], ios::out);
for (int i = 0; i < 1000000; i++)
file << arr[i] << endl;
file.close();
//失败查找数据文件,即没有产生的数据且在成功数据集范围内.
file.open(files[6], ios::out);
int cnt = 0, number = 0;
while (cnt < 100) {
number = dis(ge);
if (hashtable[number] == 0) {
file << number << endl;
cnt++;
}
}
file.close();
cout << "测试数据创建完毕." << endl;
cout << "------------------------------------------" << endl;
}
// Binary-search benchmark.
//
// Sorts arr[0..n) in place, then looks up the 100 keys of success[] followed
// by the 100 keys of fail[]. For every lookup the key, the outcome, the number
// of comparisons and the elapsed time are written to both std::cout and `out`;
// afterwards the minimum, maximum and average time of the successful and of
// the unsuccessful lookups are written to both streams.
//
// out:     open output stream receiving a copy of everything printed.
// arr, n:  data set and its length; arr is reordered (sorted ascending).
// success: 100 keys probed first.
// fail:    100 keys probed afterwards.
// Lookups are classified by their actual outcome, not by which array the key
// came from.
void bin_search(std::fstream& out, int arr[], int n, int success[], int fail[]) {
    using namespace std::chrono;

    // Running min / max / mean of the lookup times of one category. The mean
    // divides by the number of lookups actually recorded (not a fixed 100),
    // and an empty category reports 0 instead of a made-up sentinel time.
    struct Stats {
        int count = 0;
        double fastest = 0;
        double slowest = 0;
        double total = 0;
        void add(double seconds) {
            if (count == 0 || seconds < fastest) fastest = seconds;
            if (count == 0 || seconds > slowest) slowest = seconds;
            total += seconds;
            ++count;
        }
        double average() const { return count > 0 ? total / count : 0.0; }
    };
    Stats found;
    Stats missed;

    std::sort(arr, arr + n);  // binary search requires sorted input

    for (int i = 0; i < 200; i++) {
        const int key = (i < 100) ? success[i] : fail[i - 100];
        int comparisons = 0;
        int left = 0;
        int right = n - 1;
        bool hit = false;

        const auto start = steady_clock::now();
        while (left <= right) {
            comparisons++;
            const int mid = left + (right - left) / 2;  // overflow-safe midpoint
            if (arr[mid] == key) {
                hit = true;
                break;
            }
            if (arr[mid] > key) right = mid - 1;
            else left = mid + 1;
        }
        const auto end = steady_clock::now();
        const double seconds = duration<double>(end - start).count();

        (hit ? found : missed).add(seconds);

        // Console and file receive identical text.
        const char* verdict = hit ? "成功 " : "失败 ";
        std::cout << "二分查找" << key << verdict << "查找次数:" << comparisons << '\n';
        out << "二分查找" << key << verdict << "查找次数:" << comparisons << '\n';
        std::cout << "查找时间:" << seconds << "s" << '\n';
        out << "查找时间:" << seconds << "s" << '\n';
    }

    // One summary line; used for both streams so they cannot drift apart.
    const auto report = [](std::ostream& os, const char* title, const Stats& s) {
        os << title << "最小查找时间=" << s.fastest << "s" << " " << "最大查找时间" << s.slowest << "s" << "平均查找时间" << s.average() << "s" << '\n';
    };

    std::cout << "--------------------------------" << '\n';
    report(std::cout, "二分查找成功;", found);
    report(out, "二分查找成功;", found);
    report(std::cout, "二分查找失败;", missed);
    report(out, "二分查找失败;", missed);
    std::cout.flush();
}
// Hash-lookup benchmark.
//
// Probes the presence table for the 100 keys of success[] followed by the 100
// keys of fail[]. For every lookup the key, the outcome, the probe count and
// the elapsed time are written to both std::cout and `out`; afterwards the
// minimum, maximum and average time of the successful and of the unsuccessful
// lookups are written to both streams.
//
// out:     open output stream receiving a copy of everything printed.
// h:       presence table; each key is used directly as an index, so the
//          table must cover every key in success[] and fail[].
// success: 100 keys probed first.
// fail:    100 keys probed afterwards.
// Lookups are classified by their actual outcome, not by which array the key
// came from.
void hashsearch(std::fstream& out, bool h[], int success[], int fail[]) {
    using namespace std::chrono;

    // Running min / max / mean of the lookup times of one category. The mean
    // divides by the number of lookups actually recorded (not a fixed 100),
    // and an empty category reports 0 instead of a made-up sentinel time.
    struct Stats {
        int count = 0;
        double fastest = 0;
        double slowest = 0;
        double total = 0;
        void add(double seconds) {
            if (count == 0 || seconds < fastest) fastest = seconds;
            if (count == 0 || seconds > slowest) slowest = seconds;
            total += seconds;
            ++count;
        }
        double average() const { return count > 0 ? total / count : 0.0; }
    };
    Stats found;
    Stats missed;

    const int probes = 1;  // every lookup inspects exactly one table slot

    for (int i = 0; i < 200; i++) {
        const int key = (i < 100) ? success[i] : fail[i - 100];

        const time_point<steady_clock> start = steady_clock::now();
        const bool hit = h[key];
        const time_point<steady_clock> end = steady_clock::now();
        const double seconds = duration<double>(end - start).count();

        (hit ? found : missed).add(seconds);

        // The count label differs by outcome in the established output format.
        const char* verdict = hit ? "成功 " : "失败 ";
        const char* label = hit ? "查找次数:" : "次数:";
        std::cout << "哈希查找:" << key << verdict << label << probes << '\n';
        out << "哈希查找:" << key << verdict << label << probes << '\n';
        std::cout << "查找时间:" << seconds << "s" << '\n';
        out << "查找时间:" << seconds << "s" << '\n';
    }

    // One summary line; used for both streams so they cannot drift apart.
    const auto report = [](std::ostream& os, const char* title, const Stats& s) {
        os << title << "最小查找时间=" << s.fastest << "s" << " " << "最大查找时间" << s.slowest << "s" << "平均查找时间" << s.average() << "s" << '\n';
    };

    std::cout << "--------------------------------" << '\n';
    report(std::cout, "哈希查找成功;", found);
    report(out, "哈希查找成功;", found);
    report(std::cout, "哈希查找失败;", missed);
    report(out, "哈希查找失败;", missed);
    std::cout.flush();
}
int main() {
createTestData();
string files[12] = { "100.txt","1k.txt","10k.txt","100k.txt","1M.txt","success.txt","fail.txt","100out.txt","1kout.txt","10kout.txt","100kout.txt","1Mout.txt" };
int* su = new int[105]();
int* fa = new int[105]();
fstream file;
file.open(files[5], ios::in);
Init(file, su);
file.close();
file.open(files[6], ios::in);
Init(file, fa);
file.close();
for (int i = 0; i < 5; i++) {
file.open(files[i], ios::in);
Init(file, arr, hashtable);
file.close();
file.open(files[i + 7], ios::out);
std::cout << "数据规模为" << 100 * pow(10, i) << "哈希查找测试数据:" << endl;
file << "数据规模为" << 100 * pow(10, i) << "哈希查找数据测试:" << endl;
hashsearch(file, hashtable, su, fa);
std::cout << "数据规模为" << 100 * pow(10, i) << "二分查找数据测试:" << endl;
file << "数据规模为" << 100 * pow(10, i) << "二分查找数据测试:" << endl;
bin_search(file, arr, 100 * pow(10, i), su, fa);
file.close();
}
delete[] arr;
delete[] su;
delete[] fa;
return 0;
}
本实验代码参考学长代码并进行改良,测试数据更加平均,且计时与随机数生成部分用C++11重写,参考代码:
https://blog.csdn.net/weixin_44307065/article/details/103441948