概述
前面,我们已经学习过如何进行内排序,以及内排序的常见算法。但是当文件过大、无法一次全部读入内存时,会出现什么情况?又该如何排序呢?为了解决这些问题,置换选择排序算法出现了:它属于外排序的第一阶段,用来把大文件切分成若干个有序的顺串。
预备知识
1.相关库
#include<math.h>
#include<time.h>
#include<memory.h>
算法过程
首先用当前时间作为随机数生成器的种子,生成大量的随机数,并写入一个txt文件。我们要做的就是对文件中的随机数进行排序。我们假设内存中最多能同时处理100 000个数(即代码中的M_SIZE),而文件中生成了1 000 000个随机数,因此无法一次性全部读入内存排序。我们用一个堆结构(最小堆)来进行处理。
临界情况
- 直接输出最小堆中的数
- 输出一部分数至当前的文件,另一部分数输出到下一个文件。
- 全部输出到下一个文件。
## 代码:
#include <iostream>
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#include<memory.h>
#include<assert.h>
#include<unistd.h> // windows.h sleep()
#include<math.h>
#define M_SIZE 100000 //Memory Size
#define _CRT_SECURE_NO_WARNINGS
/*
 * Singly linked list node describing one output run file.
 * Flist is a pointer to such a node; a list is terminated by next == NULL.
 */
typedef struct Fout_List
{
    FILE* fpoint;            /* stream of the run file this node refers to */
    struct Fout_List* next;  /* following run; the struct keyword keeps this valid in C as well as C++ */
} *Flist;
/*
 * Forward declarations for the functions defined later in this file.
 * Parameter names mirror the definitions; the sort declaration now matches
 * the defined name shellsort.
 */
Flist Replace_Selection(FILE* fin, Flist fout, long long int num, int* array, int M_size);
void swap(int* arr, int i, int j);
void HeapAdjust(int* arr, int size, int i);
void BuildMinHeap(int* arr, int size);
int HeadAdjust(int* arr, int size, int element);
int partition(int* arr, int left, int right);
void quicksort(int* arr, int left, int right);
void shellsort(int* arr, int length);
int main()
{
double sum_time = 0, average_time = 0;
clock_t start;
FILE* fin; //Unsorted record
FILE* log; //Running time
//fopen_s(&log, "log.txt", "a+");
log = fopen("log.txt", "a+"); // fopen_s(log, )
assert(log);
time_t current = time(¤t);/*Get current time*/
//char buff[26];
char* buff;
//ctime(buff, sizeof(buff), ¤t);
buff = ctime(¤t);
fprintf(log, "%s", buff);/*Print current time*/
long long int num = 0; //The size of the array
for (int m = 0; m < 1; m++) //Adjust the size of array.
{
num += 1000000;
printf("num = %lld\n", num);
int* array = (int*)malloc(M_SIZE * sizeof(int));
assert(array);
int repeat = 1; //The repeat time.
for (int n = 0; n < repeat; n++) //Adjust the repeat time.
{
printf("\trepeat = %d\n", repeat);
sum_time = 0;
average_time = 0;
for (int j = 0; j < 1; j++)
{
//fopen_s(&fin, "data.txt", "w");
fin = fopen("data.txt", "w");
assert(fin);
srand((unsigned)(time(NULL))); //Initialize random number generator
for (int i = 0; i < num; i++)
{
int temp = rand();
fprintf(fin, "%d\t", temp);
}
fclose(fin);
Flist fout = NULL;
memset(array, 0,M_SIZE * sizeof(int));
start = clock(); //
fout = Replace_Selection(fin, fout, num,array, M_SIZE); //
sum_time += ((double)clock() - (double)start); //Calculate cumulative time
}
sum_time /= CLOCKS_PER_SEC;
average_time = sum_time / repeat;
fprintf(log, "NUM:\t%lld\tREPEAT:\t%d\tS_TIME:\t%lf\t\tA_TIME:\t%lf\n", num, repeat, sum_time, average_time);
fprintf(log, "\n");
}
free(array);
}
fprintf(log, "\n\n");
fclose(log);
return 0;
}
/* Exchange the elements stored at positions i and j of arr. */
void swap(int * arr,int i,int j)
{
    int *first = arr + i;
    int *second = arr + j;
    int held = *second;

    *second = *first;
    *first = held;
}
/*
 * Shell sort: sorts arr[0 .. length-1] into ascending order in place.
 *
 * The gap starts at length/2 and is halved each pass; for every gap a
 * gapped insertion sort is run, with the interleaved groups handled in turn
 * by the single index i.
 */
void shellsort(int* arr, int length)
{
    for (int gap = length / 2; gap > 0; gap /= 2) {
        for (int i = gap; i < length; ++i) {
            int current = arr[i];
            int j = i;
            /* Shift larger elements of the same group one gap to the right. */
            while (j - gap >= 0 && current < arr[j - gap]) {
                arr[j] = arr[j - gap];
                j -= gap;
            }
            arr[j] = current;
        }
    }
}
/*
 * Partition arr[left .. right-1] around the value at arr[left].
 *
 * Elements smaller than the pivot are moved in front of it; the pivot's
 * final index is returned. The element at index `right` is never examined.
 */
int partition(int* arr,int left,int right)
{
    const int pivot_value = arr[left];
    int boundary = left;   /* last index known to hold a value smaller than the pivot */
    int scan = left + 1;

    while (scan < right) {
        if (arr[scan] < pivot_value) {
            ++boundary;
            swap(arr, scan, boundary);
        }
        ++scan;
    }

    /* Drop the pivot between the "smaller" prefix and the rest. */
    swap(arr, left, boundary);
    return boundary;
}
/*
* 快速排序(Quick Sort)
* 算法描述:
* 利用分治法把一个串分为两个子串
* 从序列中挑选一个元素,称为基准
* 重新排序数列,所有元素比基准小的摆在基准前面,所有元素比基准大的摆在基准的后面(相同的数可以放到任一边)
* 在这个分区退出之后,该基准就处于数列的中间位置,称之为分区操作
* 递归的把小于基准值的子数列和大于基准值的子序列排序
*/
void quicksort(int * arr,int left,int right)
{
//int len = arr->length;
int partitionindex;
if(left<right)
{
partitionindex = partition(arr,left,right);
quicksort(arr,left,partitionindex-1);
quicksort(arr,partitionindex+1,right);
}
}
/*
 * Sift the element at index i down through the min-heap arr[0 .. size-1]
 * until neither of its children is smaller than it.
 */
void HeapAdjust(int* arr, int size, int i)
{
    for (;;)
    {
        int smallest = i;            /* index of the minimum among node i and its children */
        int left_child = 2 * i + 1;
        int right_child = left_child + 1;

        if (left_child < size && arr[left_child] < arr[smallest])
            smallest = left_child;
        if (right_child < size && arr[right_child] < arr[smallest])
            smallest = right_child;

        if (smallest == i)
            return;                  /* heap property holds at this node */

        swap(arr, i, smallest);      /* move the smaller child up */
        i = smallest;                /* continue from the child's position */
    }
}
/*
 * Build a min-heap over arr[0 .. size-1] by sifting down every non-leaf
 * node, from the last one (index size/2 - 1) back to the root.
 */
void BuildMinHeap(int* arr, int size)
{
    int node = size / 2;

    while (node-- > 0)
    {
        HeapAdjust(arr, size, node);
    }
}
/*
* 在选择排序中,当将最小值堆的根结点输出后,
* 再从缓冲区扫描入一个元素,再调整堆结构
*/
int HeadAdjust(int* arr, int size, int element)
{
//此时arr已经是最小值堆
if(element>=arr[0])
{
//如果新加进来的元素大于原最小值,则将新元素放在堆顶,并重新建堆
arr[0] = element;
BuildMinHeap(arr, size);
return size;
}
if(element<arr[0])
{
arr[0] = arr[size-1];
arr[size-1] = element;
BuildMinHeap(arr, size-1);
size = size - 1;
return size;
}
}
/*
 * At this point no more external elements are coming in.
 */
// Merging is not performed; num is the number of records in the file, fout is the file to write to
// num is the data size
// M_size is the memory size

/* Empty placeholder: does nothing. */
void killbill()
{
    return;
}
/* Open "fout/<run_index>.txt" for writing and append a list node for it
 * after `tail` (or as the head when the list is still empty). */
static Flist open_next_run(Flist* head, Flist tail, int run_index)
{
    char fname[32];
    snprintf(fname, sizeof fname, "fout/%d.txt", run_index);

    Flist node = (Flist)malloc(sizeof *node);
    assert(node);
    node->next = NULL;
    node->fpoint = fopen(fname, "w");
    assert(node->fpoint);

    if (tail != NULL)
        tail->next = node;
    else
        *head = node;
    return node;
}

/*
 * Replacement selection: split data.txt into sorted runs.
 *
 * Reads at most `num` whitespace-separated integers from data.txt and writes
 * them, tab-separated and in ascending order within each file, to the run
 * files fout/0.txt, fout/1.txt, ... (the directory must already exist).
 *
 * Parameters:
 *   fin    - ignored on entry; data.txt is opened (and closed) here.
 *   fout   - ignored on entry; the list is built from scratch.
 *   num    - number of records expected in data.txt; reading also stops
 *            early if the file runs out of parseable integers.
 *   array  - caller-owned work buffer of at least M_size ints.
 *   M_size - number of records that fit in memory at once.
 *
 * Returns the head of a list with one node per run file, in creation order,
 * or NULL when no record could be read. Every run file has been closed, so
 * each node's fpoint is NULL. The caller owns the nodes and frees them.
 *
 * Buffer layout while running:
 *   array[0 .. live-1]      min-heap of records eligible for the current run
 *   array[live .. total-1]  records held back for the next run
 */
Flist Replace_Selection(FILE* fin, Flist fout, long long int num, int* array, int M_size)
{
    (void)fout;

    fin = fopen("data.txt", "rb");
    assert(fin);

    /* Fill the buffer with as many records as are available and fit. */
    int total = 0;
    while (total < M_size && total < num && fscanf(fin, "%d", array + total) == 1)
    {
        total++;
    }
    long long int remaining = num - total;   /* records still to be read from the file */

    Flist head = NULL;
    Flist tail = NULL;
    int run_count = 0;
    int live = total;

    if (live > 0)
    {
        BuildMinHeap(array, live);
        tail = open_next_run(&head, tail, run_count++);
    }

    while (live > 0)
    {
        /* The heap root is the smallest record that can extend the current run. */
        fprintf(tail->fpoint, "%d\t", array[0]);

        int element = 0;
        if (remaining > 0 && fscanf(fin, "%d", &element) == 1)
        {
            /* Replace the root with the new record, or park the record for the next run. */
            remaining--;
            live = HeadAdjust(array, live, element);
        }
        else
        {
            /* Input exhausted: drain the heap without refilling it. */
            remaining = 0;
            live--;
            array[0] = array[live];       /* last heap element becomes the new root */
            total--;
            array[live] = array[total];   /* close the gap so the held-back records stay contiguous */
            HeapAdjust(array, live, 0);
        }

        if (live == 0)
        {
            /* Current run is complete. */
            fclose(tail->fpoint);
            tail->fpoint = NULL;

            if (total > 0)
            {
                /* The held-back records seed the next run. */
                live = total;
                BuildMinHeap(array, live);
                tail = open_next_run(&head, tail, run_count++);
            }
        }
    }

    fclose(fin);
    return head;
}
如有不明白的地方,请联系我QQ1972135329, 随时。
我爱王艺娴。