《数据结构与算法分析——C语言描述》 第七章
初始化顺序串很有意思。数字以文本模式存放在文件中,每个数字大小不同,字符长度也不一样,想要提前知道一个文件里有多少个数字,只能遍历一遍文件来计数。硬盘读写慢得要命,就算是SSD,读写也才500MB/s,而内存(不包括cache命中)有20000MB/s。但多相合并要根据数字数量按照斐波那契数列把顺序串分配到各个文件中,所以一定要读一个数存一个数。
version1
这个版本有个问题:文件结尾是空格,就算用feof也不能及时知道已经到了文件结尾,造成多迭代一次,添加了不必要的哑串。后面改成另一种模式:以是否成功读入数字作为循环的入口条件,并换成单循环。
/**
 * Version 1 of the initial run distribution.
 *
 * Reads the input M integers at a time, sorts each chunk and writes it as one
 * run to T1..TK, filling every file up to the run count given by the current
 * entry of the K-th order Fibonacci array. A chunk that cannot be filled from
 * the input is padded with zeros (dummy values); once the input is exhausted
 * the remaining slots of the current Fibonacci level get all-zero dummy runs.
 *
 * Every run written here is exactly M numbers long.
 *
 * @param inputFileName path of a text file with whitespace-separated integers.
 *                      Reading stops at end of file or at the first token that
 *                      is not an integer.
 */
void initRun(char *inputFileName) {
    int max_memory[M]; // simulated main memory: holds at most M numbers
    FILE *ori = fopen(inputFileName, "r");
    if (ori == NULL) {
        perror(inputFileName);
        exit(EXIT_FAILURE);
    }
    char name[20];
    for (int i = 0; i < K; i++) { // open T1..TK for writing
        file[i] = fopen(fileName(name, i), "w");
        if (file[i] == NULL) {
            perror(name);
            exit(EXIT_FAILURE);
        }
    }
    int dummyNumCnt = 0; // how many padding zeros were written
    int fibonacci[K];    // K-th order Fibonacci numbers of the current level
    initFibonacci(fibonacci);
    memset(runLen, 0, sizeof(runLen));
    // The loop below compares against runNum, so that is the counter that
    // has to start from zero.
    memset(runNum, 0, sizeof(runNum));
    int end = 0; // set to 1 once the input has been used up
    while (!end) {
        for (int writeNum = 0; writeNum < K; writeNum++) { // 0 is T1, 1 is T2, ...
            // Fill this file up to its quota for the current Fibonacci level.
            while (runNum[writeNum] < fibonacci[K - 1 - writeNum]) {
                if (end == 0) {
                    int readNum = 0;
                    // Compare with 1 instead of EOF: on a malformed token
                    // fscanf returns 0 without consuming it, which would
                    // otherwise spin forever.
                    while (readNum < M && fscanf(ori, "%d", &max_memory[readNum]) == 1) {
                        readNum++;
                    }
                    if (readNum < M) {
                        end = 1;
                        dummyNumCnt += (M - readNum);
                        for (int i = readNum; i < M; i++)
                            max_memory[i] = 0;
                    }
                    quickSort_my(max_memory, M);
                    // Write the whole padded chunk. After sorting, the zeros
                    // sit at the front, so writing only readNum items would
                    // emit the zeros and drop the largest real values.
                    write(max_memory, M, file[writeNum]);
                }
                else {
                    // Input exhausted: complete the level with dummy runs.
                    memset(max_memory, 0, sizeof(max_memory));
                    write(max_memory, M, file[writeNum]);
                    dummyNumCnt += M;
                }
                runNum[writeNum]++;
            }
        }
        updateFibonacci(fibonacci);
    }
    fclose(ori);
    for (int i = 0; i < K; i++)
        fclose(file[i]);
}
version2
实在想不到怎样用K路合并,想到的是一个文件对另外K-1个文件进行2路合并,宏定义的K为3的时候没有问题。K改成4、其他的就出问题了。代码写得一坨屎。。。这个断断续续写了我六七天了。深感智商之捉急。
想了一下,K大于等于4时出问题的原因是:仅靠最长文件的序号、写文件的序号、读文件的序号,不能判断一次循环中哪些文件已经处理过,得用额外的数据结构来表示。
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include<queue>
#include"fatal.h"
// Number of integers the simulated main memory (max_memory) can hold;
// initial runs are written with this length.
#define M 3
// Number of files (T1..TK) that receive the initial runs; the file arrays
// are declared with K + 1 slots.
#define K 3
// Element type handled by the sorting routines.
typedef int ElementType;
/**
 * Sorts the first n integers of a into ascending order with insertion sort.
 *
 * @param a array to sort in place
 * @param n number of elements; nothing happens for n <= 1
 */
void insertionSort(int *a, int n) {
    for (int next = 1; next < n; ++next) {
        const int key = a[next];
        int hole = next;
        // Shift larger elements one slot right until key's position is found.
        while (hole > 0 && key < a[hole - 1]) {
            a[hole] = a[hole - 1];
            --hole;
        }
        a[hole] = key;
    }
}
/** Exchanges the two elements pointed to by a and b. */
void swap_my(ElementType *a, ElementType *b) {
    const ElementType saved = *b;
    *b = *a;
    *a = saved;
}
/**
 * Median-of-three pivot selection for quicksort.
 *
 * Orders a[left], a[center] and a[right], then parks the median at
 * a[right - 1] and returns it.
 *
 * @param a     array being sorted
 * @param left  index of the first element of the range
 * @param right index of the last element of the range
 * @return the pivot value, now stored at a[right - 1]
 */
ElementType median3(ElementType a[], int left, int right) {
    const int mid = (left + right) / 2;
    ElementType *lo = &a[left];
    ElementType *md = &a[mid];
    ElementType *hi = &a[right];

    if (*lo > *md)
        swap_my(lo, md);
    if (*lo > *hi)
        swap_my(lo, hi);
    if (*md > *hi)
        swap_my(md, hi);

    // Hide the pivot next to the right end.
    ElementType *pivotSlot = &a[right - 1];
    swap_my(md, pivotSlot);
    return *pivotSlot;
}
// Ranges with right - left below this value are handed to insertion sort.
#define CUTOFF (3)
/**
 * Recursive quicksort of a[left..right] (both ends inclusive) using
 * median-of-three partitioning.
 */
void qsort_my(ElementType a[], int left, int right) {
    if (left + CUTOFF > right) {
        // Small range: finish with insertion sort.
        insertionSort(a + left, right - left + 1);
        return;
    }

    const ElementType pivot = median3(a, left, right);
    int lo = left;
    int hi = right - 1; // the pivot sits at right - 1
    for (;;) {
        do {
            ++lo;
        } while (a[lo] < pivot);
        do {
            --hi;
        } while (a[hi] > pivot);
        if (lo >= hi)
            break;
        swap_my(&a[lo], &a[hi]);
    }
    // Move the pivot into its final position.
    swap_my(&a[lo], &a[right - 1]);

    qsort_my(a, left, lo - 1);
    qsort_my(a, lo + 1, right);
}
/** Sorts the n elements of a in ascending order; driver for qsort_my. */
void quickSort_my(ElementType a[], int n) {
    const int first = 0;
    const int last = n - 1;
    qsort_my(a, first, last);
}
FILE* file[K + 1];// array of file pointers, indexed by file number
int runLen[K + 1];// length of one run in each file
int runNum[K + 1];// number of runs currently in each file
char name[200];// buffer that receives generated file names
/**
 * Writes the first n integers of a to out as text, each followed by a space.
 *
 * @param a   values to write
 * @param n   number of values
 * @param out destination stream, already open for writing
 */
void write(int *a, int n, FILE *out) {
    for (int remaining = n; remaining > 0; --remaining) {
        fprintf(out, "%d ", *a);
        ++a;
    }
}
/**
 * Builds the name of file number i: "T" followed by the 1-based index,
 * e.g. i == 0 gives "T1".
 *
 * Uses sprintf rather than the Microsoft-specific _itoa so the code builds
 * on any standard toolchain, and no longer goes through a 5-byte scratch
 * buffer that overflowed for indices with five or more digits.
 *
 * @param buf destination buffer; must have room for 'T', the decimal digits
 *            of i + 1 and the terminating NUL
 * @param i   zero-based file index
 * @return buf
 */
char* fileName(char *buf, int i) {
    sprintf(buf, "T%d", i + 1);
    return buf;
}
// Pair of two ints.
// NOTE(review): neither Pair_int nor cmp appears to be referenced by the rest of the version-2 code shown here — confirm before removing.
typedef std::pair<int, int> Pair_int;
auto cmp = [](const Pair_int& left, const Pair_int& right) { return (left.first) > (right.first); };// lambda expression, a compact way to write a function; true when left.first is greater than right.first
/**
 * Returns a pseudo-random integer in the closed range [i, j], drawn with rand().
 *
 * The previous formula scaled by (j - i) and divided by RAND_MAX, so the upper
 * bound j was produced only when rand() returned exactly RAND_MAX. Scaling by
 * the number of values (j - i + 1) and dividing by RAND_MAX + 1 gives every
 * value in the range an (almost) equal share.
 *
 * @param i lower bound (inclusive)
 * @param j upper bound (inclusive); expected to satisfy i <= j
 * @return a value v with i <= v <= j
 */
int RandInt(int i, int j) {
    // Computed in double so that j - i + 1 cannot overflow int.
    const double span = static_cast<double>(j) - static_cast<double>(i) + 1.0;
    const double unit = rand() / (RAND_MAX + 1.0); // in [0, 1)
    return i + static_cast<int>(unit * span);
}
/**
 * Fills A[0..n-1] with the values 1..n and then shuffles them: each position
 * from 1 upward is swapped with a position chosen by RandInt(0, position).
 *
 * @param A output array with room for n ints
 * @param n number of values to generate
 */
void getRandomInt(int *A, int n) {
    int pos = 0;
    while (pos < n) {
        A[pos] = pos + 1;
        ++pos;
    }
    for (pos = 1; pos < n; ++pos) {
        const int other = RandInt(0, pos);
        const int saved = A[other];
        A[other] = A[pos];
        A[pos] = saved;
    }
}
// How many numbers the generated test input contains.
#define N 100
/**
 * Generates a shuffled sequence of N integers with getRandomInt and writes
 * it, space separated, to the file "ta1" in the current directory.
 *
 * Terminates the program with a diagnostic if the file cannot be created;
 * previously a failed fopen led to fprintf being called on a null stream.
 */
void writeRandIntToFile() {
    int a[N];
    getRandomInt(a, N);
    FILE *fp = fopen("ta1", "w");
    if (fp == NULL) {
        perror("ta1");
        exit(EXIT_FAILURE);
    }
    for (const int value : a)
        fprintf(fp, "%d ", value);
    fclose(fp);
}
/**
 * Initialises the K-element Fibonacci array: the first K - 2 entries are 0
 * and the last two entries are 1.
 *
 * @param arr array with K entries
 */
void initFibonacci(int *arr) {
    int *slot = arr;
    int *const zerosEnd = arr + (K - 2);
    while (slot < zerosEnd) {
        *slot = 0;
        ++slot;
    }
    slot[0] = 1; // index K - 2
    slot[1] = 1; // index K - 1
}
/**
 * Advances the K-element Fibonacci array by one step: every entry moves one
 * slot towards index 0 and the last entry becomes the sum of all K previous
 * entries.
 *
 * @param arr array with K entries, updated in place
 */
void updateFibonacci(int *arr) {
    int shiftedOut = 0; // sum of the old entries 0 .. K-2
    int idx = 0;
    while (idx < K - 1) {
        shiftedOut += arr[idx];
        arr[idx] = arr[idx + 1];
        ++idx;
    }
    arr[K - 1] = arr[K - 1] + shiftedOut;
}
/**
 * Sorts the M numbers in max_memory and appends them as one run to the file
 * selected by writeNum, keeping the per-file run counts in step with the
 * Fibonacci array.
 *
 * @param readNum    index into max_memory; reset to 0 here
 * @param writeNum   index of the file to write to; moved to the next file once
 *                   the current one has reached its quota, and back to 0 when a
 *                   Fibonacci level is complete
 * @param fibonacci  current K-th order Fibonacci numbers; the quota of file w
 *                   is fibonacci[K - 1 - w]
 * @param max_memory buffer holding exactly M numbers
 */
void handleRun(int &readNum, int &writeNum, int *fibonacci, int *max_memory) {
    readNum = 0;
    quickSort_my(max_memory, M);

    // The target file is already at its quota: move on to the next Fibonacci
    // level and start again from the first file, until a file has room.
    while (!(runNum[writeNum] < fibonacci[K - 1 - writeNum])) {
        writeNum = 0;
        updateFibonacci(fibonacci);
    }

    write(max_memory, M, file[writeNum]);
    runNum[writeNum] += 1;

    if (runNum[writeNum] != fibonacci[K - 1 - writeNum])
        return; // this file still has room in the current level

    writeNum += 1;
    if (writeNum == K) { // every file is full: next level
        updateFibonacci(fibonacci);
        writeNum = 0;
    }
}
/**
 * Builds the initial runs for the polyphase merge.
 *
 * Reads whitespace-separated integers from inputFileName M at a time; each
 * full buffer is sorted and written by handleRun as one run to one of the
 * files T1..TK. A trailing partial buffer is padded with zeros, and finally
 * all-zero dummy runs are appended until every file from the current write
 * position onward has reached its quota in the current Fibonacci level.
 *
 * Terminates the program with a diagnostic if a file cannot be opened.
 *
 * @param inputFileName path of the input file. Reading stops at end of file
 *                      or at the first token that is not an integer.
 */
void initRun(char *inputFileName) {
    int max_memory[M]; // simulated main memory
    FILE *ori = fopen(inputFileName, "r");
    if (ori == NULL) {
        perror(inputFileName);
        exit(EXIT_FAILURE);
    }
    char name[20];
    for (int i = 0; i < K; i++) { // open T1..TK for writing
        file[i] = fopen(fileName(name, i), "w");
        if (file[i] == NULL) {
            perror(name);
            exit(EXIT_FAILURE);
        }
    }
    int dummyNumCnt = 0; // number of padding zeros added to the partial run
    int writeNum = 0;    // 0 is T1, 1 is T2, ...
    int fibonacci[K];    // K-th order Fibonacci numbers
    initFibonacci(fibonacci);
    for (int i = 0; i < K; i++) // every initial run is M numbers long
        runLen[i] = M;
    int readNum = 0; // next free slot in max_memory
    // Compare with 1 instead of EOF: on a malformed token fscanf returns 0
    // without consuming it, and the loop would never end.
    while (fscanf(ori, "%d", &max_memory[readNum]) == 1) {
        readNum++;
        if (readNum < M) // buffer not full yet
            continue;
        handleRun(readNum, writeNum, fibonacci, max_memory);
    }
    if (readNum != 0) { // the last run is incomplete: pad it with zeros
        dummyNumCnt += (M - readNum);
        while (readNum < M)
            max_memory[readNum++] = 0;
        handleRun(readNum, writeNum, fibonacci, max_memory);
    }
    memset(max_memory, 0, M * sizeof(int)); // an all-zero dummy run
    while (writeNum < K) {
        if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) { // quota not reached yet
            write(max_memory, M, file[writeNum]);
            runNum[writeNum]++;
        }
        else {
            writeNum++;
        }
    }
    fclose(ori);
    for (int i = 0; i < K; i++)
        fclose(file[i]);
}
/**
 * Reports whether the merge is complete.
 *
 * @return 1 when fewer than two of the K + 1 files still hold a run, else 0
 */
int isFinish() {
    int nonEmpty = 0;
    for (int idx = 0; idx <= K && nonEmpty < 2; ++idx)
        nonEmpty += (runNum[idx] >= 1) ? 1 : 0;
    return (nonEmpty < 2) ? 1 : 0;
}
/**
 * Two-way merges one run from file[longest] with one run from file[read]
 * and appends the result to file[write].
 *
 * runLen[longest] numbers are taken from the first file and runLen[read]
 * from the second. On equal values the one from file[read] is written first.
 *
 * @param longest index of the first input file
 * @param read    index of the second input file
 * @param write   index of the output file
 */
void mergeRun(int longest, int read, int write) {
    FILE *const srcA = file[longest];
    FILE *const srcB = file[read];
    FILE *const dst = file[write];
    const int lenA = runLen[longest];
    const int lenB = runLen[read];

    int takenA = 0, takenB = 0; // numbers already written from each run
    int headA, headB;           // look-ahead value of each run
    bool haveA = false, haveB = false; // whether the look-ahead is loaded

    while (takenA < lenA && takenB < lenB) {
        if (!haveA) {
            fscanf(srcA, "%d", &headA);
            haveA = true;
        }
        if (!haveB) {
            fscanf(srcB, "%d", &headB);
            haveB = true;
        }
        if (headA < headB) {
            fprintf(dst, "%d ", headA);
            haveA = false;
            ++takenA;
        }
        else {
            fprintf(dst, "%d ", headB);
            haveB = false;
            ++takenB;
        }
    }

    // Copy whatever is left of the first run; a loaded look-ahead goes first.
    for (; takenA < lenA; ++takenA) {
        if (!haveA)
            fscanf(srcA, "%d", &headA);
        fprintf(dst, "%d ", headA);
        haveA = false;
    }
    // Same for the second run.
    for (; takenB < lenB; ++takenB) {
        if (!haveB)
            fscanf(srcB, "%d", &headB);
        fprintf(dst, "%d ", headB);
        haveB = false;
    }
}
/**
 * Finds the next file after index now that still holds runs and is neither
 * the longest file nor the write file.
 *
 * @param longestNum index to skip
 * @param writeNum   index to skip
 * @param now        search starts at now + 1
 * @return the index found, or -1 when there is none up to index K
 */
int nextReadNum(int longestNum, int writeNum, int now) {
    int candidate = now + 1;
    while (candidate <= K) {
        const bool excluded = (candidate == longestNum) || (candidate == writeNum);
        if (!excluded && runNum[candidate] > 0)
            return candidate;
        ++candidate;
    }
    return -1;
}
/**
 * Picks, among all K + 1 files except longestNum and writeNum, the one with
 * the most runs; the lowest index wins a tie.
 *
 * @param longestNum index to skip
 * @param writeNum   index to skip
 * @return index of the chosen file, or -1 when no other file holds a run
 */
int firstReadNum(int longestNum, int writeNum) {
    int bestIdx = -1;
    int bestRuns = 0;
    for (int idx = 0; idx <= K; ++idx) {
        if (idx == longestNum || idx == writeNum)
            continue;
        if (runNum[idx] <= bestRuns)
            continue;
        bestIdx = idx;
        bestRuns = runNum[idx];
    }
    return bestIdx;
}
// Version-2 driver: writes a random input file, distributes it into initial
// runs, then repeatedly two-way merges the current "longest" file with the
// other files until isFinish() reports that fewer than two files hold runs.
// NOTE(review): the surrounding notes say this version misbehaves when K is 4
// or more, because the three indices alone cannot tell which files were
// already handled in a pass.
int main() {
writeRandIntToFile();
char inputFileName[20] = "ta1";
//scanf("%s", inputFileName);
initRun(inputFileName);
int oldLongestNum=-1;// longest file of the previous pass; -1 before the first pass
int longestNum = 0;// file that is merged against all the others in this pass
int nextLongestNum;// file that becomes longestNum in the next pass
int writeNum = K;// current output file; starts with the spare file at index K
int readNum = 1;// current second input file
int testCnt = 0;// counts mergeRun calls; not read anywhere in this function
int oldLongestOpenTag = 0;// 1 when the previous longest file was left open with runs remaining
while (!isFinish()) {
int cnt = 0;// merges done in this pass, at most K - 1
file[longestNum] = fopen(fileName(name, longestNum), "r");
nextLongestNum = writeNum;// the first file written in this pass is the next longest
while (cnt < K - 1 && !isFinish()) {
cnt++;
file[writeNum] = fopen(fileName(name, writeNum), "w");
// Reopen the read file unless it is the previous longest file that is still open.
if (readNum != oldLongestNum || (readNum == oldLongestNum && oldLongestOpenTag == 0))
file[readNum] = fopen(fileName(name, readNum), "r");
//runNum[writeNum] = 0;
// Merge run by run until the read file has no runs left.
while (runNum[readNum] > 0) {
mergeRun(longestNum, readNum, writeNum);
runNum[longestNum]--;
runNum[readNum]--;
runNum[writeNum]++;
testCnt++;
}
// A merged run is as long as the two runs it was built from.
runLen[writeNum] = runLen[readNum] + runLen[longestNum];
fclose(file[writeNum]);
int oldwriteNum = writeNum;
writeNum = readNum;// the emptied read file is the next output file
fclose(file[readNum]);
readNum = nextReadNum(longestNum, oldwriteNum, readNum);// NOTE(review): the author marked this line with "???"
if(readNum==-1)
readNum= nextReadNum(longestNum, oldwriteNum, -1);// nothing found after the old index: search again from index 0
}
if (runNum[longestNum] == 0) {
fclose(file[longestNum]);
oldLongestOpenTag = 0;
}
else
oldLongestOpenTag = 1;// runs remain; the file stays open
oldLongestNum = longestNum;
longestNum = nextLongestNum;
readNum = firstReadNum(longestNum, writeNum);// start the next pass with the file holding the most runs
}
}
version3
算是终结这个了,用了两个堆,一个队列来表示未处理的文件,已处理的文件,空的文件。逻辑很清晰,代码很优美。
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include<queue>
#include"fatal.h"
#define M 3// capacity of the simulated memory, in integers
#define K 8// K-way sort: number of files that receive the initial runs
#define N 222// how many numbers are sorted; the values are 1..N
// Element type handled by the sorting routines.
typedef int ElementType;
/**
 * Sorts the first n integers of a into ascending order with insertion sort.
 *
 * @param a array to sort in place
 * @param n number of elements; nothing happens for n <= 1
 */
void insertionSort(int *a, int n) {
    for (int next = 1; next < n; ++next) {
        const int key = a[next];
        int hole = next;
        // Shift larger elements one slot right until key's position is found.
        while (hole > 0 && key < a[hole - 1]) {
            a[hole] = a[hole - 1];
            --hole;
        }
        a[hole] = key;
    }
}
/** Exchanges the two elements pointed to by a and b. */
void swap_my(ElementType *a, ElementType *b) {
    const ElementType saved = *b;
    *b = *a;
    *a = saved;
}
/**
 * Median-of-three pivot selection for quicksort.
 *
 * Orders a[left], a[center] and a[right], then parks the median at
 * a[right - 1] and returns it.
 *
 * @param a     array being sorted
 * @param left  index of the first element of the range
 * @param right index of the last element of the range
 * @return the pivot value, now stored at a[right - 1]
 */
ElementType median3(ElementType a[], int left, int right) {
    const int mid = (left + right) / 2;
    ElementType *lo = &a[left];
    ElementType *md = &a[mid];
    ElementType *hi = &a[right];

    if (*lo > *md)
        swap_my(lo, md);
    if (*lo > *hi)
        swap_my(lo, hi);
    if (*md > *hi)
        swap_my(md, hi);

    // Hide the pivot next to the right end.
    ElementType *pivotSlot = &a[right - 1];
    swap_my(md, pivotSlot);
    return *pivotSlot;
}
// Ranges with right - left below this value are handed to insertion sort.
#define CUTOFF (3)
/**
 * Recursive quicksort of a[left..right] (both ends inclusive) using
 * median-of-three partitioning.
 */
void qsort_my(ElementType a[], int left, int right) {
    if (left + CUTOFF > right) {
        // Small range: finish with insertion sort.
        insertionSort(a + left, right - left + 1);
        return;
    }

    const ElementType pivot = median3(a, left, right);
    int lo = left;
    int hi = right - 1; // the pivot sits at right - 1
    for (;;) {
        do {
            ++lo;
        } while (a[lo] < pivot);
        do {
            --hi;
        } while (a[hi] > pivot);
        if (lo >= hi)
            break;
        swap_my(&a[lo], &a[hi]);
    }
    // Move the pivot into its final position.
    swap_my(&a[lo], &a[right - 1]);

    qsort_my(a, left, lo - 1);
    qsort_my(a, lo + 1, right);
}
/** Sorts the n elements of a in ascending order; driver for qsort_my. */
void quickSort_my(ElementType a[], int n) {
    const int first = 0;
    const int last = n - 1;
    qsort_my(a, first, last);
}
FILE* file[K + 1];// array of file pointers, indexed by file number
int runLen[K + 1];// length of one run in each file
int runNum[K + 1];// number of runs currently in each file
char name[200];// buffer that receives generated file names
typedef std::pair<int, int> Pair_int;// first is the file index, second is its runNum
auto cmp = [](const Pair_int& left, const Pair_int& right) { return (left.second) < (right.second); };// lambda expression, a compact way to write a function; it compares the second member of the pairs, so the heaps below keep the file with the most runs on top
std::queue<int> nullFile;// list of empty files
std::priority_queue<Pair_int, std::vector<Pair_int>, decltype(cmp)>fileHeap1(cmp);// author's note: not sure how to make an array of these
std::priority_queue<Pair_int, std::vector<Pair_int>, decltype(cmp)>fileHeap2(cmp);
/**
 * Writes the first n integers of a to out as text, each followed by a space.
 *
 * @param a   values to write
 * @param n   number of values
 * @param out destination stream, already open for writing
 */
void write(int *a, int n, FILE *out) {
    for (int remaining = n; remaining > 0; --remaining) {
        fprintf(out, "%d ", *a);
        ++a;
    }
}
/**
 * Builds the name of file number i: "T" followed by the 1-based index,
 * e.g. i == 0 gives "T1".
 *
 * Uses sprintf rather than the Microsoft-specific _itoa so the code builds
 * on any standard toolchain, and no longer goes through a 5-byte scratch
 * buffer that overflowed for indices with five or more digits.
 *
 * @param buf destination buffer; must have room for 'T', the decimal digits
 *            of i + 1 and the terminating NUL
 * @param i   zero-based file index
 * @return buf
 */
char* fileName(char *buf, int i) {
    sprintf(buf, "T%d", i + 1);
    return buf;
}
/**
 * Returns a pseudo-random integer in the closed range [i, j], drawn with rand().
 *
 * The previous formula scaled by (j - i) and divided by RAND_MAX, so the upper
 * bound j was produced only when rand() returned exactly RAND_MAX. Scaling by
 * the number of values (j - i + 1) and dividing by RAND_MAX + 1 gives every
 * value in the range an (almost) equal share.
 *
 * @param i lower bound (inclusive)
 * @param j upper bound (inclusive); expected to satisfy i <= j
 * @return a value v with i <= v <= j
 */
int RandInt(int i, int j) {
    // Computed in double so that j - i + 1 cannot overflow int.
    const double span = static_cast<double>(j) - static_cast<double>(i) + 1.0;
    const double unit = rand() / (RAND_MAX + 1.0); // in [0, 1)
    return i + static_cast<int>(unit * span);
}
/**
 * Fills A[0..n-1] with the values 1..n and then shuffles them: each position
 * from 1 upward is swapped with a position chosen by RandInt(0, position).
 *
 * @param A output array with room for n ints
 * @param n number of values to generate
 */
void getRandomInt(int *A, int n) {
    int pos = 0;
    while (pos < n) {
        A[pos] = pos + 1;
        ++pos;
    }
    for (pos = 1; pos < n; ++pos) {
        const int other = RandInt(0, pos);
        const int saved = A[other];
        A[other] = A[pos];
        A[pos] = saved;
    }
}
/**
 * Generates a shuffled sequence of N integers with getRandomInt and writes
 * it, space separated, to the file "ta1" in the current directory.
 *
 * Terminates the program with a diagnostic if the file cannot be created;
 * previously a failed fopen led to fprintf being called on a null stream.
 */
void writeRandIntToFile() {
    int a[N];
    getRandomInt(a, N);
    FILE *fp = fopen("ta1", "w");
    if (fp == NULL) {
        perror("ta1");
        exit(EXIT_FAILURE);
    }
    for (const int value : a)
        fprintf(fp, "%d ", value);
    fclose(fp);
}
/**
 * Initialises the K-element Fibonacci array: the first K - 2 entries are 0
 * and the last two entries are 1.
 *
 * @param arr array with K entries
 */
void initFibonacci(int *arr) {
    int *slot = arr;
    int *const zerosEnd = arr + (K - 2);
    while (slot < zerosEnd) {
        *slot = 0;
        ++slot;
    }
    slot[0] = 1; // index K - 2
    slot[1] = 1; // index K - 1
}
/**
 * Advances the K-element Fibonacci array by one step: every entry moves one
 * slot towards index 0 and the last entry becomes the sum of all K previous
 * entries.
 *
 * @param arr array with K entries, updated in place
 */
void updateFibonacci(int *arr) {
    int shiftedOut = 0; // sum of the old entries 0 .. K-2
    int idx = 0;
    while (idx < K - 1) {
        shiftedOut += arr[idx];
        arr[idx] = arr[idx + 1];
        ++idx;
    }
    arr[K - 1] = arr[K - 1] + shiftedOut;
}
/**
 * Sorts the M numbers in max_memory and appends them as one run to the file
 * selected by writeNum, keeping the per-file run counts in step with the
 * Fibonacci array.
 *
 * @param readNum    index into max_memory; reset to 0 here
 * @param writeNum   index of the file to write to; moved to the next file once
 *                   the current one has reached its quota, and back to 0 when a
 *                   Fibonacci level is complete
 * @param fibonacci  current K-th order Fibonacci numbers; the quota of file w
 *                   is fibonacci[K - 1 - w]
 * @param max_memory buffer holding exactly M numbers
 */
void handleRun(int &readNum, int &writeNum, int *fibonacci, int *max_memory) {
    readNum = 0;
    quickSort_my(max_memory, M);

    // The target file is already at its quota: move on to the next Fibonacci
    // level and start again from the first file, until a file has room.
    while (!(runNum[writeNum] < fibonacci[K - 1 - writeNum])) {
        writeNum = 0;
        updateFibonacci(fibonacci);
    }

    write(max_memory, M, file[writeNum]);
    runNum[writeNum] += 1;

    if (runNum[writeNum] != fibonacci[K - 1 - writeNum])
        return; // this file still has room in the current level

    writeNum += 1;
    if (writeNum == K) { // every file is full: next level
        updateFibonacci(fibonacci);
        writeNum = 0;
    }
}
/**
 * Builds the initial runs for the polyphase merge and registers every file
 * for the merge phase.
 *
 * Reads whitespace-separated integers from inputFileName M at a time; each
 * full buffer is sorted and written by handleRun as one run to one of the
 * files T1..TK. A trailing partial buffer is padded with zeros, and finally
 * all-zero dummy runs are appended until every file from the current write
 * position onward has reached its quota in the current Fibonacci level.
 *
 * Afterwards each file that holds runs is pushed onto fileHeap1 together
 * with its run count; files without runs, and the spare file at index K, are
 * queued in nullFile.
 *
 * Terminates the program with a diagnostic if a file cannot be opened.
 *
 * @param inputFileName path of the input file. Reading stops at end of file
 *                      or at the first token that is not an integer.
 */
void initRun(char *inputFileName) {
    int max_memory[M]; // simulated main memory
    FILE *ori = fopen(inputFileName, "r");
    if (ori == NULL) {
        perror(inputFileName);
        exit(EXIT_FAILURE);
    }
    char name[20];
    for (int i = 0; i < K; i++) { // open T1..TK for writing
        file[i] = fopen(fileName(name, i), "w");
        if (file[i] == NULL) {
            perror(name);
            exit(EXIT_FAILURE);
        }
    }
    int dummyNumCnt = 0; // number of padding zeros added to the partial run
    int writeNum = 0;    // 0 is T1, 1 is T2, ...
    int fibonacci[K];    // K-th order Fibonacci numbers
    initFibonacci(fibonacci);
    for (int i = 0; i < K; i++) // every initial run is M numbers long
        runLen[i] = M;
    int readNum = 0; // next free slot in max_memory
    // Compare with 1 instead of EOF: on a malformed token fscanf returns 0
    // without consuming it, and the loop would never end.
    while (fscanf(ori, "%d", &max_memory[readNum]) == 1) {
        readNum++;
        if (readNum < M) // buffer not full yet
            continue;
        handleRun(readNum, writeNum, fibonacci, max_memory);
    }
    if (readNum != 0) { // the last run is incomplete: pad it with zeros
        dummyNumCnt += (M - readNum);
        while (readNum < M)
            max_memory[readNum++] = 0;
        handleRun(readNum, writeNum, fibonacci, max_memory);
    }
    memset(max_memory, 0, M * sizeof(int)); // an all-zero dummy run
    while (writeNum < K) {
        if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) { // quota not reached yet
            write(max_memory, M, file[writeNum]);
            runNum[writeNum]++;
        }
        else {
            writeNum++;
        }
    }
    fclose(ori);
    for (int i = 0; i < K; i++) {
        fclose(file[i]);
        if (runNum[i] > 0) {
            fileHeap1.push(std::make_pair(i, runNum[i]));
        }
        else {
            nullFile.push(i); // no runs: goes to the empty-file queue
        }
    }
    nullFile.push(K); // the (K+1)-th file starts out empty
}
/**
 * Two-way merges one run from file[longest] with one run from file[read]
 * and appends the result to file[write].
 *
 * runLen[longest] numbers are taken from the first file and runLen[read]
 * from the second. On equal values the one from file[read] is written first.
 *
 * @param longest index of the first input file
 * @param read    index of the second input file
 * @param write   index of the output file
 */
void mergeRun(int longest, int read, int write) {
    FILE *const srcA = file[longest];
    FILE *const srcB = file[read];
    FILE *const dst = file[write];
    const int lenA = runLen[longest];
    const int lenB = runLen[read];

    int takenA = 0, takenB = 0; // numbers already written from each run
    int headA, headB;           // look-ahead value of each run
    bool haveA = false, haveB = false; // whether the look-ahead is loaded

    while (takenA < lenA && takenB < lenB) {
        if (!haveA) {
            fscanf(srcA, "%d", &headA);
            haveA = true;
        }
        if (!haveB) {
            fscanf(srcB, "%d", &headB);
            haveB = true;
        }
        if (headA < headB) {
            fprintf(dst, "%d ", headA);
            haveA = false;
            ++takenA;
        }
        else {
            fprintf(dst, "%d ", headB);
            haveB = false;
            ++takenB;
        }
    }

    // Copy whatever is left of the first run; a loaded look-ahead goes first.
    for (; takenA < lenA; ++takenA) {
        if (!haveA)
            fscanf(srcA, "%d", &headA);
        fprintf(dst, "%d ", headA);
        haveA = false;
    }
    // Same for the second run.
    for (; takenB < lenB; ++takenB) {
        if (!haveB)
            fscanf(srcB, "%d", &headB);
        fprintf(dst, "%d ", headB);
        haveB = false;
    }
}
/**
 * Opens file number index in the given mode, ending the program with a
 * diagnostic when that fails. The file name is built in the global name buffer.
 */
static FILE *openTape(int index, const char *mode) {
    FILE *fp = fopen(fileName(name, index), mode);
    if (fp == NULL) {
        perror(name);
        exit(EXIT_FAILURE);
    }
    return fp;
}

/**
 * Version-3 driver.
 *
 * Writes a random input file, distributes it into initial runs, and then
 * merges pass by pass. In every pass the file with the most runs (top of the
 * "not handled" heap) is merged two-way with each remaining file in turn;
 * each result goes to a file taken from the empty-file queue and is recorded
 * in the "handled" heap. The two heaps swap roles after every pass, and the
 * loop ends when at most one file with runs is left.
 */
int main() {
    writeRandIntToFile();
    char inputFileName[20] = "ta1"; // file to sort
    //scanf("%s", inputFileName);
    initRun(inputFileName); // build the initial runs

    auto *notHandle = &fileHeap1; // files not yet handled in this pass
    auto *hasHandle = &fileHeap2; // files already handled in this pass

    // Longest file of the previous pass. Starts at -1 (no such file) so the
    // first comparison below does not read an uninitialised value.
    int oldLongestNum = -1;
    // 1 while the previous longest file is still open with unread runs.
    int oldLongestOpenTag = 0;

    while (notHandle->size() > 1) { // until everything is merged into one file
        // Take the file with the most runs.
        const int longestNum = notHandle->top().first;
        notHandle->pop();
        // If it is the previous longest file and still open, keep reading
        // where the last pass stopped; reopening would rewind to runs that
        // were already consumed.
        if (longestNum != oldLongestNum || oldLongestOpenTag == 0)
            file[longestNum] = openTape(longestNum, "r");

        while (!notHandle->empty()) { // merge it with every remaining file
            const int writeNum = nullFile.front(); // take an empty file for output
            nullFile.pop();
            file[writeNum] = openTape(writeNum, "w");

            const int readNum = notHandle->top().first; // next file to merge with
            notHandle->pop();
            // The previous longest file may still be open from the last
            // pass; in that case it must not be reopened.
            if (readNum != oldLongestNum || oldLongestOpenTag == 0)
                file[readNum] = openTape(readNum, "r");

            while (runNum[readNum] > 0) { // consume every run of the read file
                mergeRun(longestNum, readNum, writeNum);
                runNum[longestNum]--;
                runNum[readNum]--;
                runNum[writeNum]++;
            }
            // A merged run is as long as its two source runs together.
            runLen[writeNum] = runLen[readNum] + runLen[longestNum];
            // The output file is handled; it takes part again next pass.
            hasHandle->push({ writeNum, runNum[writeNum] });
            fclose(file[writeNum]);
            nullFile.push(readNum); // fully read, so it is an empty file now
            fclose(file[readNum]);
        }

        if (runNum[longestNum] == 0) { // longest file fully consumed
            nullFile.push(longestNum);
            fclose(file[longestNum]);
            oldLongestOpenTag = 0;
        }
        else { // runs remain: keep it open and handle it next pass
            oldLongestOpenTag = 1;
            hasHandle->push({ longestNum, runNum[longestNum] });
        }
        oldLongestNum = longestNum;
        // What was handled in this pass is the input of the next one.
        std::swap(notHandle, hasHandle);
    }
    return 0;
}