因为《编程珠玑》的第一章是对文件排序,首先需要产生一些不重复的随机数。关于产生不重复的随机数,可以用Hashtable的方式,即每随机产生一个数,就去检测列表中这个数是否已经存在,然而这种方法非常耗时。有一篇帖子就是讲如何优化这个算法的(http://www.cnblogs.com/eaglet/archive/2011/01/17/1937083.html)。帖子首先讲了用Hashtable这种传统的方法;其次是先产生一个有序数列,再随机产生一个数作为有序数列的下标,取出相应的数,并把这个数从有序数列中删掉,然后重新产生随机下标,如此循环。但是这种算法的缺点在于对有序数列进行删除操作时会非常耗时。最后这篇博客采用的办法是将有序数列“队尾”的数据复制到选中的随机下标位置上,以此取代删除操作、避免其带来的耗时。在我的试验中,也证实这种算法非常快:第一种算法耗时32min,而最后这种算法只需要8s。
根据《编程珠玑》上的要求,我需要产生1000000~9999999之间所有的数(共9000000个),并且顺序随机、互不重复。
因此我的程序如下:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Builds a random decimal number digit by digit; the leading digit is never 0. */
int seven_num(int );
/* Returns a heap-allocated array holding the values 1000000, 1000001, ... in shuffled order. */
int *vector_sequence(int num);
/* Rejection-sampling generator built on seven_num(): redraws until an unseen value appears. */
int *vector_random(int num,int bitnum);
/*
 * Program entry point.
 *
 * Times a single call to vector_sequence() and prints the elapsed wall-clock
 * time, in seconds, on its own line.
 *
 * Returns EXIT_SUCCESS normally, or EXIT_FAILURE if vector_sequence() yields
 * no result.
 */
int main(void)
{
    const int num = 9000000;    /* number of values requested */
    time_t start, end;
    int *sequence_vector;
    FILE *txtname;

    /*
     * data.txt is still created/truncated at start-up, but nothing in this
     * function writes to it, so the handle is closed right away instead of
     * being left open until exit.
     */
    txtname = fopen("data.txt", "w");
    if (txtname != NULL) {
        fclose(txtname);
    }

    time(&start);
    sequence_vector = vector_sequence(num);
    time(&end);

    if (sequence_vector == NULL) {
        fprintf(stderr, "vector_sequence returned no data\n");
        return EXIT_FAILURE;
    }

    /* difftime() has one-second resolution because time() does. */
    printf("%f\n", difftime(end, start));

    /* The array returned by vector_sequence() is heap-allocated; release it. */
    free(sequence_vector);
    return EXIT_SUCCESS;
}
/*
 * Return a pseudo-random index in [0, bound); bound must be positive.
 *
 * rand() is only guaranteed to supply 15 random bits (RAND_MAX may be as
 * small as 32767), which cannot address a pool of millions of slots, so two
 * draws are combined into one wider value before reducing it.
 */
static int rand_index(int bound)
{
    unsigned long high = (unsigned long)rand();
    unsigned long low = (unsigned long)rand();
    unsigned long wide = (high << 15) ^ low;

    return (int)(wide % (unsigned long)bound);
}

/*
 * Produce the num consecutive integers 1000000, 1000001, ...,
 * 1000000 + num - 1 in random order, without repetition.
 *
 * Method: fill a pool with the ordered values, then repeatedly pick a random
 * slot among the still-unused prefix, emit its value, and overwrite that slot
 * with the last unused value. This avoids shifting elements on every removal.
 *
 * Every emitted value is also written, one per line, to
 * "sequence_vector.txt". If that file cannot be opened, a diagnostic is
 * printed to stderr and the values are still generated.
 *
 * The generator is reseeded from the current time on every call.
 *
 * num: how many values to produce; must be > 0 and small enough that
 *      1000000 + num - 1 fits in an int.
 *
 * Returns a malloc-family array of num ints that the caller must free(),
 * or NULL if num is out of range or memory cannot be allocated.
 */
int *vector_sequence(int num)
{
    const int first_value = 1000000;
    int *pool;
    int *result;
    FILE *out;
    int remaining;
    int i;

    if (num <= 0 || num > INT_MAX - first_value) {
        return NULL;
    }

    /* calloc checks the count * size multiplication for overflow. */
    pool = calloc((size_t)num, sizeof *pool);
    result = calloc((size_t)num, sizeof *result);
    if (pool == NULL || result == NULL) {
        free(pool);
        free(result);
        return NULL;
    }

    out = fopen("sequence_vector.txt", "w");
    if (out == NULL) {
        perror("sequence_vector.txt");
    }

    srand((unsigned)time(NULL));

    for (i = 0; i < num; i++) {
        pool[i] = first_value + i;
    }

    /* pool[0 .. remaining-1] always holds exactly the values not yet emitted. */
    for (i = 0, remaining = num; remaining > 0; i++, remaining--) {
        int pick = rand_index(remaining);

        result[i] = pool[pick];
        pool[pick] = pool[remaining - 1];
        if (out != NULL) {
            fprintf(out, "%d\n", result[i]);
        }
    }

    if (out != NULL && fclose(out) != 0) {
        perror("sequence_vector.txt");
    }
    free(pool);
    return result;
}
/*
 * Generate distinct random bitnum-digit numbers by rejection sampling:
 * draw a candidate with seven_num() and redraw while it has been seen before.
 *
 * num:    length of the returned array; must be > 0.
 * bitnum: number of decimal digits per value; must be in 1..9 so that every
 *         value fits in a 32-bit int.
 *
 * Only the first num - 1 entries are filled; the last entry stays 0.
 * NOTE(review): the num - 1 fill count is kept from the earlier version --
 * confirm whether filling all num entries was intended.
 *
 * The generator is reseeded from the current time on every call.
 *
 * Returns a malloc-family array of num ints that the caller must free(), or
 * NULL when the arguments are out of range, when more distinct values are
 * requested than exist for bitnum digits, or when allocation fails.
 */
int *vector_random(int num,int bitnum)
{
    int lowest = 1;             /* smallest bitnum-digit value, 10^(bitnum-1) */
    int span;                   /* how many distinct bitnum-digit values exist */
    int wanted;
    unsigned char *seen;
    int *result;
    int i;

    if (num <= 0 || bitnum < 1 || bitnum > 9) {
        return NULL;
    }

    for (i = 1; i < bitnum; i++) {
        lowest *= 10;
    }
    span = 9 * lowest;

    /* Asking for more distinct values than exist would never terminate. */
    wanted = num - 1;
    if (wanted > span) {
        return NULL;
    }

    /* One flag per possible value, indexed by (value - lowest). */
    seen = calloc((size_t)span, sizeof *seen);
    result = calloc((size_t)num, sizeof *result);
    if (seen == NULL || result == NULL) {
        free(seen);
        free(result);
        return NULL;
    }

    srand((unsigned)time(NULL));

    for (i = 0; i < wanted; i++) {
        int candidate;

        do {
            candidate = seven_num(bitnum);
        } while (seen[candidate - lowest]);

        seen[candidate - lowest] = 1;
        result[i] = candidate;
    }

    free(seen);
    return result;
}
/*
 * Return a random decimal number with exactly bitnum digits.
 *
 * The leading digit is drawn from 1..9 so the result never starts with 0;
 * each remaining digit is drawn from 0..9. Uses rand(), so the caller is
 * responsible for seeding with srand().
 *
 * bitnum: number of digits, 1..9 (nine digits is the most that is certain to
 *         fit in a 32-bit int).
 *
 * Returns the generated number, or 0 if bitnum is outside 1..9. Zero can
 * never be a valid result because the leading digit is non-zero.
 */
int seven_num(int bitnum)
{
    int number;
    int count;

    if (bitnum < 1 || bitnum > 9) {
        return 0;
    }

    number = rand() % 9 + 1;
    for (count = 1; count < bitnum; count++) {
        number = number * 10 + rand() % 10;
    }
    return number;
}