计数排序(counting sort)适用于取值分布比较集中的整数数据, 即最大值和最小值相差不太大的情况。至于这个差值在多少以内才合适, 还是以后值得我思考的问题。
计数排序的算法思想是使用3个数组: 待排序数组 original[]、计数数组 counter[] 和结果数组 result[]。先用 counter[original[i]]++ 统计每个值出现的次数, 再令 counter[i] = counter[i]+counter[i-1] 做前缀累加, 使 counter[i] 表示小于等于 i 的元素个数;
最后以 counter 的值作为位置(pos), 将 original 中的元素分布到 result 中, 再将 result 拷贝回 original:
result[counter[original[k]]-1] = original[k];
counter[original[k]]--;
算法分析:
1.时间复杂度为 O(n + k), 其中 n 为元素个数, k 为取值范围的大小; 当 k 与 n 同一量级时即为 O(n)。
2.空间复杂度为 O(n + k)。
3.计数排序不是原地排序算法(指不申请多余的空间来进行的排序);
是稳定排序算法(指在排序前后具有相同关键字之间的相对顺序保持不变);
1. C++实现的源代码如下:
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>
using namespace std;
// Number of elements each CountSort instance holds; the constructor copies
// this value into its `length` member.
const int len = 102;
// Counting-sort demo class.
//
// Owns a heap-allocated array of `length` integers (`original`). The array is
// allocated by the constructor, released by the destructor, sorted in place by
// sort(), and printed by the stream insertion operator.
class CountSort
{
public:
    CountSort();
    ~CountSort();

    // Sorts the owned array in ascending order, in place.
    void sort();

    // Streams the elements of `countsort`; defined out of line.
    friend std::ostream& operator<<(std::ostream& out, const CountSort& countsort);

private:
    // Copying is disabled (declared private and intentionally left undefined):
    // the compiler-generated copies would duplicate the raw `original` pointer
    // and both objects would then delete[] the same buffer.
    CountSort(const CountSort&);
    CountSort& operator=(const CountSort&);

    int *original; // owned buffer holding the values to sort
    int length;    // number of elements of `original` that are in use
};
// Builds the demo data set: pseudo-random values followed by two identical
// entries, so the array always contains at least one duplicated key.
CountSort::CountSort()
    : length(len)
{
    // The buffer is allocated two slots larger than `length`; only the first
    // `length` slots are written below.
    original = new int[length + 2];

    const int randomCount = length - 2;
    int slot = 0;
    while (slot < randomCount)
    {
        original[slot] = rand() % 1000; // remainder lies in 0..999
        ++slot;
    }

    // The last two used slots hold the same fixed value.
    original[length - 1] = 566;
    original[length - 2] = 566;
}
// Frees the array allocated by the constructor and clears the member pointer.
CountSort::~CountSort()
{
    int *const buffer = original;
    original = NULL;
    delete[] buffer;
}
// Writes every element of `countsort` to `out`, each followed by a single
// space, then terminates the line with endl (which also flushes `out`).
//
// The output goes to the stream that was passed in rather than to cout, so
// the operator also works with file and string streams.
//
// Returns `out` so that insertions can be chained.
ostream& operator<<(ostream& out, const CountSort& countsort)
{
    for (int i = 0; i < countsort.length; i++)
    {
        out << countsort.original[i] << " ";
    }
    out << endl;
    return out;
}
// Sorts `original` in ascending order with the classic (stable) counting sort.
//
// Each value is used directly as an index into the counter table, so every
// element must be non-negative; negative values are not supported by this
// variant.
//
// Working storage: one counter per value in 0..max plus a result buffer of
// `length` elements. Both are std::vector, so they are released automatically
// even if the second allocation throws.
void CountSort::sort()
{
    if (length <= 0)
    {
        return; // nothing to sort
    }

    // Find the largest value; it determines the size of the counter table.
    int maxValue = 0;
    for (int i = 0; i < length; i++)
    {
        if (original[i] > maxValue)
        {
            maxValue = original[i];
        }
    }

    std::vector<int> counter(maxValue + 1); // zero-initialised, indices 0..maxValue
    std::vector<int> result(length);        // one slot per input element

    // Step 1: count how many times each value occurs.
    for (int i = 0; i < length; i++)
    {
        counter[original[i]]++;
    }

    // Step 2: prefix sums -- counter[v] becomes the number of elements <= v.
    for (int j = 1; j <= maxValue; j++)
    {
        counter[j] += counter[j - 1];
    }

    // Step 3: place each element at its final position. Walking the input from
    // the back keeps equal keys in their original relative order (stability).
    for (int k = length - 1; k >= 0; k--)
    {
        const int pos = --counter[original[k]];
        result[pos] = original[k];
    }

    // Step 4: copy the sorted sequence back into the owned array.
    for (int m = 0; m < length; m++)
    {
        original[m] = result[m];
    }
}
int main()
{
CountSort *pcountsort = new CountSort();
cout<<"排序前:"<<endl;
cout<<*pcountsort;
pcountsort->sort();
cout<<"排序后:"<<endl;
cout<<*pcountsort;
system("pause");
return 0;
}
2. 由于上面的实现一共申请了3个数组(original、counter、result)的空间, 所以需要对空间进行优化: 去掉 result 数组, 把 Step 2、3、4 合并为一步(这种写法直接用下标重建元素, 只适用于纯整数数据),
// Space-saving counting sort: tallies how often each value occurs and then
// rewrites `original` directly from the tallies, so no separate result array
// is needed. Values are used as indices into the tally table.
void CountSort::sort()
{
    // Largest value present (never less than 0); sizes the tally table.
    int largest = 0;
    for (int idx = 0; idx < length; ++idx)
    {
        const int value = original[idx];
        if (value > largest)
        {
            largest = value;
        }
    }

    // One zeroed tally per value in 0..largest.
    const int tableSize = largest + 1;
    int *tally = new int[tableSize];
    memset(tally, 0, tableSize * sizeof(int));

    // Record the number of occurrences of every value.
    for (int idx = 0; idx < length; ++idx)
    {
        ++tally[original[idx]];
    }

    // Emit each value as many times as it was seen, in increasing order.
    int *dest = original;
    for (int value = 0; value < tableSize; ++value)
    {
        for (int remaining = tally[value]; remaining > 0; --remaining)
        {
            *dest++ = value;
        }
    }

    delete[] tally;
}
3. 更进一步的优化算法
如果最大数和最小数基数很大,但是|max-min| 差别比较小,或者出现了负数的时候怎么处理。
可以通过哈希函数来代替第一步值与counter索引映射。original[i]= xi, counter[j]
对于直接映射即j=xi,所以h(xi) = xi;
那么如果max和min都是正数, 且min本身就很大,
可以令 j = xi-min = h(xi)。如果min是负数, xi-min 同样不会小于0, 这个映射看起来仍然适用, 具体的验证留到以后再解决吧。
现在基于上面的代码进行验证。我们生成1000-2000的随机数, 原来的下标是 index = original[i], 现在改为 index = original[i] - min,
代码实现如下
// Sorts `original` in ascending order with a stable counting sort whose
// counter table is offset by the smallest value: index = value - min.
//
// The table therefore needs only (max - min + 1) entries, whatever the
// magnitude of the values, and negative values map to valid indices too.
// NOTE: max - min is computed in int, so the spread between the smallest and
// largest value must fit in an int.
//
// Working storage is held in std::vector, so it is released automatically
// even if an allocation throws.
void CountSort::sort()
{
    if (length <= 0)
    {
        return; // nothing to sort; also avoids computing a range from no data
    }

    // Seed both extremes from the data itself so the range is exact for any
    // input, instead of relying on assumed bounds.
    int maxValue = original[0];
    int minValue = original[0];
    for (int i = 1; i < length; i++)
    {
        if (original[i] > maxValue)
        {
            maxValue = original[i];
        }
        if (original[i] < minValue)
        {
            minValue = original[i];
        }
    }

    const int range = maxValue - minValue + 1;
    std::vector<int> counter(range);  // zero-initialised, one entry per value in min..max
    std::vector<int> result(length);  // one slot per input element

    // Step 1: count occurrences, indexing by (value - min).
    for (int i = 0; i < length; i++)
    {
        counter[original[i] - minValue]++;
    }

    // Step 2: prefix sums -- counter[j] becomes the number of elements whose
    // value is <= min + j.
    for (int j = 1; j < range; j++)
    {
        counter[j] += counter[j - 1];
    }

    // Step 3: place each element at its final position. Walking the input from
    // the back keeps equal keys in their original relative order (stability).
    for (int k = length - 1; k >= 0; k--)
    {
        const int index = original[k] - minValue;
        const int pos = --counter[index];
        result[pos] = original[k];
    }

    // Step 4: copy the sorted sequence back into the owned array.
    for (int m = 0; m < length; m++)
    {
        original[m] = result[m];
    }
}