前面对比了各种比较排序算法在面对百万级数据时所表现的性能,结果是连差强人意都说不上。所以又比较了内排序中的一些
非比较排序算法,来比较下它们面对百万级数据时的性能
#coding=utf-8
import random,time
'''Author:Zhanglongqi
Date:2018-3-24
email:Zlonqi@qq.com
Environment:Python 3,Win8.1
Descrip:比较桶排序、基数排序、计数排序处理百万级数据量时的时间性能
'''
def counting_sort(A,B,k):
C=[]
for i in range(k):
C.append(0)
for j in range(len(A)):
C[A[j]]=C[A[j]]+1
for i in range(1,k):
C[i]=C[i]+C[i-1]
for i in range(len(A)-1,-1,-1):
B[C[A[i]]]=A[i]
C[A[i]]=C[A[i]]-1
B.pop(0)
return B
def radix_sort(array):
bucket, digit = [[]], 0
while len(bucket[0]) != len(array):
bucket = [[], [], [], [], [], [], [], [], [], []]
for i in range(len(array)):
num = (array[i] // 10 ** digit) % 10
bucket[num].append(array[i])
array.clear()
for i in range(len(bucket)):
array += bucket[i]
digit += 1
return array
class bucketSort(object):
def _max(self,oldlist):
_max=oldlist[0]
for i in oldlist:
if i>_max:
_max=i
return _max
def _min(self,oldlist):
_min=oldlist[0]
for i in oldlist:
if i<_min:
_min=i
return _min
def sort(self,oldlist):
_max=self._max(oldlist)
_min=self._min(oldlist)
s=[0 for i in range(_min,_max+1)]
for i in oldlist:
s[i-_min]+=1
current=_min
n=0
for i in s:
while i>0:
oldlist[n]=current
i-=1
n+=1
current+=1
def __call__(self,oldlist):
self.sort(oldlist)
return oldlist
if __name__=='__main__':
nums=[]
Lis=[]
C=[]
D=[]
MAX=10000000
N=1000000
for _ in range(N):
n=random.randint(0,MAX)
nums.append(n)
C=nums.copy()
D=nums.copy()
for _ in range(N+1):
Lis.append(0)
start1=time.clock()
C=radix_sort(C)
end1=time.clock()
print('radix time is',end1-start1)
start2=time.clock()
Lis=counting_sort(nums,Lis,MAX)
end2=time.clock()
print('counting time is',end2-start2)
start=time.clock()
bucketSort()(D)
end=time.clock()
print('bucket time is',end-start)
if Lis==C and C==D:
print('True')
else:
print('False')
Demon:
可以发现桶排序最快,而且桶排序在单跑百万数据时只要不到2秒:
但是,实际使用时还得根据各种排序算法的时空开销和稳定性特点来选择。