温习排序算法,用Python来实现很方便,无论是写代码还是生成随机数,都很简单。于是就把常用的一些算法给实现了一把,并做了一个简单的效率的比较。
SortBase是基类,完成一些基础的共用操作,如保存数据、打印、swap数据等。它定义了统一的开始排序的接口startSort(),所有具体的排序算法类都要重载这个函数。我的本意是想利用基类统一管理排序算法,不过实现得不是太好,今后再完善吧!
把代码贴在这里,算是个备份吧。
#!/usr/bin/python
# Description : common sorting algorithms (bubble, insert, select, shell, quick, bucket) with a simple timing comparison
import bisect
import random
import string
import sys
import time
class SortBase:
    '''
    Base class for each specific sort algorithm class.

    Holds the input data and a working copy that subclasses sort in place,
    and provides the shared helpers (swap, print-out, reset).  Subclasses
    override startSort() with the actual algorithm.
    '''

    def __init__(self, ori_data, name='base'):
        '''
        ori_data : list of mutually comparable items.  It is stored by
                   reference in unsorted_data; sorted_data is a shallow copy.
        name     : label of the algorithm, stored in sort_name.
        '''
        self.unsorted_data = ori_data
        self.sorted_data = ori_data[:]
        self.length = len(ori_data)
        self.sort_name = name
        self.__generateSteps(self.length)

    def __generateSteps(self, limit):
        '''
        Build the gap sequence used by shell sorting.

        Starting from 1, each gap is 3 * previous + 1; generation stops at
        the first gap that is >= limit.  The result is stored in self.steps,
        largest gap first, and always ends with 1.
        '''
        steps = [1]
        while steps[-1] < limit:
            steps.append(3 * steps[-1] + 1)
        steps.reverse()
        self.steps = steps

    def printOut(self):
        '''
        Print the raw and the sorted data, then report every adjacent pair
        of the sorted data that is out of order.
        '''
        print('The raw data :')
        print(self.unsorted_data)
        print('The sorted data :')
        print(self.sorted_data)
        # Pairing the list with its own tail yields nothing for empty or
        # single-item data, so no element is indexed unconditionally.
        for previous, current in zip(self.sorted_data, self.sorted_data[1:]):
            if current < previous:
                print("Error sort : ", previous, " behind ", current)

    def swap(self, ls_data, index1, index2):
        ''' Exchange the items at index1 and index2 of ls_data in place. '''
        ls_data[index1], ls_data[index2] = \
            ls_data[index2], ls_data[index1]

    def restart(self):
        ''' Revert the working copy to the unsorted status. '''
        self.sorted_data = self.unsorted_data[:]

    def startSort(self):
        ''' Virtual function for child classes to override. '''
        print('base class have no sorting implementation')

    @staticmethod
    def createSortByName(data, name):
        '''
        Create the sorter whose keyword occurs in name (case-insensitive).

        Keywords are tried in the order insert, bubble, shell, quick,
        select, bucket; the first one found as a substring wins.  Returns
        None when no keyword matches.
        '''
        lowered = name.lower()
        # Lambdas defer the lookup of the subclasses, which are defined
        # after this class, until a sorter is actually requested.
        factories = (
            ('insert', lambda: InsertSort(data)),
            ('bubble', lambda: BubbleSort(data)),
            ('shell', lambda: ShellSort(data)),
            ('quick', lambda: QuickSort(data)),
            ('select', lambda: SelectSort(data)),
            ('bucket', lambda: BucketSort(data, 0, 100, 50)),
        )
        for keyword, build in factories:
            if keyword in lowered:
                return build()
        return None
class InsertSort(SortBase):
    ''' Insertion sort working in place on sorted_data. '''

    def __init__(self, ori_data):
        SortBase.__init__(self, ori_data, 'insert')

    def startSort(self):
        ''' Sink each item towards the front until its predecessor is not larger. '''
        print("Start insert sort ...")
        values = self.sorted_data
        # The item at index 0 has no predecessor, so the scan starts at 1.
        for upper in range(1, self.length):
            pos = upper
            while pos > 0:
                if values[pos] >= values[pos - 1]:
                    break
                self.swap(values, pos, pos - 1)
                pos -= 1
class BubbleSort(SortBase):
    ''' Bubble sort working in place on sorted_data. '''

    def __init__(self, ori_data):
        SortBase.__init__(self, ori_data, 'bubble')

    def startSort(self):
        '''
        Repeatedly sweep the unsorted prefix, swapping adjacent items that
        are out of order; each sweep moves the largest remaining item to
        the end of the prefix.
        '''
        print("Start Bubble sort ...")
        data = self.sorted_data
        # The pass counter and the scan index use distinct names; the
        # earlier version reused one name for both loops.
        for finished in range(self.length - 1):
            # The last `finished` items are already in their final place.
            for i in range(self.length - finished - 1):
                if data[i] > data[i + 1]:
                    self.swap(data, i, i + 1)
class ShellSort(SortBase):
    ''' Shell sort driven by the gap sequence prepared in the base class. '''

    def __init__(self, ori_data):
        SortBase.__init__(self, ori_data, 'shell')

    def startSort(self):
        ''' Run one gapped insertion pass per gap, largest gap first. '''
        print("Start shell sort ...")
        half = len(self.unsorted_data) / 2
        for step in self.steps:
            # Gaps larger than half of the data are skipped.
            if step <= half:
                self.__shell_sort(step)

    def __shell_sort(self, step):
        ''' Insertion-sort each of the `step` interleaved sub-sequences. '''
        values = self.sorted_data
        for offset in range(step):
            for upper in range(offset, self.length, step):
                # Walk the item at `upper` backwards in strides of `step`.
                for pos in range(upper, offset, -step):
                    if values[pos] >= values[pos - step]:
                        break
                    self.swap(values, pos, pos - step)
class QuickSort(SortBase):
    ''' Quick sort with median-of-three pivot selection, in place on sorted_data. '''

    def __init__(self, ori_data):
        SortBase.__init__(self, ori_data, 'quick')

    def startSort(self):
        ''' Sort the whole working copy. '''
        print("Start Quick Sort ...")
        self.__quickSortStep(0, self.length - 1)

    def __quickSortStep(self, begin, end):
        ''' Recursively sort the inclusive index range [begin, end]. '''
        if begin < end:
            key_index = self.__quickSortDivid3v(begin, end)
            self.__quickSortStep(begin, key_index - 1)
            self.__quickSortStep(key_index + 1, end)

    def __quickSortDivid3v(self, begin, end):
        '''
        Partition [begin, end] around the median-of-three pivot.

        Returns the final index of the pivot: everything left of it is
        <= pivot and everything right of it is >= pivot.
        '''
        data = self.sorted_data
        pivot = self.__quickSortMedian3(begin, end)
        pivot_index = end - 1
        # With at most three items the median-of-three step has already
        # put them in order.
        if end - begin <= 2:
            return pivot_index
        left = begin
        right = pivot_index
        while True:
            # Both cursors move before every comparison, including right
            # after a swap; without that step, two items equal to the pivot
            # would be swapped back and forth forever.
            left += 1
            while data[left] < pivot:
                left += 1
            right -= 1
            while data[right] > pivot:
                right -= 1
            if left >= right:
                break
            self.swap(data, left, right)
        # data[left] is >= pivot here, so it may move to the pivot's slot.
        self.swap(data, left, pivot_index)
        return left

    def __quickSortMedian3(self, begin, end):
        '''
        Order the items at begin, middle and end, park the median at
        index end - 1 and return its value.

        The ordered ends act as sentinels for the partition scans:
        data[begin] <= pivot and data[end] >= pivot.
        '''
        data = self.sorted_data
        if begin < end:
            mid = (begin + end) // 2
            if data[mid] > data[end]:
                self.swap(data, mid, end)
            if data[begin] > data[mid]:
                self.swap(data, begin, mid)
            if data[mid] > data[end]:
                self.swap(data, end, mid)
            self.swap(data, mid, end - 1)
        return data[end - 1]
class SelectSort(SortBase):
    ''' Selection sort working in place on sorted_data. '''

    def __init__(self, ori_data):
        SortBase.__init__(self, ori_data, 'select')

    def startSort(self):
        ''' Sort the whole working copy. '''
        print("Start Select sort ...")
        self.__sortOneStep(0)

    def __sortOneStep(self, start):
        '''
        For every position from `start` on, move the smallest remaining
        item into that position.
        '''
        values = self.sorted_data
        total = self.length
        for target in range(start, total):
            # min() keeps the first index on ties, like a strict "<" scan.
            smallest = min(range(target, total), key=values.__getitem__)
            if smallest != target:
                self.swap(values, target, smallest)
class BucketSort(SortBase):
    '''
    Bucket sort: each item goes into the bucket chosen from its value,
    buckets are kept sorted while they are filled, and finally they are
    concatenated in key order.
    '''

    def __init__(self, ori_data, min_value, max_value, interval=10):
        '''
        ori_data  : list of numbers to sort.
        min_value : value that maps to bucket key 0.
        max_value : stored only; the bucket computation does not use it.
        interval  : width of the value range covered by one bucket.
        '''
        SortBase.__init__(self, ori_data, 'bucket')
        self.min_value = min_value
        self.max_value = max_value
        # The attribute used to be misspelt; the old name stays available
        # for any code that reads it.
        self.man_value = max_value
        self.interval = interval
        self.buckets = {}

    def __decide_bucket(self, d):
        ''' Return (bucket key, d) for the value d. '''
        return (int((d - self.min_value) / self.interval), d)

    def __make_interval(self):
        pass

    def __insert_bucket(self, tuple_data):
        '''
        tuple_data : (key of buckets dict, original input value)

        Insert the value into its bucket, keeping the bucket sorted.
        '''
        key, value = tuple_data
        # insort_left places the value exactly once, before any equal
        # items and at the end when it is the largest so far.
        bisect.insort_left(self.buckets.setdefault(key, []), value)

    def __reconnect(self):
        ''' Concatenate the buckets in ascending key order into sorted_data. '''
        self.sorted_data = []
        for key in sorted(self.buckets):
            self.sorted_data.extend(self.buckets[key])

    def startSort(self):
        ''' Distribute the raw data into buckets, join them and print the result. '''
        print("Start Bucket sort ...")
        # Start from empty buckets so a repeated run does not accumulate
        # the items of the previous one.
        self.buckets = {}
        for d in self.unsorted_data:
            self.__insert_bucket(self.__decide_bucket(d))
        self.__reconnect()
        # NOTE(review): unlike the other sorters this one prints its data,
        # which is included in any timing of startSort().
        self.printOut()
# Comparison among the algorithms on data sets of different sizes.
class SortComparision:
    ''' Times each sorting algorithm on random data of each requested size. '''

    # Random test values are drawn without repetition from range(data_range).
    data_range = 1000000

    def __init__(self, sizeList, nameList):
        '''
        sizeList : list of data set sizes to test.
        nameList : list of algorithm names understood by
                   SortBase.createSortByName().
        '''
        self.sizes = sizeList[:]
        self.names = nameList[:]
        # result[name][size] -> elapsed seconds of one startSort() call
        self.result = {}

    def start(self):
        '''
        Run every named algorithm on one random data set per size and
        record the elapsed time in self.result.

        Raises ValueError when a name does not match any algorithm.
        '''
        for size in self.sizes:
            tmp_data = random.sample(range(SortComparision.data_range), size)
            for name in self.names:
                sort = SortBase.createSortByName(tmp_data, name)
                if sort is None:
                    raise ValueError('unknown sort algorithm name: %r' % (name,))
                # time.clock() no longer exists since Python 3.8.
                time1 = time.perf_counter()
                sort.startSort()
                interval = time.perf_counter() - time1
                self.result.setdefault(name, {})[size] = interval

    def printOut(self):
        '''
        Print the results as a table: one column per size, one row per
        algorithm name (sorted), times with five decimals.
        '''
        names = sorted(self.result)
        if not names:
            print("%10s" % ' ', end='')
            return
        # The sizes recorded for the first name define the columns.
        sizes = sorted(self.result[names[0]])
        # Header and rows are joined the same way so the columns line up.
        header = ['%10s' % ' '] + ['%10d' % size for size in sizes]
        print(' '.join(header), ' ')
        for name in names:
            cells = ['%10s' % name]
            cells.extend('%10.5f' % self.result[name][size] for size in sizes)
            print(' '.join(cells), ' ')
if __name__ != "__main__":
    # Imported as a module: only announce that the classes are available.
    print('Sort Algorithms Ready!')
else:
    # Other combinations used before:
    #   names ['insert', 'quick', 'shell', 'bubble'] or ['shell', 'quick', 'bucket']
    #   names ['select', 'bubble', 'insert'] with sizes [5000]
    nameList = ['bucket']
    sizeList = [150]
    com = SortComparision(sizeList, nameList)
    print('Begine comparing ... \n\n')
    com.start()
    print('\n\nEnd comparing ... ')
    com.printOut()
SortBase是基类,完成一些基础的共用操作,如保存数据、打印、swap数据等。它定义了统一的开始排序的接口startSort(),所有具体的排序算法类都要重载这个函数。我的本意是想利用基类统一管理排序算法,不过实现得不是太好,今后再完善吧!
现在实现的算法有 Bubble、Insert、Select、Shell、Quick 和桶排序。
和理论分析一致,从简单的测试可以看出,shell和quick排序在数据量大的情况下还是比较快的,尤其是quick排序。不过insert和select在数据量小的情况下,因为实现简单,也是一种选择。冒泡就太原始了,效率比较低,算是提供一种思路吧,呵呵!桶排序在数据均匀分布情况下很强大,否则的话,效率退化严重。