首先要感谢算法的发明者, 要不是他们发明的这么精妙的算法, 我也不会到现在还在头疼!
本文只是表述算法的代码实现过程,以及具体的实现代码,算法的更多详情请参见维基百科
交换排序
冒泡排序
- 两两比较, 并交换位置, 直到末尾。
- 排除末尾数字, 重复1步骤, 直到只剩下一个数字。
当有一次循环未发生交换时, 说明排序已经完成
def bubble_sort(nums):
    """Sort ``nums`` in place in ascending order with bubble sort.

    Each pass compares adjacent pairs and swaps the ones that are out of
    order, which moves the largest remaining value to the end of the
    unsorted prefix.  The function stops early once a pass makes no swap.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    for done in range(len(nums) - 1):
        swapped = False
        # The last ``done`` slots already hold their final values.
        for j in range(len(nums) - done - 1):
            if nums[j] > nums[j + 1]:
                nums[j], nums[j + 1] = nums[j + 1], nums[j]
                swapped = True
        if not swapped:
            # A pass without swaps means the list is already sorted.
            break
    return nums
梳排序
- 类似冒泡的一次循环,固定间距(总长除以1.3)的索引数值间两两比较, 并交换位置, 直到末尾。
- 缩小固定间距(除以1.3), 重复1步骤, 直到固定间距为1(即进行一次冒泡排序循环)为止。
- 不稳定的排序算法
除以1.3约等于乘以0.77, 下面的代码近似取0.8作为缩小系数
def comb_sort(nums, dec=0.8):
    """Sort ``nums`` in place in ascending order with comb sort.

    Items ``step`` positions apart are compared and swapped when out of
    order.  After every pass ``step`` shrinks by the factor ``dec`` until it
    reaches 1.  Passes with ``step == 1`` are then repeated until one of
    them performs no swap, because a single pass at gap 1 does not
    guarantee a sorted list.

    Args:
        nums: A mutable sequence of mutually comparable items.
        dec: Shrink factor applied to the gap after each pass; must satisfy
            ``0 < dec < 1``.

    Returns:
        The same ``nums`` object, now sorted.

    Raises:
        ValueError: If ``dec`` is not strictly between 0 and 1 (the gap
            would never shrink to 1, or would collapse to 0 immediately).
    """
    if not 0 < dec < 1:
        raise ValueError("dec must be strictly between 0 and 1")
    size = len(nums)
    step = size
    swapped = True
    while step > 1 or swapped:
        # Never let the gap drop below 1, otherwise nothing is compared.
        step = max(1, int(step * dec))
        swapped = False
        for j in range(size - step):
            if nums[j] > nums[j + step]:
                nums[j], nums[j + step] = nums[j + step], nums[j]
                swapped = True
    return nums
快速排序
- 取列表中间位置(或者随机位置, 据说随机效率最高, 未验证)的数值n为枢纽,将小于n的所有数放到n的左边, 大于等于n的所有数放到n的右边, n的索引位置就是它最终排序后的索引位置。具体实现时, 得先把n与末尾数值交换, 完成调整后, 再交换回来。
- 将枢纽两侧区间分别重复1步骤(递归调用), 直到所有数值都找到最终排序索引位置,并完成交换。
- 不稳定排序算法
def quick_sort(nums, start=None, end=None):
    """Sort ``nums[start:end]`` in place with quicksort and return ``nums``.

    The middle element of the range is used as the pivot.  It is parked at
    the end of the range, every smaller element is moved in front of a
    growing boundary, and the pivot is then swapped onto that boundary,
    which is its final position.  Both sides are sorted recursively.
    Elements equal to the pivot all stay on the right-hand side, so inputs
    with many duplicates recurse deeply.

    Args:
        nums: A mutable sequence of mutually comparable items.
        start: First index of the range to sort; defaults to 0.
        end: One past the last index of the range; defaults to
            ``len(nums)``.

    Returns:
        The same ``nums`` object (also for empty or one-element ranges).
    """
    if start is None:
        start = 0
    if end is None:
        end = len(nums)
    if end - start <= 1:
        # Nothing to partition; still return the list like every other case.
        return nums
    last = end - 1
    mid = (start + end) // 2
    # Park the pivot at the end so the scan below never moves it.
    nums[mid], nums[last] = nums[last], nums[mid]
    pivot = nums[last]
    boundary = start
    for i in range(start, last):
        if nums[i] < pivot:
            nums[i], nums[boundary] = nums[boundary], nums[i]
            boundary += 1
    # ``boundary`` is now the pivot's final sorted position.
    nums[boundary], nums[last] = nums[last], nums[boundary]
    quick_sort(nums, start, boundary)
    quick_sort(nums, boundary + 1, end)
    return nums
选择排序
简单选择排序
- 一次遍历,找到最大值或最小值(或同时都找)的索引,与末尾做交换
- 排除末尾,重复1步骤,直到只剩下一个数字
- 不稳定的排序算法
def simple_select_sort(nums):
    """Sort ``nums`` in place in ascending order with selection sort.

    For every position the smallest value of the remaining suffix is
    located and swapped into that position.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    size = len(nums)
    for i in range(size - 1):
        smallest = i
        for j in range(i + 1, size):
            if nums[j] < nums[smallest]:
                smallest = j
        nums[smallest], nums[i] = nums[i], nums[smallest]
    return nums
堆排序
- 构建大顶堆: 从二分之一长度索引的节点开始递减, 对节点进行调整(使每个父节点的数值都不小于其左右子节点)
- 将第一位置和最后一个(长度)位置上的数值进行交换,即确定一个数值
- 现在不满足大顶堆要求的就只有第一个节点,对第一个节点进行调整
- 重复2,3步骤,直到剩下一个数字
- 不稳定的排序算法
**注意:**要在列表开始位置补一个数字,才能和树的索引对齐,数据很庞大时或许可以舍弃第一个数字,毕竟列表在开始位置插入数字的时间复杂度是O(n)。
def adjust(nums, i, length):
    """Sift ``nums[i]`` down inside a 1-based max-heap.

    The heap occupies ``nums[1]`` .. ``nums[length]``; the children of node
    ``i`` are ``2*i`` and ``2*i + 1``.  Index 0 is never read or written.

    Args:
        nums: List holding the heap, with a placeholder at index 0.
        i: 1-based index of the node to sift down.
        length: Index of the last element that belongs to the heap.
    """
    while 2 * i <= length:
        left = 2 * i  # index of the left child
        right = left + 1
        largest = i
        if nums[left] > nums[i]:
            largest = left
        if right <= length and nums[right] > nums[largest]:
            largest = right
        if largest == i:
            # The parent already dominates both children.
            break
        nums[i], nums[largest] = nums[largest], nums[i]
        i = largest


def heapsort(nums):
    """Sort ``nums`` in place in ascending order with heap sort.

    ``adjust`` works with 1-based indices, so the data is copied behind a
    placeholder at index 0, sorted there, and written back.  Every element
    of ``nums`` takes part in the sort; callers do not need to pad the list
    themselves.

    Args:
        nums: A mutable list of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    heap = [None]  # placeholder so that heap indices start at 1
    heap.extend(nums)
    length = len(heap) - 1
    # Build the max-heap bottom-up, starting at the last parent node.
    for i in range(length // 2, 0, -1):
        adjust(heap, i, length)
    # Move the current maximum behind the heap and restore the heap.
    while length >= 2:
        heap[1], heap[length] = heap[length], heap[1]
        length -= 1
        adjust(heap, 1, length)
    nums[:] = heap[1:]
    return nums
插入排序
简单插入排序
将第一个数字看做是一个有序列表,将后面的数字依次插入其中,其中会用到一个临时变量记录需要插入的数字
def insert_sort(nums):
    """Sort ``nums`` in place in ascending order with insertion sort.

    The prefix ``nums[:i]`` is kept sorted; each following value is held in
    a temporary variable while larger prefix items are shifted one slot to
    the right, then dropped into the gap.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    for i in range(1, len(nums)):
        pending = nums[i]
        pos = i - 1
        # Shift every larger item right to open a slot for ``pending``.
        while pos >= 0 and nums[pos] > pending:
            nums[pos + 1] = nums[pos]
            pos -= 1
        nums[pos + 1] = pending
    return nums
希尔排序
- 类似梳排序,将固定间距的索引数值间进行插入排序。
- 缩小间距,重复1步骤,直到间距等于1(即进行一次简单插入排序)为止。
- 不稳定的排序算法
def shell_sort2(nums):
    """Sort ``nums`` in place in ascending order with Shell sort.

    The gap starts at ``len(nums) // 2`` and is halved after every round.
    Each round performs an insertion sort over items that are ``gap``
    positions apart; the last round (gap 1) is a plain insertion sort.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    size = len(nums)
    gap = size // 2
    while gap >= 1:
        for i in range(gap, size):
            pending = nums[i]
            k = i - gap
            # Shift larger items of the same gap-chain one step right.
            while k >= 0 and nums[k] > pending:
                nums[k + gap] = nums[k]
                k -= gap
            nums[k + gap] = pending
        gap //= 2
    return nums
归并排序
将列表递归拆分,直到变成两个有序的列表(长度为一即有序),再将两个有序列表合并起来的过程
递归得很有深度
归并排序的空间复杂度会比较高,毕竟每一次的切片都是复制新的空间
def merge(lnums, rnums):
    """Merge two ascending lists into one new ascending list.

    When two items compare equal the one from ``lnums`` is taken first, so
    equal items keep their left-before-right order (stable merge).

    Args:
        lnums: Ascending list.
        rnums: Ascending list.

    Returns:
        A new list containing every item of both inputs in ascending order.
    """
    merged = []
    li, ri = 0, 0
    while li < len(lnums) and ri < len(rnums):
        # ``<=`` rather than ``<`` so ties prefer the left-hand list.
        if lnums[li] <= rnums[ri]:
            merged.append(lnums[li])
            li += 1
        else:
            merged.append(rnums[ri])
            ri += 1
    # At most one of the two tails is non-empty here.
    merged.extend(lnums[li:])
    merged.extend(rnums[ri:])
    return merged
def merge_sort(nums):
    """Return the items of ``nums`` in ascending order using merge sort.

    The list is split in halves recursively until the pieces have at most
    one item, and the sorted halves are combined with ``merge``.  Unlike
    the in-place sorts in this file, the result is a new list; ``nums``
    itself is returned unchanged only when it has zero or one item.

    Args:
        nums: A list of mutually comparable items.

    Returns:
        A list with the items of ``nums`` in ascending order.
    """
    # ``<= 1`` also covers the empty list, which would otherwise keep
    # splitting into empty halves forever.
    if len(nums) <= 1:
        return nums
    mid = len(nums) // 2
    lnums = merge_sort(nums[:mid])
    rnums = merge_sort(nums[mid:])
    return merge(lnums, rnums)
分配排序
计数排序
指定数值区间内对列表内数值进行计数,最后输出结果
貌似小规模,效率非常高,桶排序的简化(桶的大小为1)。
def count_sort(nums):
    """Sort a list of integers in place with counting sort.

    Occurrences are counted in a table indexed by ``value - min(nums)``, so
    negative values work and the table size depends only on the value
    range ``max(nums) - min(nums)``, not on the magnitude of the values.

    Args:
        nums: A mutable list of integers.

    Returns:
        The same ``nums`` object, now sorted ascending.
    """
    if not nums:
        return nums
    lowest = min(nums)
    highest = max(nums)
    counter = [0] * (highest - lowest + 1)
    for n in nums:
        counter[n - lowest] += 1
    write = 0
    for offset, count in enumerate(counter):
        # Emit ``count`` copies of the value this slot stands for.
        nums[write:write + count] = [lowest + offset] * count
        write += count
    return nums
桶排序
设定一个桶的大小(size),将数值按大小顺序均匀放入各个桶中,桶中数据使用其他排序算法,最后合并
貌似效率并不高,有待发掘用途
def bucket_sort(nums, bucket_size=6):
    """Sort a list of integers in place with bucket sort.

    Values are distributed into buckets that each cover ``bucket_size``
    consecutive values starting at ``min(nums)``.  Every bucket is sorted
    with ``simple_select_sort`` and the buckets are concatenated in order.

    Args:
        nums: A mutable list of integers.
        bucket_size: Width of the value range covered by one bucket; must
            be a positive integer.

    Returns:
        The same ``nums`` object, now sorted ascending.

    Raises:
        ValueError: If ``bucket_size`` is smaller than 1.
    """
    if bucket_size < 1:
        raise ValueError("bucket_size must be a positive integer")
    if not nums:
        # min()/max() below would raise on an empty list.
        return nums
    mini = min(nums)
    maxi = max(nums)
    bucket_count = (maxi - mini) // bucket_size + 1
    buckets = [[] for _ in range(bucket_count)]
    for n in nums:
        buckets[(n - mini) // bucket_size].append(n)
    nums.clear()
    for bucket in buckets:
        simple_select_sort(bucket)
        nums.extend(bucket)
    return nums
基数排序
类似这样: 数字(digit)只有十个,即十个桶,先按个位排序,再按十位排序
想法很朴实有没有?
def radix_sort(nums):
    """Sort a list of integers in place with least-significant-digit radix sort.

    Each value is keyed by its distance from ``min(nums)``, so the keys are
    non-negative and negative inputs are handled as well.  One stable
    distribution pass into ten buckets is made per decimal digit of the
    largest key, starting with the ones digit.

    Args:
        nums: A mutable list of integers.

    Returns:
        The same ``nums`` object, now sorted ascending.
    """
    if not nums:
        # min()/max() below would raise on an empty list.
        return nums
    lowest = min(nums)
    span = max(nums) - lowest
    exp = 1  # weight of the digit handled in the current pass
    while span // exp > 0:
        buckets = [[] for _ in range(10)]
        for n in nums:
            buckets[(n - lowest) // exp % 10].append(n)
        nums.clear()
        for bucket in buckets:
            nums.extend(bucket)
        exp *= 10
    return nums
完整的测试效率比较代码
from random import randint
import datetime
# Registry of the sorting functions to benchmark, keyed by function name.
sort_algs = {}


def reg(fn):
    """Register ``fn`` in ``sort_algs`` under its ``__name__``.

    Meant to be used as a decorator; the function is returned unchanged.
    """
    sort_algs[fn.__name__] = fn
    return fn
# 冒泡法
@reg
def bubble_sort(nums):
    """Sort ``nums`` in place in ascending order with bubble sort.

    Each pass compares adjacent pairs and swaps the ones that are out of
    order, which moves the largest remaining value to the end of the
    unsorted prefix.  The function stops early once a pass makes no swap.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    for done in range(len(nums) - 1):
        swapped = False
        # The last ``done`` slots already hold their final values.
        for j in range(len(nums) - done - 1):
            if nums[j] > nums[j + 1]:
                nums[j], nums[j + 1] = nums[j + 1], nums[j]
                swapped = True
        if not swapped:
            # A pass without swaps means the list is already sorted.
            break
    return nums
# 梳排序
@reg
def comb_sort(nums, dec=0.8):
    """Sort ``nums`` in place in ascending order with comb sort.

    Items ``step`` positions apart are compared and swapped when out of
    order.  After every pass ``step`` shrinks by the factor ``dec`` until it
    reaches 1.  Passes with ``step == 1`` are then repeated until one of
    them performs no swap, because a single pass at gap 1 does not
    guarantee a sorted list.

    Args:
        nums: A mutable sequence of mutually comparable items.
        dec: Shrink factor applied to the gap after each pass; must satisfy
            ``0 < dec < 1``.

    Returns:
        The same ``nums`` object, now sorted.

    Raises:
        ValueError: If ``dec`` is not strictly between 0 and 1 (the gap
            would never shrink to 1, or would collapse to 0 immediately).
    """
    if not 0 < dec < 1:
        raise ValueError("dec must be strictly between 0 and 1")
    size = len(nums)
    step = size
    swapped = True
    while step > 1 or swapped:
        # Never let the gap drop below 1, otherwise nothing is compared.
        step = max(1, int(step * dec))
        swapped = False
        for j in range(size - step):
            if nums[j] > nums[j + step]:
                nums[j], nums[j + step] = nums[j + step], nums[j]
                swapped = True
    return nums
# 简单选择排序
@reg
def simple_select_sort(nums):
    """Sort ``nums`` in place in ascending order with selection sort.

    For every position the smallest value of the remaining suffix is
    located and swapped into that position.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    size = len(nums)
    for i in range(size - 1):
        smallest = i
        for j in range(i + 1, size):
            if nums[j] < nums[smallest]:
                smallest = j
        nums[smallest], nums[i] = nums[i], nums[smallest]
    return nums
# 快速排序
@reg
def quick_sort(nums, start=None, end=None):
    """Sort ``nums[start:end]`` in place with quicksort and return ``nums``.

    The middle element of the range is used as the pivot.  It is parked at
    the end of the range, every smaller element is moved in front of a
    growing boundary, and the pivot is then swapped onto that boundary,
    which is its final position.  Both sides are sorted recursively.
    Elements equal to the pivot all stay on the right-hand side, so inputs
    with many duplicates recurse deeply.

    Args:
        nums: A mutable sequence of mutually comparable items.
        start: First index of the range to sort; defaults to 0.
        end: One past the last index of the range; defaults to
            ``len(nums)``.

    Returns:
        The same ``nums`` object (also for empty or one-element ranges).
    """
    if start is None:
        start = 0
    if end is None:
        end = len(nums)
    if end - start <= 1:
        # Nothing to partition; still return the list like every other case.
        return nums
    last = end - 1
    mid = (start + end) // 2
    # Park the pivot at the end so the scan below never moves it.
    nums[mid], nums[last] = nums[last], nums[mid]
    pivot = nums[last]
    boundary = start
    for i in range(start, last):
        if nums[i] < pivot:
            nums[i], nums[boundary] = nums[boundary], nums[i]
            boundary += 1
    # ``boundary`` is now the pivot's final sorted position.
    nums[boundary], nums[last] = nums[last], nums[boundary]
    quick_sort(nums, start, boundary)
    quick_sort(nums, boundary + 1, end)
    return nums
# 插入排序
@reg
def insert_sort(nums):
    """Sort ``nums`` in place in ascending order with insertion sort.

    The prefix ``nums[:i]`` is kept sorted; each following value is held in
    a temporary variable while larger prefix items are shifted one slot to
    the right, then dropped into the gap.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    for i in range(1, len(nums)):
        pending = nums[i]
        pos = i - 1
        # Shift every larger item right to open a slot for ``pending``.
        while pos >= 0 and nums[pos] > pending:
            nums[pos + 1] = nums[pos]
            pos -= 1
        nums[pos + 1] = pending
    return nums
# 希尔排序
# @reg
# def shell_sort(nums):
# n = len(nums)
# n //= 2
# while n >= 1:
# for i in range(n):
# for j in range(i, len(nums), n):
# tmp = nums[j]
# pre = j - n
# while pre >= 0 and nums[pre] > tmp:
# nums[pre+n] = nums[pre]
# pre -= n
# nums[pre+n] = tmp
# n //= 2
# return nums
# 希尔排序效率更好, 原理相同, 只是写法上的差异使效率略有不同
@reg
def shell_sort2(nums):
    """Sort ``nums`` in place in ascending order with Shell sort.

    The gap starts at ``len(nums) // 2`` and is halved after every round.
    Each round performs an insertion sort over items that are ``gap``
    positions apart; the last round (gap 1) is a plain insertion sort.

    Args:
        nums: A mutable sequence of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    size = len(nums)
    gap = size // 2
    while gap >= 1:
        for i in range(gap, size):
            pending = nums[i]
            k = i - gap
            # Shift larger items of the same gap-chain one step right.
            while k >= 0 and nums[k] > pending:
                nums[k + gap] = nums[k]
                k -= gap
            nums[k + gap] = pending
        gap //= 2
    return nums
# 堆排序
def adjust(nums, i, length):
    """Sift ``nums[i]`` down inside a 1-based max-heap.

    The heap occupies ``nums[1]`` .. ``nums[length]``; the children of node
    ``i`` are ``2*i`` and ``2*i + 1``.  Index 0 is never read or written.

    Args:
        nums: List holding the heap, with a placeholder at index 0.
        i: 1-based index of the node to sift down.
        length: Index of the last element that belongs to the heap.
    """
    while 2 * i <= length:
        left = 2 * i  # index of the left child
        right = left + 1
        largest = i
        if nums[left] > nums[i]:
            largest = left
        if right <= length and nums[right] > nums[largest]:
            largest = right
        if largest == i:
            # The parent already dominates both children.
            break
        nums[i], nums[largest] = nums[largest], nums[i]
        i = largest
@reg
def heapsort(nums):
    """Sort ``nums`` in place in ascending order with heap sort.

    ``adjust`` works with 1-based indices, so the data is copied behind a
    placeholder at index 0, sorted there, and written back.  Every element
    of ``nums`` takes part in the sort; callers do not need to pad the list
    themselves.

    Args:
        nums: A mutable list of mutually comparable items.

    Returns:
        The same ``nums`` object, now sorted.
    """
    heap = [None]  # placeholder so that heap indices start at 1
    heap.extend(nums)
    length = len(heap) - 1
    # Build the max-heap bottom-up, starting at the last parent node.
    for i in range(length // 2, 0, -1):
        adjust(heap, i, length)
    # Move the current maximum behind the heap and restore the heap.
    while length >= 2:
        heap[1], heap[length] = heap[length], heap[1]
        length -= 1
        adjust(heap, 1, length)
    nums[:] = heap[1:]
    return nums
# 归并排序
def merge(lnums, rnums):
    """Merge two ascending lists into one new ascending list.

    When two items compare equal the one from ``lnums`` is taken first, so
    equal items keep their left-before-right order (stable merge).

    Args:
        lnums: Ascending list.
        rnums: Ascending list.

    Returns:
        A new list containing every item of both inputs in ascending order.
    """
    merged = []
    li, ri = 0, 0
    while li < len(lnums) and ri < len(rnums):
        # ``<=`` rather than ``<`` so ties prefer the left-hand list.
        if lnums[li] <= rnums[ri]:
            merged.append(lnums[li])
            li += 1
        else:
            merged.append(rnums[ri])
            ri += 1
    # At most one of the two tails is non-empty here.
    merged.extend(lnums[li:])
    merged.extend(rnums[ri:])
    return merged
@reg
def merge_sort(nums):
    """Return the items of ``nums`` in ascending order using merge sort.

    The list is split in halves recursively until the pieces have at most
    one item, and the sorted halves are combined with ``merge``.  Unlike
    the in-place sorts in this file, the result is a new list; ``nums``
    itself is returned unchanged only when it has zero or one item.

    Args:
        nums: A list of mutually comparable items.

    Returns:
        A list with the items of ``nums`` in ascending order.
    """
    # ``<= 1`` also covers the empty list, which would otherwise keep
    # splitting into empty halves forever.
    if len(nums) <= 1:
        return nums
    mid = len(nums) // 2
    lnums = merge_sort(nums[:mid])
    rnums = merge_sort(nums[mid:])
    return merge(lnums, rnums)
# 计数排序
@reg
def count_sort(nums):
    """Sort a list of integers in place with counting sort.

    Occurrences are counted in a table indexed by ``value - min(nums)``, so
    negative values work and the table size depends only on the value
    range ``max(nums) - min(nums)``, not on the magnitude of the values.

    Args:
        nums: A mutable list of integers.

    Returns:
        The same ``nums`` object, now sorted ascending.
    """
    if not nums:
        return nums
    lowest = min(nums)
    highest = max(nums)
    counter = [0] * (highest - lowest + 1)
    for n in nums:
        counter[n - lowest] += 1
    write = 0
    for offset, count in enumerate(counter):
        # Emit ``count`` copies of the value this slot stands for.
        nums[write:write + count] = [lowest + offset] * count
        write += count
    return nums
# 桶排序
@reg
def bucket_sort(nums, bucket_size=6):
    """Sort a list of integers in place with bucket sort.

    Values are distributed into buckets that each cover ``bucket_size``
    consecutive values starting at ``min(nums)``.  Every bucket is sorted
    with ``simple_select_sort`` and the buckets are concatenated in order.

    Args:
        nums: A mutable list of integers.
        bucket_size: Width of the value range covered by one bucket; must
            be a positive integer.

    Returns:
        The same ``nums`` object, now sorted ascending.

    Raises:
        ValueError: If ``bucket_size`` is smaller than 1.
    """
    if bucket_size < 1:
        raise ValueError("bucket_size must be a positive integer")
    if not nums:
        # min()/max() below would raise on an empty list.
        return nums
    mini = min(nums)
    maxi = max(nums)
    bucket_count = (maxi - mini) // bucket_size + 1
    buckets = [[] for _ in range(bucket_count)]
    for n in nums:
        buckets[(n - mini) // bucket_size].append(n)
    nums.clear()
    for bucket in buckets:
        simple_select_sort(bucket)
        nums.extend(bucket)
    return nums
@reg
def radix_sort(nums):
    """Sort a list of integers in place with least-significant-digit radix sort.

    Each value is keyed by its distance from ``min(nums)``, so the keys are
    non-negative and negative inputs are handled as well.  One stable
    distribution pass into ten buckets is made per decimal digit of the
    largest key, starting with the ones digit.

    Args:
        nums: A mutable list of integers.

    Returns:
        The same ``nums`` object, now sorted ascending.
    """
    if not nums:
        # min()/max() below would raise on an empty list.
        return nums
    lowest = min(nums)
    span = max(nums) - lowest
    exp = 1  # weight of the digit handled in the current pass
    while span // exp > 0:
        buckets = [[] for _ in range(10)]
        for n in nums:
            buckets[(n - lowest) // exp % 10].append(n)
        nums.clear()
        for bucket in buckets:
            nums.extend(bucket)
        exp *= 10
    return nums
def test(nums):
    """Time every registered sorting function on a copy of ``nums``.

    For each entry of ``sort_algs`` a separator line, the function name
    with the elapsed seconds, and the slice ``[100:110]`` of the result are
    printed.  ``nums`` itself is never modified because each function gets
    its own copy.

    Args:
        nums: List of values handed (as a copy) to every sorting function.
    """
    for sort_name, sort_func in sort_algs.items():
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        newnums = nums[:]
        start = datetime.datetime.now()
        t = sort_func(newnums)
        delta = (datetime.datetime.now() - start).total_seconds()
        if t is None:
            # A sorter that works purely in place may return nothing; its
            # result then lives in the copy it was given.
            t = newnums
        print(sort_name, delta)
        print(t[100:110])  # print part of the result
if __name__ == '__main__':
    # Benchmark all registered algorithms on 1000 random integers.
    nums = [randint(1, 1000000) for _ in range(1000)]
    test(nums)