1 . 归并排序和快速排序
1.1 归并排序
归并排序的思想就是将数组分为两部分,然后对两部分分别进行排序,再将排序后的两部分进行合并.主要的难度在于合并部分:合并的时候需要重新开一个临时数组保存合并的结果,然后再复制回原数组.
下面是归并排序的 Python 实现:
# coding:utf-8
__author__ = 'devin'
def merge_sort(data, low, high):
    """Sort ``data[low..high]`` (both bounds inclusive) in place.

    The range is split at its midpoint, each half is sorted recursively,
    and the two sorted halves are then combined by ``merge``.

    Args:
        data: mutable, indexable sequence of mutually comparable items.
        low: index of the first element of the range to sort.
        high: index of the last element of the range to sort.  Nothing
            happens when ``low >= high``.
    """
    if low < high:
        # Floor division keeps ``mid`` an integer on Python 3 as well; plain
        # ``/`` yields a float there, which is not a valid list index.
        mid = (low + high) // 2
        merge_sort(data, low, mid)
        merge_sort(data, mid + 1, high)
        merge(data, low, mid, high)
def merge(data, low, mid, high):
    """Merge two adjacent sorted runs of ``data`` in place.

    ``data[low..mid]`` and ``data[mid+1..high]`` (all bounds inclusive) must
    each already be in ascending order.  The merged result is built in a
    temporary list and then copied back over ``data[low..high]``.

    Args:
        data: mutable, indexable sequence of mutually comparable items.
        low: first index of the left run.
        mid: last index of the left run.
        high: last index of the right run.
    """
    merged = []
    left, right = low, mid + 1
    while left <= mid and right <= high:
        # ``<=`` takes from the left run on ties, so equal items keep their
        # original relative order.
        if data[left] <= data[right]:
            merged.append(data[left])
            left += 1
        else:
            merged.append(data[right])
            right += 1
    # At most one run still has items; the slice of the exhausted run is empty.
    merged.extend(data[left:mid + 1])
    merged.extend(data[right:high + 1])
    # Copy the merged values back over the range they came from.
    for offset, value in enumerate(merged):
        data[low + offset] = value
if __name__ == "__main__":
    # Small demonstration: sort a fixed list in place and show the result.
    data = [1, 3, 2, 6, 3, 7, 2, 12, 15, 11, 10, 131, 1]
    merge_sort(data, 0, len(data) - 1)
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3; the bare ``print data`` statement is Python 2 only.
    print(data)
1.1 基于链表的归并排序
基于链表的归并排序与一般使用的归并排序算法不同之处主要在于使用链表保存原数组元素的索引,开辟空间对索引排序,不改变原数组元素的顺序.该算法使用数组构造链表,排序后返回链表头索引值,也是原数组第一个元素的索引.这样相比普通的归并排序效率要好一些,省去了复制数组的麻烦,在大规模数据的情况下可以提高一定的性能.
下面是算法示例,使用 Python 实现.
# coding: utf-8
__author__ = 'devin'
import random
class MergeSortLink(object):
    """Merge sort that orders indices through a link array.

    ``link[i]`` holds the index of the element that follows ``data[i]`` in
    sorted order, or ``-1`` at the end of the list (``0`` cannot be the
    terminator because it is a valid index).  ``data`` itself is never
    reordered; only ``link`` is written.
    """

    # Ranges with fewer elements than this are handled by ``insert_sort``.
    INSERTION_THRESHOLD = 16

    def __init__(self, data, link):
        """Store the sequence to sort and its companion link array.

        Args:
            data: indexable sequence of mutually comparable items (read only).
            link: mutable list with one slot per element of ``data``; it is
                overwritten with successor indices.
        """
        self.data = data
        self.link = link

    def insert_sort(self, low, high):
        """Link indices ``low..high`` (inclusive) in ascending data order.

        Args:
            low: first index of the range.
            high: last index of the range.

        Returns:
            Index of the head (smallest element) of the resulting list, or
            ``-1`` when the range is empty.
        """
        if low > high:
            return -1
        # Start from a one-node list so the result does not depend on what
        # ``link[low]`` happened to contain beforehand.
        self.link[low] = -1
        head = low
        for i in range(low + 1, high + 1):
            value = self.data[i]
            # Walk past every node whose value is <= ``value`` so that equal
            # items stay in index order.
            prev = -1
            node = head
            while node != -1 and value >= self.data[node]:
                prev = node
                node = self.link[node]
            # ``node`` is the successor of ``i`` (-1 when ``i`` becomes the tail).
            self.link[i] = node
            if prev == -1:
                head = i  # nothing was smaller or equal: ``i`` is the new head
            else:
                self.link[prev] = i
        return head

    def merge_sort_link(self, low, high):
        """Sort indices ``low..high`` (inclusive) and return the head index.

        Short ranges are delegated to ``insert_sort``; longer ones are split
        at the midpoint, sorted recursively and joined with ``merge``.

        Args:
            low: first index of the range.
            high: last index of the range.

        Returns:
            Index of the smallest element in the range, or ``-1`` when the
            range is empty.
        """
        # ``low >= high`` keeps the recursion finite even if the threshold
        # is overridden with a value below 2.
        if low >= high or high - low + 1 < self.INSERTION_THRESHOLD:
            return self.insert_sort(low, high)
        # Floor division keeps ``mid`` an integer index on Python 3 as well.
        mid = (low + high) // 2
        left_head = self.merge_sort_link(low, mid)
        right_head = self.merge_sort_link(mid + 1, high)
        return self.merge(left_head, right_head)

    def merge(self, q, r):
        """Merge two sorted linked lists and return the head of the result.

        Args:
            q: head index of the first sorted list (``-1`` if empty).
            r: head index of the second sorted list (``-1`` if empty).

        Returns:
            Head index of the merged list.
        """
        # An empty side needs no merging; indexing ``data[-1]`` would
        # silently read the last element instead.
        if q == -1:
            return r
        if r == -1:
            return q
        head = -1
        tail = -1
        while q != -1 and r != -1:
            # ``<=`` prefers the first list on ties.
            if self.data[q] <= self.data[r]:
                chosen = q
                q = self.link[q]
            else:
                chosen = r
                r = self.link[r]
            if tail == -1:
                head = chosen
            else:
                self.link[tail] = chosen
            tail = chosen
        # Append whichever list still has nodes left.
        self.link[tail] = q if q != -1 else r
        return head

    def print_link(self, p):
        """Print the data values reached by following links from index ``p``."""
        sorted_data = []
        while p != -1:
            sorted_data.append(self.data[p])
            p = self.link[p]
        # Single-argument ``print(...)`` gives the same output on Python 2 and 3.
        print(sorted_data)
if __name__ == "__main__":
    # Demonstration: sort 50 random integers through the link array.
    test_data = [random.randint(1, 100) for _ in range(50)]
    print(test_data)
    # -1 marks the end of the linked list; 0 cannot be used because it is a
    # valid index.
    link = [-1] * len(test_data)
    sort_link = MergeSortLink(test_data, link)
    p = sort_link.merge_sort_link(0, len(test_data) - 1)
    sort_link.print_link(p)
1.2 快速排序
快速排序的思想很简单,在数组中选择一个数,将数组划分为小于和大于该数两个部分,然后在这两个部分进行递归快速排序,因此算法的核心就是划分数的选择.
下面示例代码使用 Python 实现.
# coding: utf-8
__author__ = 'devin'
import random
def partition(data, low, high):
    """Partition ``data[low..high]`` (inclusive) around a random pivot.

    A pivot is picked uniformly from the range and moved to ``data[low]``.
    On return, every element left of the pivot's final position is ``<=``
    the pivot and every element right of it is ``>`` the pivot.

    Args:
        data: mutable, indexable sequence of mutually comparable items.
        low: first index of the range (must satisfy ``low <= high``).
        high: last index of the range.

    Returns:
        The final index of the pivot.

    Note:
        Items equal to the pivot all end up on the left side, so a range of
        identical values is split as unevenly as possible.
    """
    pivot_index = random.randint(low, high)
    data[low], data[pivot_index] = data[pivot_index], data[low]
    pivot = data[low]

    i, j = low, high
    while True:
        # ``i`` stops at the first item > pivot (or just past ``high``).
        while i <= high and data[i] <= pivot:
            i += 1
        # ``j`` stops at the last item <= pivot; ``data[low]`` is the pivot
        # itself, so ``j`` never drops below ``low``.
        while j >= low and data[j] > pivot:
            j -= 1
        if i >= j:
            break
        data[i], data[j] = data[j], data[i]

    # ``j`` is the pivot's final slot: move its occupant to ``low`` and drop
    # the pivot in.
    data[low] = data[j]
    data[j] = pivot
    return j
def quick_sort(data, low, high):
    """Sort ``data[low..high]`` (both bounds inclusive) in place.

    Args:
        data: mutable, indexable sequence of mutually comparable items.
        low: index of the first element of the range to sort.
        high: index of the last element of the range to sort.  Nothing
            happens when ``low >= high``.
    """
    # Recurse only into the smaller side and loop on the larger one.  This
    # caps the recursion depth at about log2(n) even when the splits are
    # lopsided, instead of one stack frame per element.
    while low < high:
        p = partition(data, low, high)
        if p - low < high - p:
            quick_sort(data, low, p - 1)
            low = p + 1
        else:
            quick_sort(data, p + 1, high)
            high = p - 1
if __name__ == "__main__":
    # Demonstration: show 40 random integers before and after sorting.
    # Single-argument ``print(...)`` gives the same output on Python 2 and 3.
    test_data = [random.randint(1, 100) for _ in range(40)]
    print(test_data)
    quick_sort(test_data, 0, len(test_data) - 1)
    print(test_data)
2 算法测试
使用长度分别为100,200,300,400,500,600,700,800,900,1000的十个数组进行排序,统计第一节中各算法的运行时间,并据此分析它们的时间复杂度
2.1 编写测试程序
为了方便测试,编写一个测试程序,程序的输入为数据规模因子,比如输入100,则测试的10个数组中第i个数组的长度为i*100,即100,200,300,400,500,600,700,800,900,1000.然后用 Python 的图形库 matplotlib 输出三个算法的时间-规模折线图.
测试代码如下:
# coding: utf-8
__author__ = 'devin'
import time
import random
from MergeSort import merge_sort
from MergeSortL import MergeSortLink
from QuickSort import quick_sort
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Benchmark driver: read a scale factor, time the three sorts on ten
    # random arrays of length factor*1 .. factor*10, then plot time vs. size.

    # ``raw_input`` exists only on Python 2; fall back to ``input`` on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    factor = int(read_line())
    if factor <= 0:
        # A non-positive factor gives empty arrays and a zero divisor in the
        # linear reference curve below.
        raise SystemExit("scale factor must be a positive integer")

    data_scale = [i * factor for i in range(1, 11)]
    merge_sort_time = []
    merge_sort_l_time = []
    quick_sort_time = []
    print(data_scale)

    for scale in data_scale:
        test_data = [random.randint(1, scale * 2) for _ in range(scale)]

        # Every algorithm gets its own copy.  merge_sort and quick_sort sort
        # in place, so sharing one list would hand the later runs input that
        # is already sorted.
        data_1 = list(test_data)
        start = time.time()
        merge_sort(data_1, 0, len(data_1) - 1)
        end = time.time()
        merge_sort_time.append(end - start)

        data_2 = list(test_data)
        # -1 marks the end of the linked list; 0 cannot be used because it is
        # a valid index.
        link = [-1] * len(data_2)
        start = time.time()
        sort_link = MergeSortLink(data_2, link)
        sort_link.merge_sort_link(0, len(data_2) - 1)
        end = time.time()
        merge_sort_l_time.append(end - start)

        data_3 = list(test_data)
        start = time.time()
        quick_sort(data_3, 0, len(data_3) - 1)
        end = time.time()
        quick_sort_time.append(end - start)

    # One pre-formatted string per line prints the same text on Python 2 and 3
    # (``print(a, b)`` would print a tuple on Python 2).
    print("Scale:  %s" % (data_scale,))
    print("Merge:  %s" % (merge_sort_time,))
    print("MergeL:  %s" % (merge_sort_l_time,))
    print("QuickSort:  %s" % (quick_sort_time,))

    plt.plot(data_scale, merge_sort_time, 'b', label='MergeSort')
    plt.plot(data_scale, merge_sort_l_time, 'r', label='MergeSortLink')
    plt.plot(data_scale, quick_sort_time, 'g', label='QuickSort')
    # Linear reference curve: the quick-sort time at the smallest size,
    # scaled in proportion to the array length.
    linear_reference = [quick_sort_time[0] * scale / data_scale[0]
                        for scale in data_scale]
    plt.plot(data_scale, linear_reference, 'y', label='O(n)')

    # Fit the y-axis to the slowest measurement at the largest size.
    max_time = max(merge_sort_time[-1], merge_sort_l_time[-1], quick_sort_time[-1])
    plt.xlabel("Scale")
    plt.ylabel("time")
    plt.ylim(0, max_time * 1.2)
    plt.title('Algorithm Time & Data Scale')
    plt.legend()
    plt.show()
2.2 测试分析
由于在规模因子为100时,运行时间很短,因此测试因子选择10000,测试结果如下图所示
在折线图中,黄色实线代表时间复杂度为 $O(n)$ 的参考线,蓝色是普通的归并排序,红色是基于链表的归并排序,绿色是快速排序.可以清楚地看到三种排序算法的时间复杂度明显大于 $O(n)$,小于 $O(n^2)$,而且在数据规模较小的情况下,三者的排序所用时间差不多;数据规模较大时,快速排序和基于链表的归并排序明显优于普通的归并排序,其中快速排序效果最好.因此在大规模数据下快速排序的速度最快.它们的时间复杂度均为 $O(n \log n)$.