归并排序介绍
归并排序(merge sort)算法采用了分而治之的策略,将大问题分解为小问题去求解,将小问题求解的结果进行合并形成大问题的解,核心在于归并算法。
数据集一分为二,二分为四,… ,一直分解到单个数据集只有一个元素,然后进行归并,最终合并出一个有序的数据集。
有两种实现方式,递归方式和迭代方式。
时间复杂度:O(nlogn) (最好,最差,平均)。
c++实现
递归方式
底层数据结构为数组,采用自顶向下的递归方式去实现,每次归并存在拷贝的动作,子数据集归并到临时数组,然后临时数组拷贝回原数组。
迭代实现
底层数据结构为数组,采用自底向上的迭代方式去实现。归并结果不需要再拷贝回原数组:先将a数组归并到b数组,再将b数组归并到a数组,如此循环往复,最终得到一个有序的数组(每一趟中仅末尾不足一对的剩余数据段会被直接复制),因此效率更高。
ps:所有可以用递归实现的算法都可以用迭代去重写!
c++源码及测试代码
#include <ctime>
#include <iostream>
#include <memory>
//////////////////////////////////////////////////
//归并排序--递归实现
//////////////////////////////////////////////////
// Merges two adjacent sorted runs of arr, [left, mid] and [mid + 1, right]
// (all bounds inclusive), into one sorted run written back to arr[left..right].
//
// temp is scratch space: the merged run is first assembled in temp starting at
// index 0 and then copied back, so temp needs room for right - left + 1
// elements. When two elements compare equal the one from the left run is
// taken first, which keeps the merge stable.
template <typename T>
void merge(T *arr, int left, int mid, int right, T *temp)
{
    int lhs = left;     // read cursor in the left run
    int rhs = mid + 1;  // read cursor in the right run
    int filled = 0;     // number of elements placed in temp so far

    // Both runs still have elements: take the smaller head (left run on ties).
    for (; lhs <= mid && rhs <= right; ++filled) {
        temp[filled] = (arr[lhs] <= arr[rhs]) ? arr[lhs++] : arr[rhs++];
    }

    // At most one of the two runs has leftovers; append them unchanged.
    for (; lhs <= mid; ++lhs) {
        temp[filled++] = arr[lhs];
    }
    for (; rhs <= right; ++rhs) {
        temp[filled++] = arr[rhs];
    }

    // Copy the merged run back over the original range.
    for (int dst = left, src = 0; dst <= right; ++dst, ++src) {
        arr[dst] = temp[src];
    }
}
// Recursively merge-sorts arr[left..right] (both bounds inclusive) in place.
//
// The range is split at its midpoint, each half is sorted by a recursive
// call, and the two sorted halves are combined with merge(). temp is scratch
// space that is passed through to merge() unchanged.
// A range with fewer than two elements (left >= right) is left untouched.
template <typename T>
void sort(T *arr, int left, int right, T *temp)
{
    if (left >= right) {
        return;
    }
    // Written as left + (right - left) / 2 rather than (left + right) / 2:
    // the sum of two large indices can overflow int, the difference cannot
    // for 0 <= left <= right.
    const int mid = left + (right - left) / 2;
    sort<T>(arr, left, mid, temp);
    sort<T>(arr, mid + 1, right, temp);
    merge<T>(arr, left, mid, right, temp);
}
// Sorts the first n elements of arr in ascending order using the top-down
// (recursive) merge sort.
//
// A scratch buffer of n elements is allocated for the duration of the call.
// Inputs with fewer than two elements are already sorted and return
// immediately; this also keeps a negative n away from the array allocation.
template <typename T>
void mergeSortByRecursive(T *arr, int n)
{
    if (n < 2) {
        return;
    }
    // Owned by unique_ptr so the buffer is released even if copying an
    // element throws part-way through the sort.
    std::unique_ptr<T[]> temp(new T[n]);
    sort<T>(arr, 0, n - 1, temp.get());
}
//////////////////////////////////////////////////
//归并排序--迭代实现
//////////////////////////////////////////////////
// Merges two adjacent sorted runs of arr, [start1, end1] and
// [end1 + 1, end2] (all bounds inclusive), into temp[start1..end2].
//
// arr is only read; the result occupies the same index range in temp, so
// nothing is copied back. When two elements compare equal the one from the
// first run is taken first.
template <typename T>
void merge(T *arr, T *temp, int start1, int end1, int end2)
{
    int a = start1;    // read cursor in the first run
    int b = end1 + 1;  // read cursor in the second run (start2 = end1 + 1)
    int out = start1;  // write cursor in temp

    while (a <= end1 || b <= end2) {
        // Take from the first run when the second is exhausted, or when both
        // still have elements and the first run's head is not larger.
        const bool takeFirst = (b > end2) || (a <= end1 && arr[a] <= arr[b]);
        if (takeFirst) {
            temp[out++] = arr[a++];
        } else {
            temp[out++] = arr[b++];
        }
    }
}
template <typename T>
void mergePass(T *arr, T *temp, int n, int segmentSize)
{
int i = 0;
while (i <= n - 2 * segmentSize) {
merge<int>(arr, temp, i, i + segmentSize - 1, i + 2 * segmentSize - 1);
i = i + 2 * segmentSize;
}
//少于两个满数据段
if (i + segmentSize < n) { //剩下两个数据段
merge<int>(arr, temp, i, i + segmentSize - 1, n - 1);
} else { //只剩一个数据段,直接复制
for (int j = i; j < n; ++j) {
temp[j] = arr[j];
}
}
}
template <typename T>
void mergeSortByIterator(T *arr, int n)///
{
T *temp = new T[n];
int segmentSize = 1;
while (segmentSize < n) {
mergePass<int>(arr, temp, n, segmentSize);
segmentSize = segmentSize * 2;
mergePass<int>(temp, arr, n, segmentSize);
segmentSize = segmentSize * 2;
}
delete [] temp;
}
// Scope timer: records the calendar time on construction and, on
// destruction, prints the difference between the current time and the
// recorded one to std::cout as "time consumption: <difference>(s)".
// The value printed is the plain difference of two std::time() results.
class timeGuard
{
public:
    timeGuard() : start(std::time(nullptr))
    {
    }
    ~timeGuard()
    {
        std::cout<< "time consumption: " << std::time(nullptr) - start << "(s)" << std::endl;
    }
private:
    // Kept as std::time_t (the type std::time returns) instead of int, so the
    // timestamp is not narrowed when it is stored.
    std::time_t start;
};
// Benchmark driver: fills an array of 100 million ints in descending order,
// sorts it with the recursive merge sort, refills it the same way and sorts
// it with the iterative merge sort. Each sort runs inside its own scope with
// a timeGuard, which prints the elapsed time when the scope ends, right
// after the label printed at the end of that scope.
int main (void)
{
    const int size = 1000 * 1000 * 100; // 100 million elements
    int *arr = new int[size];

    // Reverse-sorted input: size, size - 1, ..., 1.
    auto fillDescending = [arr, size]() {
        for (int i = 0; i < size; ++i) {
            arr[i] = size - i;
        }
    };

    fillDescending();
    {
        timeGuard tg;
        mergeSortByRecursive<int>(arr, size);
        std::cout << "merge sort by recusive: ";
    }
    // Debug aid: uncomment to dump the sorted array.
    // for (int i = 0; i < size; ++i) {
    //     std::cout << arr[i] << " ";
    // }

    fillDescending();
    {
        timeGuard tg;
        mergeSortByIterator<int>(arr, size);
        std::cout << "merge sort by iterator: ";
    }
    // Debug aid: uncomment to dump the sorted array.
    // for (int i = 0; i < size; ++i) {
    //     std::cout << arr[i] << " ";
    // }

    // The original never released the input array.
    delete [] arr;
    return 0;
}
测试结果
测试平台:Darwin SamdeMacBook-Pro.local 18.7.0 Darwin Kernel Version 18.7.0: Tue Jan 12 22:04:47 PST 2021; root:xnu-4903.278.56~1/RELEASE_X86_64 x86_64
测试数据量:1亿个数据
测试结果:
merge sort by recusive: time consumption: 13(s)
merge sort by iterator: time consumption: 7(s)
可见,迭代方式性能更好。