文章目录
排序算法
时间复杂度为O(n²)的排序算法
冒泡排序
相邻的元素进行比较,当一个元素大于其右侧相邻元素时,交换它们的位置;当一个元素小于或等于右侧相邻元素时,位置不变
#include <stdio.h>
int list[] = {5,1,2,3,7,6,8,9,10};
int length = sizeof(list)/sizeof(int);

/*
 * Bubble sort: orders the first `length` (file-level global) elements of
 * `list` ascending, in place.
 *
 * list: array holding at least `length` ints.
 * Returns the same pointer that was passed in.
 *
 * Each pass pushes the largest remaining value to the end, so the scanned
 * range shrinks by one per pass; a pass without swaps means the array is
 * already sorted and the loop stops early.
 */
int* sort(int* list){
    for (int pass = 0; pass < length - 1; pass++) {
        int swapped = 0;
        for (int i = 0; i < length - 1 - pass; i++) {
            if (list[i] > list[i + 1]) {
                int tmp = list[i];
                list[i] = list[i + 1];
                list[i + 1] = tmp;
                swapped = 1;
            }
        }
        if (!swapped) {
            break;
        }
    }
    return list;
}
/*
 * Prints the first `length` (file-level global) ints of `list` on one line,
 * each followed by a space, then a newline.
 */
void outout(int* list){
    int index = 0;
    while (index < length) {
        printf("%d ", list[index]);
        ++index;
    }
    printf("\n");
}
/* Demo driver: shows the global array before and after the bubble sort. */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    outout(list);
    /* sort() returns the pointer it was given, so it can be printed directly. */
    outout(sort(list));
    return 0;
}
冒泡排序是一种稳定排序,值相等的元素不会被打乱原本的顺序。该排序算法每一轮都要遍历所有元素,总共遍历(元素数量-1)轮,所以平均时间复杂度是O(n²)
当大部分元素都是有序的时候,可以采用鸡尾酒排序,奇数次从左到右,偶数次从右到左
选择排序
#include <iostream>
#include <vector>
using namespace std;
vector<int> list {2, 5, 4, 1, 5, 8, 7, 6, 9, 3};

/// Selection sort: orders `list` ascending, in place.
///
/// For every position i the smallest element of list[i..end) is located and
/// swapped into place. Indices are used instead of a pointer into the vector,
/// and the loop bound is written as `i + 1 < count` so that an empty vector
/// does not wrap around to a huge unsigned value.
void chooseSort(vector<int> &list) {
    const size_t count = list.size();
    for (size_t i = 0; i + 1 < count; ++i) {
        size_t smallest = i;
        for (size_t j = i + 1; j < count; ++j) {
            if (list[j] < list[smallest]) {
                smallest = j;
            }
        }
        if (smallest != i) {
            swap(list[i], list[smallest]);
        }
    }
}
/// Writes every element of `list` to standard output followed by one space,
/// then ends the line and flushes.
void output(vector<int> &list) {
    for (auto pos = list.begin(); pos != list.end(); ++pos) {
        cout << *pos << ' ';
    }
    cout << endl;
}
/// Demo driver: selection-sorts the global vector and prints the result.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    chooseSort(list);
    output(list);

    return 0;
}
插入排序
#include <iostream>
#include <vector>
using namespace std;
vector<int> list {2, 5, 4, 1, 5, 8, 7, 6, 9, 3};

/// Insertion sort: orders `list` ascending, in place.
///
/// The prefix list[0..sortedEnd) is always sorted; the element at `sortedEnd`
/// is walked leftwards by adjacent swaps until its left neighbour is not
/// larger than it.
void insertSort(vector<int> &list) {
    const int count = static_cast<int>(list.size());
    int sortedEnd = 0;
    while (sortedEnd < count) {
        int pos = sortedEnd;
        while (pos > 0 && list[pos] < list[pos - 1]) {
            swap(list[pos], list[pos - 1]);
            --pos;
        }
        ++sortedEnd;
    }
}
/// Prints the elements of `list` on one line, a space after each value,
/// then a newline (with flush).
void output(vector<int> &list) {
    for (vector<int>::size_type i = 0; i < list.size(); ++i) {
        cout << list[i] << ' ';
    }
    cout << endl;
}
/// Demo driver: insertion-sorts the global vector and prints the result.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    insertSort(list);
    output(list);

    return 0;
}
希尔排序
(性能优于O(n²),但不如O(nlogn))
#include <iostream>
#include <vector>
using namespace std;
vector<int> list {2, 5, 4, 1, 5, 8, 7, 6, 9, 3};

/// Shell sort: orders `list` ascending, in place.
///
/// Gaps are len/2, len/4, ..., 1. For every gap a gapped insertion sort is
/// run: each element is moved back in steps of `gap` until the element `gap`
/// positions before it is not larger. A single compare-and-swap sweep per gap
/// is not enough (the final gap of 1 would be just one bubble pass), so the
/// inner loop keeps shifting until the element is in place.
void shellSort(vector<int> &list) {
    const int len = static_cast<int>(list.size());
    for (int gap = len / 2; gap > 0; gap /= 2) {
        for (int i = gap; i < len; ++i) {
            const int value = list[i];
            int j = i;
            while (j >= gap && list[j - gap] > value) {
                list[j] = list[j - gap];
                j -= gap;
            }
            list[j] = value;
        }
    }
}
/// Streams each element of `list` followed by a blank, then finishes the
/// line and flushes standard output.
void output(vector<int> &list) {
    auto cursor = list.begin();
    const auto stop = list.end();
    while (cursor != stop) {
        cout << *cursor << ' ';
        ++cursor;
    }
    cout << endl;
}
/// Demo driver: shell-sorts the global vector and prints the result.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    shellSort(list);
    output(list);

    return 0;
}
时间复杂度为O(nlogn)的排序算法
快速排序
快速排序也属于交换排序,快速排序在每一轮挑选一个基准元素,并让其他比它大的元素移动到数列一边,比它小的元素移动到数列另一边,从而把数列拆解成两个部分,这种思路叫做分治法
快速排序平均时间复杂度为O(nlogn),最坏情况下时间复杂度为O(n²)
基准元素的选择
随机选择一个元素作为基准元素,并且让基准元素和数列首元素交换位置
元素的交换
- 双边循环法
选定基准元素pivot,设置两个指针left和right,分别指向数列最左和最右两个元素。从right指针开始,让指针所指向的元素和基准元素做比较:如果大于等于pivot,则right指针左移;如果小于pivot,则right指针停止移动,切换到left指针。left指针指向的元素和基准元素进行比较:如果小于等于pivot,则left指针右移;如果大于pivot,则left指针停止移动。左右指针都停下后,交换两个指针所指向的元素;当左右指针重合时,把重合位置的元素和基准元素交换
C++实现
// 3. Quick sort

/// Sorts nums[start..end] (both bounds inclusive) ascending, in place.
/// Declared up front so the one-argument overload below can call it.
void quick_sort(vector<int>& nums, int start, int end);

/// Sorts the whole vector ascending, in place.
void quick_sort(vector<int>&nums){
    // Fewer than two elements: nothing to do, and this also avoids computing
    // size() - 1 on an empty vector.
    if (nums.size() < 2) {
        return;
    }
    quick_sort(nums, 0, static_cast<int>(nums.size()) - 1);
}

void quick_sort(vector<int>& nums, int start, int end){
    if (start >= end) {
        return;
    }
    const int pivot = nums[start];
    int left = start, right = end;
    while (left < right) {
        // The right pointer must move first: when the two pointers meet, the
        // meeting position then holds a value <= pivot, which is safe to swap
        // into the first slot.
        while (left < right && nums[right] > pivot) { --right; }
        while (left < right && nums[left] <= pivot) { ++left; }
        if (left < right) swap(nums[left], nums[right]);
    }
    // Put the pivot at its final position, then sort both sides.
    swap(nums[left], nums[start]);
    quick_sort(nums, start, left - 1);
    quick_sort(nums, left + 1, end);
}
#include <stdio.h>
int list[] = {5,1,2,3,7,6,8,9,10,0};
int length = sizeof(list)/sizeof(int);

/*
 * Two-pointer partition of list[startIndex..endIndex] (inclusive) around the
 * pivot list[startIndex].
 *
 * The right pointer retreats over values greater than the pivot, then the
 * left pointer advances over values not greater than it; the two stopped
 * values are exchanged. When the pointers meet, the meeting slot and the
 * pivot slot are exchanged.
 *
 * Returns the final index of the pivot.
 */
int partition(int* list, int startIndex, int endIndex){
    const int pivot = list[startIndex];
    int lo = startIndex;
    int hi = endIndex;
    while (lo != hi) {
        for (; lo < hi && list[hi] > pivot; --hi) {
        }
        for (; lo < hi && list[lo] <= pivot; ++lo) {
        }
        if (lo < hi) {
            const int held = list[lo];
            list[lo] = list[hi];
            list[hi] = held;
        }
    }
    list[startIndex] = list[lo];
    list[lo] = pivot;
    return lo;
}
/*
 * Recursive quick sort of list[startIndex..endIndex] (inclusive bounds).
 * Ranges with fewer than two elements are left untouched.
 */
void quickSort(int* list, int startIndex, int endIndex){
    if (startIndex < endIndex) {
        const int pivotIndex = partition(list, startIndex, endIndex);
        quickSort(list, startIndex, pivotIndex - 1);
        quickSort(list, pivotIndex + 1, endIndex);
    }
}
/*
 * Prints the first `length` (file-level global) ints of `list`, a space
 * after each, followed by a newline.
 */
void output(int* list){
    for (const int* p = list; p < list + length; ++p) {
        printf("%d ", *p);
    }
    printf("\n");
}
/* Demo driver: quick-sorts the whole global array and prints it. */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    const int lastIndex = length - 1;
    quickSort(list, 0, lastIndex);
    output(list);
    return 0;
}
- 单边循环法
与双边循环类似,首先选定基准元素pivot,同时设置一个mark指针指向数列起始位置,mark指针代表小于基准元素的区域边界。此后从基准元素的下一个位置开始遍历数组:如果遍历到的元素大于等于基准元素,就继续往后遍历;如果小于基准元素,则做两件事:(1)把mark指针右移一位,因为小于pivot的区域边界增大了1;(2)让最新遍历到的元素和mark指针所在位置的元素交换位置,因为最新遍历的元素属于小于pivot的区域。遍历结束后,把pivot元素交换到mark指针所在位置,这一轮结束
#include <stdio.h>
int list[] = {5,1,2,3,7,6,8,9,10,0};
int length = sizeof(list)/sizeof(int);

/*
 * One-directional partition of list[startIndex..endIndex] (inclusive) around
 * the pivot list[startIndex].
 *
 * `boundary` is the last index of the "smaller than pivot" zone. Every
 * scanned value smaller than the pivot extends that zone by one and is
 * swapped into it. Finally the pivot is exchanged with the value at
 * `boundary`.
 *
 * Returns the final index of the pivot.
 */
int partition(int* list, int startIndex, int endIndex){
    const int pivot = list[startIndex];
    int boundary = startIndex;
    int scan = startIndex + 1;
    while (scan <= endIndex) {
        if (list[scan] < pivot) {
            ++boundary;
            const int held = list[boundary];
            list[boundary] = list[scan];
            list[scan] = held;
        }
        ++scan;
    }
    list[startIndex] = list[boundary];
    list[boundary] = pivot;
    return boundary;
}
/*
 * Quick sort over the inclusive range list[startIndex..endIndex]: partition
 * once, then recurse into the part left of the pivot and the part right of it.
 */
void quickSort(int* list, int startIndex, int endIndex){
    if (!(startIndex >= endIndex)) {
        const int pivotIndex = partition(list, startIndex, endIndex);
        quickSort(list, startIndex, pivotIndex - 1);
        quickSort(list, pivotIndex + 1, endIndex);
    }
}
/*
 * Writes the first `length` (file-level global) values of `list` to stdout,
 * space-separated, and terminates the line.
 */
void output(int* list){
    int printed = 0;
    while (printed < length) {
        printf("%d ", *(list + printed));
        printed += 1;
    }
    printf("\n");
}
/* Demo driver: sorts the global array with the one-directional partition
 * quick sort and prints the result. */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    const int lastIndex = length - 1;
    quickSort(list, 0, lastIndex);
    output(list);
    return 0;
}
由于栈的回溯特性,同样可以用栈来代替递归。栈中存储每一次分治时的起始下标和结束下标,每一次循环让栈顶元素出栈,通过partition方法进行分治,并按照基准元素的位置分成左右两部分,左右两部分再分别入栈,栈为空时表示排序完毕
递归双指针实现
#include <stdio.h>
/*
 * Quick sort of list[front..rear] (inclusive bounds) using the "hole" method.
 *
 * The pivot list[front] is lifted out, leaving a hole at `lo`. A smaller
 * value found from the right fills the hole (moving the hole to `hi`), then
 * a value not smaller than the pivot found from the left fills that hole, and
 * so on until the two cursors meet; the pivot is dropped into the last hole.
 */
void quickSort(int* list, int front, int rear){
    if (front >= rear) {
        return;
    }
    const int pivot = list[front];
    int lo = front;
    int hi = rear;
    while (lo < hi) {
        while (hi > lo && list[hi] >= pivot) {
            --hi;
        }
        if (lo < hi) {
            list[lo++] = list[hi];
        }
        while (lo < hi && list[lo] < pivot) {
            ++lo;
        }
        if (lo < hi) {
            list[hi--] = list[lo];
        }
    }
    list[lo] = pivot;
    quickSort(list, front, lo - 1);
    quickSort(list, lo + 1, rear);
}
/* Prints the first `length` ints of `list`, a space after each, then a newline. */
void output(int* list, int length){
    const int* cursor = list;
    int remaining = length;
    while (remaining > 0) {
        printf("%d ", *cursor);
        ++cursor;
        --remaining;
    }
    putchar('\n');
}
/* Demo driver: prints a descending array, quick-sorts it, prints it again. */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    int list[] = {9, 8, 7, 6, 5, 4, 3, 2, 1};
    int length = sizeof(list)/sizeof(list[0]);
    output(list, length);
    /* quickSort takes an inclusive right bound, so the last valid index is
     * length - 1; passing `length` would read past the end of the array. */
    quickSort(list, 0, length - 1);
    output(list, length);
    return 0;
}
归并排序
归并排序是一种采用了分治思想的排序,其原理是将待排序的数组不断对半分割,再把有序的子数组两两合并,最终使得时间复杂度为O(nlogn)
#include <iostream>
#include <cstdlib>
using namespace std;
/// Merges the two adjacent sorted runs ori[s..mid] and ori[mid+1..e]
/// (mid = s + (e - s) / 2, all bounds inclusive) into one sorted run.
///
/// @param ori  array containing both runs; receives the merged result
/// @param s    first index of the left run
/// @param e    last index of the right run
/// @param res  scratch buffer with room for at least e - s + 1 ints; the
///             merged run is built at res[0..] and then copied back
///
/// On ties the element from the left run is taken first (`<=`), which keeps
/// equal elements in their original relative order (stable merge).
void mergeA(int *ori, int s, int e, int *res) {
    const int mid = s + (e - s) / 2;
    int left = s;
    int right = mid + 1;
    int out = 0;
    while (left <= mid && right <= e) {
        if (ori[left] <= ori[right]) {
            res[out++] = ori[left++];
        }
        else {
            res[out++] = ori[right++];
        }
    }
    // At most one of the two runs still has elements; append the leftovers.
    while (left <= mid) {
        res[out++] = ori[left++];
    }
    while (right <= e) {
        res[out++] = ori[right++];
    }
    for (int k = 0; k < out; ++k) {
        ori[s + k] = res[k];
    }
}
/// Recursive merge sort of ori[s..e] using `res` as the scratch buffer that
/// is handed on to mergeA.
void mergeSort(int *ori, int s, int e, int *res) {
    // The range is closed on both sides, so s == e means exactly one element
    // (the one at index s, which is also index e): nothing left to split.
    if (!(s < e)) {
        return;
    }
    const int middle = s + (e - s) / 2;
    mergeSort(ori, s, middle, res);
    mergeSort(ori, middle + 1, e, res);
    mergeA(ori, s, e, res);
}
/// Demo driver: merge-sorts a fixed array and prints it.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    int ori[] = {5, 2, 6, 8, 11, 9, 7, 3, 1, 10, 4};
    const int count = static_cast<int>(sizeof(ori) / sizeof(ori[0]));
    // Scratch buffer used by the merge step.
    int *res = static_cast<int*>(calloc(count, sizeof(int)));
    if (res == nullptr) {
        cerr << "out of memory" << endl;
        return 1;
    }
    mergeSort(ori, 0, count - 1, res);
    // Print the sorted array itself; the scratch buffer only happens to hold
    // the same values after the last merge.
    for (int i = 0; i < count; ++i) {
        cout << ori[i] << ' ';
    }
    cout << endl;
    free(res);
    return 0;
}
堆排序
二叉堆的构建、删除、自我调整等基本操作正是实现堆排序的基础。在最大堆中,每一次删除旧堆顶,调整后的新堆顶都是大小仅次于旧堆顶的节点,那么只要反复删除堆顶,反复调整二叉堆,所得到的集合就会成为一个有序集合。由于二叉堆实际存储在数组中,数组中的元素也就排好了顺序
堆排序顺序:
- 把无序数组构建成二叉堆。(最大或最小堆)
- 循环删除堆顶元素,替换到二叉堆的末尾,调整堆产生新的堆顶
开辟新空间,从小到大排:
#include <stdio.h>
#include <stdlib.h>
int arr[] = {3,5,10,2,7};
int length = sizeof(arr)/sizeof(int);

/*
 * Sift-down step of a min-heap stored in list[0..length).
 *
 * The value at `parentIndex` sinks: while its smaller child is smaller than
 * it, that child is moved up one level. The sinking value is written once,
 * at the position where it finally stops.
 */
void downAdjust(int* list, int length, int parentIndex){
    const int sinking = list[parentIndex];
    int hole = parentIndex;
    for (int child = 2 * hole + 1; child < length; child = 2 * hole + 1) {
        /* Pick the smaller of the two children when a right child exists. */
        if (child + 1 < length && list[child + 1] < list[child]) {
            ++child;
        }
        if (sinking <= list[child]) {
            break;
        }
        list[hole] = list[child];
        hole = child;
    }
    list[hole] = sinking;
}
/*
 * Turns list[0..len) into a heap by sifting down every non-leaf node,
 * starting at the last one ((len - 2) / 2) and finishing at the root.
 */
void buildHeap(int* list, int len){
    int node = (len - 2) / 2;
    while (node >= 0) {
        downAdjust(list, len, node);
        --node;
    }
}
/*
 * Removes and returns the heap top of `arr`.
 *
 * The number of elements removed so far is kept in a function-local static
 * counter, so the live heap size is the file-level `length` minus that
 * count. The last live element replaces the top and is sifted down.
 * NOTE(review): because of the static counter the function only works for
 * one pass over one array per program run.
 */
int getTop(int* arr){
    static int extractions = 0;
    const int top = arr[0];
    const int liveCount = length - extractions;
    arr[0] = arr[liveCount - 1];
    ++extractions;
    downAdjust(arr, liveCount - 1, 0);
    return top;
}
/*
 * Drains the heap in `arr` into a newly malloc'ed array of `length`
 * (file-level global) ints, one getTop() per slot, and returns that array.
 * The caller owns the returned memory.
 */
int* sortHeap(int* arr){
    int* list = (int*)malloc(sizeof(int) * length);
    int filled = 0;
    while (filled < length) {
        list[filled] = getTop(arr);
        ++filled;
    }
    return list;
}
/* Prints the first `length` (file-level global) ints of `arr`, a space after
 * each, then a newline. */
void output(int* arr){
    for (const int* p = arr; p < arr + length; ++p) {
        printf("%d ", *p);
    }
    printf("\n");
}
/* Releases the heap block `list` and reports it on stdout. */
void delete(int* list){
    free(list);
    /* puts() appends the newline itself. */
    puts("List is deleted");
}
/*
 * Demo driver: prints the raw array, the array after heap construction, and
 * the sorted copy produced by sortHeap(), then releases that copy.
 */
int main(int argc, const char * argv[]) {
    int* list;
    (void)argc;
    (void)argv;

    output(arr);
    buildHeap(arr, length);
    output(arr);

    list = sortHeap(arr);
    output(list);
    delete(list);
    return 0;
}
不开辟新空间,从大到小排,且空间复杂度为O(1):
#include <stdio.h>
#include <stdlib.h>
int arr[] = {3,5,10,2,7};
int length = sizeof(arr)/sizeof(int);

/*
 * Min-heap sift-down on list[0..length): the value starting at
 * `parentIndex` moves down past every smaller child until neither child is
 * smaller, and is stored there.
 */
void downAdjust(int* list, int length, int parentIndex){
    const int moving = list[parentIndex];
    int child;
    while ((child = 2 * parentIndex + 1) < length) {
        const int right = child + 1;
        if (right < length && list[right] < list[child]) {
            child = right;
        }
        if (list[child] >= moving) {
            break;
        }
        list[parentIndex] = list[child];
        parentIndex = child;
    }
    list[parentIndex] = moving;
}
/*
 * Builds a heap over list[0..len): every node from the last non-leaf
 * ((len - 2) / 2) back to the root is sifted down in turn.
 */
void buildHeap(int* list, int len){
    for (int node = (len - 2) / 2; node > -1; --node) {
        downAdjust(list, len, node);
    }
}
/*
 * In-place heap sort over the first `length` (file-level global) elements.
 * The heap top is repeatedly exchanged with the last element of the live
 * heap, the heap shrinks by one, and the new top is sifted down.
 */
void sortHeap(int* arr){
    int last = length - 1;
    while (last > 0) {
        const int top = arr[0];
        arr[0] = arr[last];
        arr[last] = top;
        downAdjust(arr, last, 0);
        --last;
    }
}
/* Writes the first `length` (file-level global) ints of `arr` on one line,
 * space-terminated, and ends the line. */
void output(int* arr){
    int index = 0;
    while (index < length) {
        printf("%d ", arr[index]);
        index++;
    }
    printf("\n");
}
/* Demo driver: prints the array raw, after heap construction, and after the
 * in-place heap sort. */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    output(arr);

    buildHeap(arr, length);
    output(arr);

    sortHeap(arr);
    output(arr);

    return 0;
}
时间复杂度:
downAdjust()方法本身的时间复杂度为O(logn)。总体来讲,把无序数组构建成二叉堆的时间复杂度为O(n);此后需要进行n-1次循环,每一次循环调用一次downAdjust(),计算规模是(n-1)*logn,时间复杂度为O(nlogn)。两个步骤是并列关系,所以整体时间复杂度是O(nlogn)
- 小总结:堆排序和快速排序的平均时间复杂度都是O(nlogn),都是不稳定排序。快速排序的最坏时间复杂度是O(n²),而堆排序的最坏时间复杂度稳定在O(nlogn);快速排序的递归和非递归方法的平均空间复杂度都是O(logn),而堆排序的空间复杂度是O(1)
时间复杂度为线性的排序算法
计数排序
利用数组下标来确定元素的正确位置,需知道元素的取值范围
最初版本
通过获取原数组的最大值,来确定新数组的长度
#include <stdio.h>
#include <stdlib.h>
int list[] = {1,5,2,3,6,3,5,2,4,6,1,2,7,8,6,8};
int length = sizeof(list)/sizeof(int);

/* Returns the largest of the first `length` (file-level global) ints of `list`. */
int findMax(int* list){
    int best = *list;
    for (const int* p = list; p < list + length; ++p) {
        if (*p > best) {
            best = *p;
        }
    }
    return best;
}
/* Sets arr[0..length) to zero. */
void initArray(int* arr, int length){
    for (int i = length - 1; i >= 0; --i) {
        arr[i] = 0;
    }
}
/*
 * Allocates and zeroes a counter array with one slot per value in 0..max.
 * The caller owns the returned memory.
 */
int* countArray(int max){
    const int slots = max + 1;
    int* counters = (int*)malloc(sizeof(int) * slots);
    initArray(counters, slots);
    return counters;
}
/*
 * Counting pass: for each of the first `length` (file-level global) values
 * of `list`, increments the counter in `arr` whose index equals that value.
 */
void sort(int* list, int* arr){
    for (const int* p = list; p < list + length; ++p) {
        ++arr[*p];
    }
}
/* Prints arr[0..length), a space after each value, then a newline. */
void output(int* arr, int length){
    int index = 0;
    while (index < length) {
        printf("%d ", arr[index]);
        ++index;
    }
    printf("\n");
}
/*
 * Emits the sorted sequence encoded by the counters: index `value` is printed
 * arr[value] times. Every counter is counted down to zero in the process.
 */
void putSort(int* arr, int length){
    for (int value = 0; value < length; ++value) {
        for (; arr[value] != 0; --arr[value]) {
            printf("%d ", value);
        }
    }
}
/*
 * Demo driver for the basic counting sort: prints the input, the counter
 * array, and the sorted sequence.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    /* The maximum does not change, so it is computed once. */
    const int slots = findMax(list) + 1;
    int* arr = countArray(slots - 1);
    output(list, length);
    sort(list, arr);
    output(arr, slots);
    putSort(arr, slots);
    /* putSort does not end its line. */
    printf("\n");
    free(arr);
    return 0;
}
改良版
用原数组中的最大值减去最小值,来确定新数组的长度
#include <stdio.h>
#include <stdlib.h>
int list[] = {99,85,100,96,97,88,89,91};
int length = sizeof(list)/sizeof(int);

/* Returns the largest of the first `length` (file-level global) ints of `list`. */
int findMax(int* list){
    int best = list[0];
    int index = 0;
    while (index < length) {
        best = (best < list[index]) ? list[index] : best;
        ++index;
    }
    return best;
}
/* Returns the smallest of the first `length` (file-level global) ints of `list`. */
int findMin(int* list){
    int lowest = *list;
    for (const int* p = list; p < list + length; ++p) {
        if (*p < lowest) {
            lowest = *p;
        }
    }
    return lowest;
}
/* Zero-fills arr[0..length). */
void initArray(int* arr, int length){
    int index = 0;
    while (index < length) {
        arr[index] = 0;
        ++index;
    }
}
/*
 * Allocates a counter array of `length` ints, zeroes it with initArray() and
 * returns it. The caller owns the returned memory.
 */
int* countArray(int length){
    int* counters = (int*)malloc(sizeof(int) * length);
    initArray(counters, length);
    return counters;
}
/*
 * Counting pass with offset: for each of the first `length` (file-level
 * global) values of `list`, increments arr[value - min], where min is the
 * smallest value of the list.
 *
 * The minimum is looked up once instead of once per element, and the index
 * is computed before the pointer is formed so no address beyond the counter
 * array is ever produced.
 */
void sort(int* list, int* arr){
    const int min = findMin(list);
    for (int i = 0; i < length; i++) {
        arr[list[i] - min] += 1;
    }
}
/* Prints arr[0..length) on one line, each value followed by a space. */
void output(int* arr, int length){
    for (const int* p = arr; p < arr + length; ++p) {
        printf("%d ", *p);
    }
    printf("\n");
}
/*
 * Emits the sorted sequence encoded by the counters: the value `slot + min`
 * is printed arr[slot] times, then the line is ended. Every counter is
 * counted down to zero in the process.
 */
void putSort(int* arr, int length, int min){
    for (int slot = 0; slot < length; ++slot) {
        for (; arr[slot] != 0; --arr[slot]) {
            printf("%d ", slot + min);
        }
    }
    printf("\n");
}
/*
 * Demo driver for the offset counting sort: prints the input, the counter
 * array, and the sorted sequence.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    /* The list is not modified, so min and the value span are computed once. */
    const int min = findMin(list);
    const int span = findMax(list) - min + 1;
    int* arr = countArray(span);
    output(list, length);
    sort(list, arr);
    output(arr, span);
    putSort(arr, span, min);
    free(arr);
    return 0;
}
再次改良
将存储元素个数的数组逐个加上前面所有元素个数之和,倒序遍历原数组并与新数组位置对应,实现稳定排序
#include <stdio.h>
#include <stdlib.h>
int list[] = {99,100,105,104,103,99,100,102,101};
int length = sizeof(list)/sizeof(int);

/* Returns the largest of the first `length` (file-level global) ints of `list`. */
int findMax(int* list){
    int best = list[0];
    for (int index = length - 1; index >= 0; --index) {
        if (list[index] > best) {
            best = list[index];
        }
    }
    return best;
}
/* Returns the smallest of the first `length` (file-level global) ints of `list`. */
int findMin(int* list){
    int lowest = list[0];
    int index = 0;
    while (index < length) {
        lowest = (lowest > list[index]) ? list[index] : lowest;
        ++index;
    }
    return lowest;
}
/* Clears arr[0..length) to zero. */
void initArray(int* arr, int length){
    for (int i = length; i > 0; --i) {
        arr[i - 1] = 0;
    }
}
/*
 * Returns a newly malloc'ed array of `length` ints, zeroed via initArray().
 * The caller owns the returned memory.
 */
int* countArray(int length){
    int* const counters = (int*)malloc(sizeof(int) * length);
    initArray(counters, length);
    return counters;
}
void sort(int* list, int* arr){
int i;
for (i=0; i<length; i++) {
*(arr+*(list+i)-findMin(list)) += 1;
}
for (i=1; i<length; i++) {
*(arr+i) += *(arr+i-1);
}
}
/*
 * Produces the sorted copy of `list` from the cumulative counters in `arr`.
 *
 * The list is walked from its last element to its first; each value
 * decrements its cumulative counter and is stored at the resulting index.
 * Walking backwards keeps equal values in their original relative order.
 *
 * Returns a newly malloc'ed array of `length` (file-level global) ints that
 * the caller must free. `arr` is modified.
 */
int* compare(int* list, int* arr){
    int* newList = (int*)(malloc(sizeof(int)*length));
    /* The minimum never changes while the list is walked: look it up once. */
    const int min = findMin(list);
    for (int i = length - 1; i >= 0; i--) {
        const int value = list[i];
        const int slot = value - min;
        arr[slot] -= 1;
        newList[arr[slot]] = value;
    }
    return newList;
}
/* Prints arr[0..length), each value followed by a space, then a newline. */
void output(int* arr, int length){
    const int* cursor = arr;
    for (int left = length; left > 0; --left) {
        printf("%d ", *cursor++);
    }
    printf("\n");
}
/*
 * Demo driver for the stable counting sort: prints the input, the cumulative
 * counter array and the sorted copy, then releases both heap arrays.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    const int span = findMax(list) - findMin(list) + 1;
    int* arr = countArray(span);

    output(list, length);
    sort(list, arr);
    output(arr, span);

    int* newList = compare(list, arr);
    output(newList, length);

    free(arr);
    free(newList);
    return 0;
}
不适合用计数排序的情况
- 当数列最大值和最小值差距过大时
- 当数列元素不是整数时
桶排序
类似于计数排序,但桶排序需要若干个桶(bucket)协助排序,每个桶代表一个区间范围,里面可以承载一个或多个元素
具体需要多少个桶,有多种方式,示例创建桶数量等于原始数列元素数量,除最后一个桶只包含数列最大值外,前面各个桶的区间按照比例来确定
区间跨度 = (最大值-最小值)/(桶的数量-1)
遍历原始数列,将元素分别对号入座到各个桶中,对每个桶内部元素分别进行排序,最后遍历所有的桶,输出所有元素
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 5
float list[] = {4.5,1.84,3.25,2.18,0.5};
int length = sizeof (list)/sizeof (float);

/*
 * One bucket of the bucket sort.
 * The element count is an integer quantity, so it is stored as an int rather
 * than as a float.
 */
typedef struct bucket{
    int size;      /* number of values currently held by this bucket */
    float* store;  /* heap array of `length` floats holding the values */
} bucket;
/*
 * Initialises the N buckets starting at `myBuck`: each gets a count of zero
 * and a freshly malloc'ed store of `length` floats, every slot preset to 10.
 */
void initBuck(bucket* myBuck){
    for (bucket* current = myBuck; current < myBuck + N; ++current) {
        current->size = 0;
        current->store = (float*)malloc(sizeof(float) * length);
        for (int slot = 0; slot < length; ++slot) {
            current->store[slot] = 10;
        }
    }
}
/* Allocates an array of N buckets, initialises it with initBuck() and
 * returns it. The caller owns the array and every store inside it. */
bucket* creatBuck(){
    bucket* const buckets = (bucket*)malloc(sizeof(bucket) * N);
    initBuck(buckets);
    return buckets;
}
/* Returns the largest of the first `length` (file-level global) floats of `list`. */
float findMax(float* list){
    float best = list[0];
    for (const float* p = list; p < list + length; ++p) {
        if (*p > best) {
            best = *p;
        }
    }
    return best;
}
/* Returns the smallest of the first `length` (file-level global) floats of `list`. */
float findmin(float* list){
    float lowest = list[0];
    int index = 0;
    while (index < length) {
        if (list[index] < lowest) {
            lowest = list[index];
        }
        ++index;
    }
    return lowest;
}
/* Returns the spread of the global list: its maximum minus its minimum. */
float getGap(){
    const float highest = findMax(list);
    const float lowest = findmin(list);
    return highest - lowest;
}
/*
 * Distributes the values of the global `list` over the N buckets.
 *
 * The bucket width is (max - min) / (N - 1). Each value goes into bucket
 * floor((value - min) / width), clamped to the last bucket, and is stored at
 * the same slot index it has in `list`; the bucket's count is incremented.
 *
 * Computing the index directly places every value in exactly one bucket:
 * range tests built from rounded float bounds can skip a value or match it
 * twice. When all values are equal (width 0) everything goes to bucket 0.
 */
void insertBuck(bucket* myBuck){
    const float min = findmin(list);
    const float gap = getGap()/(N-1);
    for (int k = 0; k < length; k++) {
        int index = 0;
        if (gap > 0) {
            index = (int)((list[k] - min) / gap);
            if (index > N - 1) {
                index = N - 1;
            }
        }
        myBuck[index].size += 1;
        myBuck[index].store[k] = list[k];
    }
}
/*
 * Sorts the contents of each of the N buckets ascending.
 *
 * For every bucket a new zero-filled array of `length` floats is built by
 * repeatedly taking the minimum of the old store, replacing the taken slot
 * with the filler value 10 (the value initBuck() writes into unused slots),
 * and decrementing the bucket's count. The old store is then freed and
 * replaced by the new array, whose unused tail stays 0.
 *
 * NOTE(review): a stored value >= 10 is never selected, because findmin()
 * then returns 10 and the count is never decremented, so the while loop
 * would not terminate for such input.
 */
void sort(bucket* myBuck){
for(int i=0;i<N;i++){
/* Replacement store; zero marks "no value" for output(). */
float* p = (float*)(malloc(sizeof (float)*length));
memset(p,0,sizeof (float)*length);
/* Next free position in the replacement store. */
int j = 0;
while ((myBuck+i)->size!=0) {
/* A minimum of 10 means only filler slots are left. */
if(findmin((myBuck+i)->store)!=10){
*(p+j) = findmin((myBuck+i)->store);
/* Overwrite the first slot holding that minimum with the filler value. */
for(int k=0;k<length;k++){
if(*(p+j) == *((myBuck+i)->store+k)){
*((myBuck+i)->store+k) = 10;
break;
}
}
j += 1;
(myBuck+i)->size -= 1;
}
}
/* Swap in the sorted store and release the old one. */
free((myBuck+i)->store);
(myBuck+i)->store = p;
}
}
/*
 * Prints the contents of all N buckets in bucket order with two decimals,
 * skipping slots that hold 0, then ends the line.
 */
void output(bucket* myBuck){
    for (int b = 0; b < N; ++b) {
        const float* values = myBuck[b].store;
        for (int slot = 0; slot < length; ++slot) {
            if (values[slot] == 0) {
                continue;
            }
            printf("%.2f ", values[slot]);
        }
    }
    printf("\n");
}
int main()
{
bucket* myBuck = creatBuck();
insertBuck(myBuck);
sort(myBuck);
output(myBuck);
return 0;
}
桶排序的性能并非绝对稳定,如果元素分布极不均匀,则时间复杂度退化,且创建了许多空桶
基数排序
基数排序与计数排序类似,但避免了一些空间浪费,对于较长的数字或字符串,采用从最小位(低位优先排序LSD)或最大位(高位优先排序MSD)逐步比较的方法,多次按位比较后排序。
低位优先排序LSD
#include <iostream>
#include <vector>
#include <string>
using namespace std;
/// LSD (least-significant-digit first) radix sort over a fixed set of strings.
///
/// One stable counting sort is run per character position, from the last
/// position of the longest string down to position 0. A string that is too
/// short to have a character at the current position is keyed as 0, so it
/// sorts before every longer string sharing its prefix.
class RadixSort {
public:
    RadixSort() : count(kSlots), bucket(elems) {}
    /// Stores the length of the longest element in `maxLen`.
    void getMaxLen();
    /// Sorts the elements and returns them in ascending order.
    vector<string> getSort();
private:
    /// One counter per possible byte value, so characters >= 0x80 stay in range.
    static constexpr size_t kSlots = 256;
    /// Counting-sort key of `word` at position `k`: the byte at that position
    /// read as unsigned, or 0 when the word has no such position.
    static size_t slotFor(const string &word, int k) {
        return static_cast<size_t>(k) < word.size()
            ? static_cast<unsigned char>(word[k])
            : 0;
    }
    vector<string> elems {"ddab", "abc", "aba", "ber", "bmr", "cqr", "ffae", "ddac"};
    // vector<string> elems {"bda", "cfd", "qwe", "yui", "abc", "rrr", "uue"};
    vector<int> count;
    vector<string> bucket;
    int maxLen = 0;
};

void RadixSort::getMaxLen() {
    for (const auto &word : elems) {
        const int len = static_cast<int>(word.size());
        maxLen = len > maxLen ? len : maxLen;
    }
}

vector<string> RadixSort::getSort() {
    getMaxLen();
    for (int k = maxLen - 1; k >= 0; --k) {
        count.assign(kSlots, 0);
        for (const auto &word : elems) {
            count[slotFor(word, k)] += 1;
        }
        // Running totals: count[c] becomes the number of keys <= c.
        for (size_t i = 1; i < count.size(); ++i) {
            count[i] += count[i-1];
        }
        // Elements are placed from the end of each key's range towards its
        // front, so the input is walked backwards to keep the pass stable.
        for (int i = static_cast<int>(elems.size()) - 1; i >= 0; --i) {
            const size_t slot = slotFor(elems[i], k);
            count[slot] -= 1;
            bucket[count[slot]] = elems[i];
        }
        // Every bucket position is rewritten on the next pass, so the two
        // vectors can simply trade contents instead of being copied.
        elems.swap(bucket);
    }
    return elems;
}
/// Demo driver: runs the radix sort and prints the sorted strings.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    // A stack object is released automatically; nothing to delete.
    RadixSort sortCore;
    const auto res = sortCore.getSort();
    for (const auto &it : res) {
        cout << it << ' ';
    }
    cout << endl;
    return 0;
}
以稳定性分类
- 稳定排序
如果值相同的元素排序后仍然保持原顺序,称为稳定排序
- 不稳定排序
如果值相同的元素排序后打乱了排序前的顺序,则为不稳定排序