文章目录
引用
海量数据处理 - 10亿个数中找出最大的10000个数(top K问题)
海量数据排序——如果有1TB的数据需要排序,但只有32GB的内存如何排序处理?
【动态规划】求数组不相邻元素之和最大
冒泡排序(Bubble Sort)
选择排序(Selection Sort)
插入排序(Insertion Sort)
/**
 * Baseline insertion sort (ascending, in place).
 *
 * For every new element the whole sorted prefix is walked from right to
 * left, swapping each adjacent pair that is out of order. The walk never
 * stops early, even once the element has reached its final position.
 *
 * @param arr array to sort in place
 * @param len number of elements in arr
 */
void insert_sort(int arr[], int len)
{
    int next = 1;
    while (next < len)
    {
        // pos is the right-hand index of the adjacent pair being examined.
        int pos = next;
        while (pos > 0)
        {
            if (arr[pos - 1] > arr[pos])
            {
                const int held = arr[pos - 1];
                arr[pos - 1] = arr[pos];
                arr[pos] = held;
            }
            --pos;
        }
        ++next;
    }
}
/**
 * Improved insertion sort (ascending, in place).
 *
 * Same adjacent-swap scheme as the baseline, but the leftward walk stops as
 * soon as the element being inserted is no longer smaller than its left
 * neighbour.
 *
 * @param arr array to sort in place
 * @param len number of elements in arr
 */
void insert_sort(int arr[], int len)
{
    for (int next = 1; next < len; ++next)
    {
        // pos tracks where the element being inserted currently sits.
        int pos = next;
        while (pos > 0 && arr[pos - 1] > arr[pos])
        {
            const int held = arr[pos - 1];
            arr[pos - 1] = arr[pos];
            arr[pos] = held;
            --pos;
        }
    }
}
/**
 * Further-improved insertion sort (ascending, in place).
 *
 * Instead of swapping adjacent pairs, the element being inserted is saved
 * once, larger elements of the sorted prefix are shifted one slot to the
 * right, and the saved element is written into the gap that remains.
 *
 * @param arr array to sort in place
 * @param len number of elements in arr; values below 2 leave arr untouched
 */
void insert_sort(int arr[], int len)
{
    for (int i = 1; i < len; i++)
    {
        // Save the element first: the shifts below overwrite arr[i].
        const int temp = arr[i];

        // j is declared outside the inner loop because the final store
        // needs the position at which the shifting stopped.
        int j = i - 1;
        for (; (j >= 0) && (arr[j] > temp); j--)
            arr[j + 1] = arr[j];

        arr[j + 1] = temp;
    }
}
希尔排序(Shell Sort)
希尔排序算法是一种改进的插入排序算法。
/**
 * Shell sort (ascending, in place) using the gap sequence len/2, len/4, ...
 *
 * For each gap, a gapped insertion sort is run over the array: every element
 * is saved, larger elements that sit `gap` positions to its left are shifted
 * right by `gap`, and the saved element is stored in the freed slot.
 *
 * @param arr array to sort in place
 * @param len number of elements in arr
 */
void shell_sort(int arr[], int len)
{
    int step = len / 2;
    // With integer halving, any len >= 2 finishes with a pass at step 1.
    while (step > 0)
    {
        for (int pos = step; pos < len; ++pos)
        {
            const int held = arr[pos];
            // slot is where `held` would land if shifting stopped now.
            int slot = pos;
            while (slot >= step && arr[slot - step] > held)
            {
                arr[slot] = arr[slot - step];
                slot -= step;
            }
            arr[slot] = held;
        }
        step /= 2;
    }
}
/**
 * Shell sort, implementation 2 (ascending, in place).
 *
 * For each gap in len/2, len/4, ..., the array is treated as `gap`
 * interleaved sub-sequences (one per starting offset). Each sub-sequence is
 * insertion-sorted on its own by swapping neighbouring members that are out
 * of order.
 *
 * @param arr array to sort in place
 * @param len number of elements in arr
 */
void shell_sort(int arr[], int len)
{
    int stride = len / 2;
    while (stride > 0)
    {
        for (int offset = 0; offset < stride; ++offset)
        {
            // Walk the sub-sequence offset, offset+stride, offset+2*stride, ...
            for (int cur = offset + stride; cur < len; cur += stride)
            {
                // hi is the right-hand index of the pair being compared.
                int hi = cur;
                while (hi >= stride && arr[hi - stride] > arr[hi])
                {
                    const int held = arr[hi - stride];
                    arr[hi - stride] = arr[hi];
                    arr[hi] = held;
                    hi -= stride;
                }
            }
        }
        stride /= 2;
    }
}
归并排序(Merge Sort)
// Helpers used by merge_sort. They are declared up front because they are
// called before their definitions appear.
void sort(int arr[], int left, int right, int *temp);
void merge(int arr[], int left, int mid, int right, int *temp);

/**
 * Merge sort (ascending, stable, in place from the caller's point of view).
 *
 * Allocates one scratch buffer of len ints, sorts arr[0..len-1] with the
 * recursive helper, and releases the buffer before returning.
 *
 * @param arr array to sort
 * @param len number of elements in arr; values below 2 leave arr untouched
 */
void merge_sort(int arr[], int len)
{
    // Nothing to sort; this also keeps a zero or negative len away from new[].
    if (len < 2)
        return;

    int *temp = new int[len];
    sort(arr, 0, len - 1, temp);
    delete[] temp;
}

/**
 * Recursively sorts the inclusive range arr[left..right].
 *
 * @param arr   array being sorted
 * @param left  first index of the range
 * @param right last index of the range
 * @param temp  scratch buffer with room for at least right - left + 1 ints
 */
void sort(int arr[], int left, int right, int *temp)
{
    if (left < right)
    {
        // Written as left + (right - left) / 2 so the sum cannot overflow.
        int mid = left + (right - left) / 2;
        sort(arr, left, mid, temp);
        sort(arr, mid + 1, right, temp);
        merge(arr, left, mid, right, temp);
    }
}

/**
 * Merges the two adjacent sorted runs arr[left..mid] and arr[mid+1..right]
 * into one sorted run occupying arr[left..right].
 *
 * @param arr   array holding both runs
 * @param left  first index of the left run
 * @param mid   last index of the left run
 * @param right last index of the right run
 * @param temp  scratch buffer with room for at least right - left + 1 ints
 */
void merge(int arr[], int left, int mid, int right, int *temp)
{
    int i = left;
    int j = mid + 1;
    int n = 0;

    // On ties the left run's element is taken first, which keeps the sort stable.
    while ((i <= mid) && (j <= right))
    {
        if (arr[i] > arr[j])
            temp[n++] = arr[j++];
        else
            temp[n++] = arr[i++];
    }

    // At most one of the two runs still has elements left.
    while (i <= mid)
        temp[n++] = arr[i++];
    while (j <= right)
        temp[n++] = arr[j++];

    // Copy the merged run back to its place in arr.
    for (int k = 0; k < n; k++)
        arr[k + left] = temp[k];
}
快速排序(Quick Sort)
/**
 * Quick sort (ascending, in place) over the inclusive range arr[left..right].
 *
 * The first element of the range is taken as the pivot, which leaves a
 * "hole" at its position. Elements are then moved into the hole alternately
 * from the right end (values smaller than the pivot) and from the left end
 * (values not smaller than the pivot) until both cursors meet; the pivot is
 * stored at the meeting point and both sides are sorted recursively.
 *
 * @param arr   array to sort in place
 * @param left  first index of the range
 * @param right last index of the range
 */
void quick_sort(int arr[], int left, int right)
{
    // Ranges with fewer than two elements are already sorted.
    if (left >= right)
        return;

    const int pivot = arr[left];
    int lo = left;
    int hi = right;

    while (lo < hi)
    {
        // Find an element from the right that belongs left of the pivot.
        while (lo < hi && arr[hi] >= pivot)
            --hi;
        if (lo < hi)
        {
            arr[lo] = arr[hi];   // fill the hole at lo; the hole moves to hi
            ++lo;
        }

        // Find an element from the left that belongs right of the pivot.
        while (lo < hi && arr[lo] < pivot)
            ++lo;
        if (lo < hi)
        {
            arr[hi] = arr[lo];   // fill the hole at hi; the hole moves to lo
            --hi;
        }
    }

    arr[lo] = pivot;
    quick_sort(arr, left, lo - 1);
    quick_sort(arr, lo + 1, right);
}
堆排序(Heap Sort)
最大堆的实现(Heap)
- 堆是一棵[完全二叉树](https://baike.baidu.com/item/%E5%AE%8C%E5%85%A8%E4%BA%8C%E5%8F%89%E6%A0%91)。
- 每一个父节点的值都不小于其子节点的值(最大堆),或者都不大于其子节点的值(最小堆)。
- 如果用数组存储堆,且以层序遍历的顺序存储,则节点arr[i]的左子节点为arr[2*i+1],右子节点为arr[2*i+2];相应地,节点arr[i](i>0)的父节点为arr[(i-1)/2]。
堆的常用方法:
- 构建优先队列
- 支持堆排序
- 快速找出一个集合中的最小值(或者最大值)
#include<iostream>
#include<cstring>
#define INIT_SIZE 2

/**
 * Max-heap of ints stored level by level in a manually managed array.
 *
 * The children of ptr[i] are ptr[2*i+1] and ptr[2*i+2]; the parent of ptr[i]
 * (i > 0) is ptr[(i-1)/2]. The backing array doubles when it is full and is
 * shrunk when more than two thirds of it is unused.
 *
 * Copying is disabled because the object owns the raw array behind ptr.
 */
class Heap
{
public:
    // Initializers are listed in member declaration order (size, free, ptr).
    Heap() : size(0), free(INIT_SIZE), ptr(new int[INIT_SIZE]) {}
    ~Heap();

    /** Inserts target into the heap, growing the backing array if needed. */
    void push_heap(int target);

    /**
     * Removes the largest element and stores it in target.
     * @return 0 on success, -1 (target untouched) when the heap is empty
     */
    int top_heap(int& target);

    /** @return number of elements currently stored */
    int get_size() const;

    Heap(const Heap& heap) = delete;
    Heap& operator=(const Heap& heap) = delete;
private:
    int size;   // number of elements stored in ptr[0..size-1]
    int free;   // unused slots; the array capacity is always size + free
public:
    int* ptr;   // backing array in level order; ptr[0] is the maximum
};

/** Releases the backing array. */
Heap::~Heap()
{
    delete[] ptr;
}

void Heap::push_heap(int target)
{
    if (free == 0)
    {
        // Double the capacity. The fallback keeps the array growable even if
        // it were ever full while holding no elements.
        const int grown = (size > 0) ? 2 * size : INIT_SIZE;
        int* temp = new int[grown];
        memcpy(temp, ptr, size * sizeof(int));
        delete[] ptr;
        ptr = temp;
        free = grown - size;
    }

    // Open a hole at the end and move it up while the parent is smaller than
    // target; target is written once, at the hole's final position.
    int i = size;
    size++;
    free--;
    while (i > 0)
    {
        const int parent = (i - 1) / 2;
        if (target <= ptr[parent])
            break;
        ptr[i] = ptr[parent];
        i = parent;
    }
    ptr[i] = target;
}

int Heap::top_heap(int& target)
{
    if (size == 0)
        return -1;

    target = ptr[0];
    size--;
    free++;

    if (size > 0)
    {
        // Re-insert the former last element from the root downwards. Only
        // indices below the new size are read, so no stale slot is touched.
        const int last = ptr[size];
        int i = 0;
        for (;;)
        {
            int child = 2 * i + 1;
            if (child >= size)
                break;                          // i has no children
            if (child + 1 < size && ptr[child + 1] > ptr[child])
                child++;                        // pick the larger child
            if (last >= ptr[child])
                break;                          // heap order holds here
            ptr[i] = ptr[child];
            i = child;
        }
        ptr[i] = last;
    }

    // Shrink when more than two thirds of the array is unused, but never
    // below INIT_SIZE total slots. free > 2 * size guarantees the new
    // capacity free / 2 can still hold all size elements.
    if ((free + size) > INIT_SIZE && free > 2 * size)
    {
        const int shrunk = free / 2;
        int* temp = new int[shrunk];
        memcpy(temp, ptr, size * sizeof(int));
        delete[] ptr;
        ptr = temp;
        free = shrunk - size;
    }
    return 0;
}

int Heap::get_size() const
{
    return size;
}
/**
 * Demo driver for Heap.
 *
 * Pushes the values 0..5, prints them as they are pushed, prints the heap's
 * backing array, and finally pops every element, printing each popped value.
 */
int main()
{
    Heap heap;

    // Feed the values 0..5 into the heap, echoing each one.
    std::cout << "raw: ";
    int value = 0;
    while (value < 6)
    {
        std::cout << value << ' ';
        heap.push_heap(value);
        ++value;
    }
    std::cout << std::endl;

    // Dump the backing array in storage order.
    std::cout << "heap: ";
    const int count = heap.get_size();
    for (int idx = 0; idx != count; ++idx)
        std::cout << heap.ptr[idx] << ' ';
    std::cout << std::endl;

    // Pop as many elements as were stored, printing each popped value.
    std::cout << "top_heap: ";
    int popped;
    for (int remaining = count; remaining > 0; --remaining)
    {
        heap.top_heap(popped);
        std::cout << popped << ' ';
    }
    std::cout << std::endl;

    return 0;
}
堆排序算法
算法步骤
1. 创建一个最大堆 H[0……n-1];
2. 把堆首(最大值)和堆尾互换;
3. 把堆的尺寸缩小 1,并调用 shift_down(0),目的是把新的数组顶端数据调整到相应位置;
4. 重复步骤 2 和 3,直到堆的尺寸为 1。
计数排序(Counting Sort)
算法的步骤如下:
- 找出待排序的数组中最大和最小的元素
- 统计数组中每个值为i的元素出现的次数,存入数组C的第i项
- 对所有的计数累加(从C中的第一个元素开始,每一项和前一项相加)
- 反向填充目标数组:将每个元素i放在新数组的第C(i)项,每放一个元素就将C(i)减去1