快速排序和topK问题
快速排序
1、调用partition,选取一个基准值并确定其最终位置index,使得index左边的元素都不大于基准值,index右边的元素都不小于基准值
2、对左侧和右侧的数组递归调用快排
#include <stdio.h>
/* Number of elements in the demo array that main() fills, sorts and prints. */
const int MAX_LEN = 10;
/**
 * Partition list[left..right] (both bounds inclusive) around the value that
 * is initially stored at list[left].
 *
 * The pivot value is lifted out of the array, which leaves a "hole". The two
 * cursors then take turns: the right cursor looks for a value smaller than the
 * pivot and drops it into the hole on the left, the left cursor looks for a
 * value larger than the pivot and drops it into the hole on the right. When
 * the cursors meet, the remaining hole is the pivot's final position.
 *
 * @param list  array to rearrange in place
 * @param left  index of the first element of the range (its value is the pivot)
 * @param right index of the last element of the range
 * @return the final index of the pivot: everything before it in the range is
 *         <= pivot and everything after it is >= pivot. For a range with
 *         fewer than two elements nothing is moved and left is returned.
 */
int partition(int* list, int left, int right)
{
    /* Zero or one element: already partitioned, and the pivot (if there is
     * one) sits at left. Returning a constant 0 here would report the wrong
     * position for any range that does not start at index 0. */
    if (left >= right)
    {
        return left;
    }

    const int pivot = list[left];
    int i = left;   /* the hole starts at the pivot's original slot */
    int j = right;

    while (i < j)
    {
        /* Skip values that are already on the correct (right) side. */
        while (i < j && list[j] >= pivot)
        {
            j--;
        }
        if (i < j)
        {
            list[i] = list[j];  /* fill the left hole; the hole moves to j */
            i++;
        }

        /* Skip values that are already on the correct (left) side. */
        while (i < j && list[i] <= pivot)
        {
            i++;
        }
        if (i < j)
        {
            list[j] = list[i];  /* fill the right hole; the hole moves to i */
            j--;
        }
    }

    /* i == j is the one remaining hole. */
    list[i] = pivot;
    return i;
}
/**
 * Sort list[left..right] (both bounds inclusive) in ascending order.
 *
 * Splits the range with partition() and then sorts the part before the pivot
 * and the part after the pivot recursively. Ranges with fewer than two
 * elements are left untouched.
 *
 * @param list  array to sort in place
 * @param left  index of the first element of the range
 * @param right index of the last element of the range
 * @return always 0
 */
int quick_sort(int* list, int left, int right)
{
    if (left < right)
    {
        const int pivot_pos = partition(list, left, right);

        /* The pivot is already in its final slot, so it is excluded from
         * both recursive calls. */
        quick_sort(list, left, pivot_pos - 1);
        quick_sort(list, pivot_pos + 1, right);
    }
    return 0;
}
/**
 * Demo driver: fills an array with the values MAX_LEN, MAX_LEN-1, ..., 1,
 * sorts it with quick_sort() and prints the result on one line, each value
 * followed by a space.
 *
 * @return always 0
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int list[MAX_LEN];
    for (int i = 0; i < MAX_LEN; i++)
    {
        list[i] = MAX_LEN - i;
    }

    /* quick_sort takes an INCLUSIVE upper bound, so the last valid index is
     * MAX_LEN - 1. Passing MAX_LEN would make partition() read and write one
     * element past the end of the array. */
    quick_sort(list, 0, MAX_LEN - 1);

    for (int i = 0; i < MAX_LEN; i++)
    {
        printf("%d ", list[i]);
    }
    return 0;
}
基于链表的快排
链表特点:顺序访问
核心思想如下:
这里只提供关键代码:
/* Node of a singly linked list that stores one int per node. */
struct LNode
{
    int val;             /* payload compared and swapped by the sort routines */
    struct LNode* next;  /* link followed when walking the list */
};
typedef struct LNode LNode;
/**
 * Partition the half-open node range [start, end) around the value stored in
 * its first node. Only the int payloads are swapped; nodes are never relinked.
 *
 * A single forward pass keeps a "boundary" node: every node after start up to
 * and including boundary holds a value smaller than the pivot. Each time the
 * scan meets another smaller value, the boundary advances by one node and the
 * two payloads are exchanged. Finally the pivot is swapped into the boundary
 * node, which is its sorted position within the range.
 *
 * @param list  head of the whole list; not needed by the partition step
 *              itself, kept so the signature stays parallel to quick_sort
 * @param start first node of the range; its value is used as the pivot
 * @param end   node one past the last node of the range (nullptr when the
 *              range runs to the tail of the list)
 * @param left  out: the node that holds the pivot afterwards, i.e. the
 *              exclusive end of the "smaller" sub-range [start, left)
 * @param right out: the node following the pivot, i.e. the first node of the
 *              "not smaller" sub-range [right, end)
 * @return always 0
 *
 * left and right are taken by reference: with plain pointer parameters the
 * assignments below would only change local copies and the caller could
 * never learn where the range was split.
 */
int partition(LNode* list, LNode* start, LNode* end, LNode*& left, LNode*& right)
{
    (void)list;

    /* Empty range: report an empty split so a caller that recurses on
     * [start, left) and [right, end) stops immediately. */
    if (start == nullptr || start == end)
    {
        left = start;
        right = end;
        return 0;
    }

    const int pivot = start->val;
    LNode* boundary = start;  /* last node of the "< pivot" prefix */

    for (LNode* cur = start->next; cur != end; cur = cur->next)
    {
        if (cur->val < pivot)
        {
            boundary = boundary->next;
            const int tmp = boundary->val;
            boundary->val = cur->val;
            cur->val = tmp;
        }
    }

    /* Move the pivot from the first node into its final node. */
    start->val = boundary->val;
    boundary->val = pivot;

    left = boundary;
    right = boundary->next;
    return 0;
}
/**
 * Recursive quicksort driver for the half-open node range [start, end).
 *
 * Calls partition() on the range and then recurses on [start, left) and
 * [right, end), where left/right are the two split pointers handed to
 * partition(). An empty range (start == end) is returned unchanged.
 *
 * NOTE(review): the recursion only makes progress if partition() can store
 * the split nodes into the two local pointers declared here (e.g. by taking
 * them by reference) - verify against partition()'s signature.
 *
 * @param list  head of the whole list, forwarded unchanged to every call
 * @param start first node of the range to sort
 * @param end   node one past the last node of the range
 * @return always 0
 */
int quick_sort(LNode* list, LNode* start, LNode* end)
{
    if (start != end)
    {
        LNode* left = nullptr;
        LNode* right = nullptr;

        partition(list, start, end, left, right);

        quick_sort(list, start, left);   /* part before the split */
        quick_sort(list, right, end);    /* part after the split */
    }
    return 0;
}
基于快排思想求解topK问题
- 平均时间复杂度为O(n)(最坏情况为O(n^2)),比基于heap的O(nlgk)更高效
- 需要将数据一次加载进内存,不能处理大规模数据
- 基于堆排序求解topk可以处理超大规模数据,不受内存限制
- partition 逻辑与快排完全一致
- 核心思想:当基准及其右侧元素个数(right_num,含基准本身)不足k个时,这些元素必然都属于topk,问题转化为在左侧求top(k-right_num),本质是个递归
/**
 * Rearrange list[left..right] (both bounds inclusive) so that the value that
 * started at list[left] ends up at its sorted position within the range.
 *
 * Same hole-filling partition as used for quicksort: the pivot is taken out
 * of the array, and the two ends alternately move an out-of-place value into
 * the current hole until the cursors meet.
 *
 * @param list  array to rearrange in place
 * @param left  first index of the range; list[left] is the pivot
 * @param right last index of the range
 * @return index where the pivot was placed. Elements of the range before that
 *         index are <= pivot, elements after it are >= pivot. When the range
 *         has fewer than two elements the array is not modified and left is
 *         returned.
 */
int partition(int* list, int left, int right)
{
    /* Nothing to split. Report left (not a constant 0) so that a one-element
     * range located anywhere in the array yields its real pivot index. */
    if (left >= right)
    {
        return left;
    }

    const int pivot = list[left];
    int lo = left;   /* position of the hole while it is on the left side */
    int hi = right;

    while (lo < hi)
    {
        /* From the right: find a value that belongs left of the pivot. */
        while (lo < hi && list[hi] >= pivot)
        {
            hi--;
        }
        if (lo < hi)
        {
            list[lo] = list[hi];
            lo++;
        }

        /* From the left: find a value that belongs right of the pivot. */
        while (lo < hi && list[lo] <= pivot)
        {
            lo++;
        }
        if (lo < hi)
        {
            list[hi] = list[lo];
            hi--;
        }
    }

    /* The cursors met on the last open hole. */
    list[lo] = pivot;
    return lo;
}
/**
 * Quickselect for the top-K problem: rearrange list[left..right] (both bounds
 * inclusive) so that its k largest values occupy the last k slots of the
 * range, list[right-k+1 .. right], in unspecified order.
 *
 * The search narrows a window [lo, hi] with partition() until the pivot lands
 * exactly on the boundary slot right - k. At that moment the k elements after
 * the boundary are all >= the boundary value and everything before it is <=,
 * so the last k slots hold the k largest values.
 *
 * The boundary is an ABSOLUTE array index. It depends only on right and k, so
 * it stays the same no matter how far the window's lower bound has moved;
 * partition() also reports absolute indices, so the two can be compared
 * directly.
 *
 * @param list  array to rearrange in place
 * @param left  index of the first element of the range
 * @param right index of the last element of the range
 * @param k     how many of the largest values are wanted
 * @return the index immediately before the top-k block, i.e. right - k; the
 *         top-k values are list[ret+1 .. right]. Special cases: when k <= 0
 *         or the range is empty, right is returned (empty block); when k is
 *         at least the number of elements, left - 1 is returned (the whole
 *         range is the block). The array is not modified in these cases.
 */
int quick_sort_topk(int* list, int left, int right, int k)
{
    if (left > right || k <= 0)
    {
        return right;
    }
    if (k > right - left)
    {
        /* k >= element count: every element belongs to the top-k. */
        return left - 1;
    }

    const int target_index = right - k;  /* in [left, right - 1] */
    int lo = left;
    int hi = right;

    /* Invariant: lo <= target_index <= hi, everything before lo is <= every
     * element of the window, everything after hi is >= every element of it. */
    while (lo < hi)
    {
        const int index = partition(list, lo, hi);
        if (index == target_index)
        {
            break;
        }
        if (index < target_index)
        {
            /* More than k elements lie right of the pivot: the boundary is
             * somewhere in the right part. */
            lo = index + 1;
        }
        else
        {
            /* The pivot and everything right of it already belong to the
             * top-k; the remaining members are in the left part. */
            hi = index - 1;
        }
    }

    /* Either the pivot hit the boundary, or the window shrank to the single
     * slot target_index; in both cases the boundary element is in place. */
    return target_index;
}