寻找无序数组中第k大的数

最新推荐文章于 2024-08-22 23:48:17 发布

蜗牛一步一步往上爬

最新推荐文章于 2024-08-22 23:48:17 发布

阅读量1.3w

点赞数 11

分类专栏： algorithm 文章标签： algorithm

本文链接：https://blog.csdn.net/yc461515457/article/details/51177812

版权

algorithm 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

对于一个无序的数组，怎样找到其中第k大的数呢？下面总结几种方法。

1.直接排序法

使用常见的归并排序、堆排序等算法对数组进行排序，然后找到第k大的数。排序算法的时间复杂度为O(nlogn)，所以算法总的时间复杂度为O(nlogn)。

// Simple C++ program to find k'th biggest element
#include<iostream>
#include<algorithm>
using namespace std;

// Function to return k'th biggest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Sort the given array
    sort(arr, arr+n);

    // Return k'th element in the sorted array
    return arr[n+k-1];
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " <<  kthBiggest(arr, n, 2);
    return 0;
}

2.堆选择法-最大堆

可以使用最大堆对数组建立堆，只是需要找到第k大的数所以没有必要像直接排序方法中那样对所有数进行排序。建立好最大队后每次都提取出最大的数，提取 k 次就得到了第 k 大的数。建立最大堆的时间复杂度为O(n)，提取最大数后每次调整最大队时间复杂为O(logn)，所以总的时间复杂度为O(n+k*logn)。

// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;

// Prototype of a utility function to swap two integers
void swap(int *x, int *y);

// A class for Max Heap
class MaxHeap
{
    int *harr; // pointer to array of elements in heap
    int capacity; // maximum possible size of max heap
    int heap_size; // Current number of elements in max heap
public:
    MaxHeap(int a[], int size); // Constructor
    void maxHeapify(int i);  //To maxHeapify subtree rooted with index i
    int parent(int i) { return (i - 1) / 2; }
    int left(int i) { return (2 * i + 1); }
    int right(int i) { return (2 * i + 2); }

    int extractMax();  // extracts root (maximum) element
    int getMax() { return harr[0]; } // Returns maximum

    // to replace root with new node x and heapify() new root
    void replaceMax(int x) { harr[0] = x;  maxHeapify(0); }
};

MaxHeap::MaxHeap(int a[], int size)
{
    heap_size = size;
    harr = a;  // store address of array
    int i = (heap_size - 1) / 2;
    while (i >= 0)
    {
        maxHeapify(i);
        i--;
    }
}

// Method to remove maximum element (or root) from max heap
int MaxHeap::extractMax()
{
    if (heap_size == 0)
        return INT_MAX;

    // Store the maximum vakue.
    int root = harr[0];

    // If there are more than 1 items, move the last item to root
    // and call heapify.
    if (heap_size > 1)
    {
        harr[0] = harr[heap_size - 1];
        maxHeapify(0);
    }
    heap_size--;

    return root;
}

// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MaxHeap::maxHeapify(int i)
{
    int l = left(i);
    int r = right(i);
    int largest = i;
    if (l < heap_size && harr[l] > harr[i])
        largest = l;
    if (r < heap_size && harr[r] > harr[largest])
        largest = r;
    if (largest != i)
    {
        swap(&harr[i], &harr[largest]);
        maxHeapify(largest);
    }
}

// A utility function to swap two elements
void swap(int *x, int *y)
{
    int temp = *x;
    *x = *y;
    *y = temp;
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Build a heap of all n numbers
    MaxHeap mh(arr, n);

    int num = 0;
    // every time extract biggest number ,  k times
    for (int i = 0; i < k; i++)
        num = mh.extractMax();

    return num;
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest  element is " << kthBiggest(a, n, 3);
    return 0;
}

3.排序选择法-插入排序

由于是要找 k 个最大的数，所以没有必要对所有数进行完整的排序。每次只保留 k 个当前最大的数就可以，然后每次对新来的元素跟当前 k 个树中最小的数比较，新元素大的话则插入到数组中，否则跳过。循环结束后数组中最小的数即是我们要找到第 k 大的数。
时间复杂度 (n-k)logk

// A C++ program to find k'th smallest element using min heap
#include<iostream>

using namespace std;
void print_array(int a[], int low, int high)
{
    for (int i = low; i <= high; i++){
        cout << a[i] << " ";
    }
    cout << endl;
}

void insert(int a[], int low, int high, int num)
{
    int i = high-1;
    while (i >= low && a[i] < num){
        a[i + 1] = a[i];
        i--;
    }
    a[i + 1] = num;
}

void insertSort(int a[], int low, int high)
{
    for (int i = low + 1; i <= high; i++)
    {
        int temp = a[i];
        int j = i - 1;
        while (j >= low && a[j] < temp)
        {
            a[j + 1] = a[j];
            j--;
        }
        a[j + 1] = temp; //do not swap every time
    }
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    int *a = new int[k];
    for (int i = 0; i < k; i++){
        a[i] = arr[i];
    }
    insertSort(a, 0, k - 1);

    for (int i = k; i < n ; i++){
        if (arr[i]>a[k - 1]) insert(a, 0, k-1, arr[i]);
    }

    int num = a[k - 1];
    delete[] a;
    return num;
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    print_array(a, 0, n-1);
    cout << "K'th biggest  element is " << kthBiggest(a, n, 3);
    return 0;
}

4.排序选择法-最小堆

上一方法中的临时数组可以替换为最小堆，来找到数组中k个最大的数。先对前 k 个数进行建堆，堆顶为当前k个数组中最小的元素。时间复杂度O(k)。
然后将第 k+1…n 个数依次和堆顶元素比较，如果新元素大于堆顶元素，则将它作为新的堆顶元素并调整最小堆，否则再处理下一个数。这样每一次循环都将保证最小堆中的k个数为当前最大的 k 个数。时间复杂度O((n-k)*logk)。
循环结束后，堆顶元素即是我们要找的第 k 大的数。
算法总的时间复杂度是O(k+(n-k)*logk)。

// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;

// Prototype of a utility function to swap two integers
void swap(int *x, int *y);

// A class for min Heap
class MinHeap
{
    int *harr; // pointer to array of elements in heap
    int capacity; // maximum possible size of max heap
    int heap_size; // Current number of elements in max heap
public:
    MinHeap(int a[], int size); // Constructor
    void MinHeapify(int i);  //To minHeapify subtree rooted with index i
    int parent(int i) { return (i - 1) / 2; }
    int left(int i) { return (2 * i + 1); }
    int right(int i) { return (2 * i + 2); }

    int extractMin();  // extracts root (minimum) element
    int getMin() { return harr[0]; } // Returns minimum

    // to replace root with new node x and heapify() new root
    void replaceMax(int x) { harr[0] = x;  MinHeapify(0); }
};

MinHeap::MinHeap(int a[], int size)
{
    heap_size = size;
    harr = a;  // store address of array
    int i = (heap_size - 1) / 2;
    while (i >= 0)
    {
        MinHeapify(i);
        i--;
    }
}

// Method to remove minimum element (or root) from min heap
int MinHeap::extractMin()
{
    if (heap_size == 0)
        return INT_MAX;

    // Store the minimum vakue.
    int root = harr[0];

    // If there are more than 1 items, move the last item to root
    // and call heapify.
    if (heap_size > 1)
    {
        harr[0] = harr[heap_size - 1];
        MinHeapify(0);
    }
    heap_size--;

    return root;
}


// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MinHeap::MinHeapify(int i)
{
    int l = left(i);
    int r = right(i);
    int smallest = i;
    if (l < heap_size && harr[l] < harr[i])
        smallest = l;
    if (r < heap_size && harr[r] < harr[smallest])
        smallest = r;
    if (smallest != i)
    {
        swap(&harr[i], &harr[smallest]);
        MinHeapify(smallest);
    }
}

// A utility function to swap two elements
void swap(int *x, int *y)
{
    int temp = *x;
    *x = *y;
    *y = temp;
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Build a heap of first k elements: O(k) time
    MinHeap mh(arr, k);

    // Process remaining n-k elements.  If current element is
    // bigger than root, replace root with current element
    for (int i = k; i < n; i++)
    if (arr[i] > mh.getMin())
        mh.replaceMax(arr[i]);

    // Return root
    return mh.getMin();
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest  element is " << kthBiggest(a, n, 3);
    return 0;
}

5.期望线性时间的方法-随机选择法（使用快速排序）

回忆一下快排算法，每次都根据主元将数组划分为两部分，一部分数全部比主元要小，另一部分全部比主元要大，划分完之后可以得到主元的位置，也就是可以知道主元是数组中第多大的数。那么可以根据此信息，来找到第 k 大的数，每次都只对划分后的一部分数进行处理。如果主元刚好是第 k 大的数则直接返回主元就可以了。否则更具主元的位置来缩小查找空间，迭代查找。
为了得到期望的时间复杂度为线性时间复杂度，快速排序中的主元的选择使用随机数。
该算法的期望时间复杂度为O(n)

// C++ implementation of randomized quickSelect
#include<iostream>
#include<climits>
#include<cstdlib>
using namespace std;

int randomPartition(int arr[], int l, int r);

// This function returns k'th biggest element in arr[l..r] using
// QuickSort based method. ASSUMPTION: ELEMENTS IN ARR[] ARE DISTINCT
int kthBiggest(int arr[], int l, int r, int k)
{
    // If k is smaller than number of elements in array
    if (k > 0 && k <= r - l + 1)
    {
        // Partition the array around a random element and
        // get position of pivot element in sorted array
        int pos = randomPartition(arr, l, r);

        // If position is same as k
        if (pos - l == k - 1)
            return arr[pos];
        if (pos - l > k - 1)  // If position is more, recur for left subarray
            return kthBiggest(arr, l, pos - 1, k);

        // Else recur for right subarray
        return kthBiggest(arr, pos + 1, r, k - pos + l - 1);
    }

    // If k is more than number of elements in array
    return INT_MIN;
}

void swap(int *a, int *b)
{
    int temp = *a;
    *a = *b;
    *b = temp;
}

// Standard partition process of QuickSort().  It considers the last
// element as pivot and moves all smaller element to right of it and
// greater elements to left. This function is used by randomPartition()
int partition(int arr[], int l, int r)
{
    int x = arr[r], i = l;
    for (int j = l; j <= r - 1; j++)
    {
        if (arr[j] > x)
        {
            swap(&arr[i], &arr[j]);
            i++;
        }
    }
    swap(&arr[i], &arr[r]);
    return i;
}

// Picks a random pivot element between l and r and partitions
// arr[l..r] arount the randomly picked element using partition()
int randomPartition(int arr[], int l, int r)
{
    int n = r - l + 1;
    int pivot = rand() % n;
    swap(&arr[l + pivot], &arr[r]);
    return partition(arr, l, r);
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " << kthBiggest(a, 0, n - 1, 2);
    return 0;
}

6.最坏时间为线性时间的方法-BFPRT

算法导论中介绍了一种方法，能保证最坏情况下该算法依旧是线性时间复杂度。
算法步骤：
1.将数组的 n 个数划分为 n/5 组，每组 5 个元素。
2.寻找这 n/5 组中每一组的中位数：首先对每组元素进行插入排序，然后确定每组有序元素的中位数。
3.对第 2 步中找出的 n/5 个中位数，递归调用 BFPRT 查找算法本身来找到这 n/5 个数的中位数 $x$ 。
4.利用修改过的划分函数，按中位数的中位数 $x$ 对数组进行划分，返回划分完之后 $x$ 的位置。
5.如果 $x$ 刚好是第 k 大的数（划分后大于 $x$ 的数放在前一部分，小于 $x$ 的数放在后半部分，如 $x$ 的位置索引是k-1，则说明 $x$ 是第k大的数），直接返回 $x$ ；若 $x$ 的索引小于k-1，则说明第 k 大的元素在右半部分小于 $x$ 的数中，对右半部分进行递归查找；若 $x$ 的索引大于k-1，则说明第 k 大的元素在左半部分大于 x <script id="MathJax-Element-13" type="math/tex">x</script>的数中，对左半部分进行递归查找。

可以用一个递归式来得到算法的时间复杂度。令T(n)为时间复杂度，则1、2、4都需要O(n)时间，步骤三需要T(n/5)，步骤5最多需要 T(7n/10+6)，所以递归式：
T(n) <= T(n/5)+T(7n/10)+O(n)
可以求得T(n) = O(n)

//implement max heap sort
#include "iostream"
#include "math.h"

void print_array(int a[], int beg, int end)
{
    for (int i = beg; i <= end; i++)
        std::cout <<  a[i] << " ";
    std::cout << std::endl;
}

// partition and rank from big to small
// ran partition use the a[pivot] as pivot
// and return the new index of the a[pivot]
int partition(int a[], int low, int high, int pivot)
{
    std::swap(a[high], a[pivot]);
    //the index of the first place for replaced
    int index_first_nosmall = low;

    for (int i = low; i < high; i++)
    {
        if (a[i] > a[high])
        {
            std::swap(a[i], a[index_first_nosmall++]);
        }
    }
    std::swap(a[high], a[index_first_nosmall]);
    return index_first_nosmall;
}

//insert sort, from big to small
void InsertSort(int a[], int low, int high)
{
    for (int i = low+1; i <= high; i++)
    {
        int temp = a[i];
        int j = i - 1;
        while (j >= low && a[j] < temp)
        {
            a[j + 1] = a[j];
            j--;
        }
        a[j+1] = temp; //do not swap every time
    }
}

//insert sort
void InsertSort_Lite(int a[], int low, int high)
{
    for (int i = low + 1; i <= high; i++)
    {
        int j = i - 1;
        while (j >= low && a[j] < a[j+1])
        {
            std::swap(a[j], a[j+1]);
            j--;
        }
    }
}

int Find_k_Biggest(int a[], int low, int high, int k)
{
    std::cout << "print_array(a, low, high):" << low << " , " << high <<" k: "<<k<< std::endl;
    print_array(a, low, high);

    if (high + 1 - low <= 5)
    {
        InsertSort(a, low, high);
        return a[low + k - 1];
    }

    int j = low;
    for (int i = 0; i <= high; i+=5)
    {
        int end = (i + 4 <= high ? (i + 4) : high);
        InsertSort(a, i, end);
        print_array(a, i, end);
        std::swap(a[j++], a[(i+end)>>1]);
        if (end != i + 4) break;
    }
    print_array(a, low, high);

    //find the median of the medians
    int pivot = (low + j -1) >> 1;
    Find_k_Biggest(a, low, j-1, pivot+1-low);

    //use pivot to partition the array
    int resulted_pivot = partition(a, low, high, pivot);
    int current_rank = resulted_pivot + 1 - low;

    if (current_rank == k) return a[resulted_pivot];
    else if (resulted_pivot < k) return Find_k_Biggest(a, resulted_pivot+1, high, k-resulted_pivot);
    else return Find_k_Biggest(a, low, resulted_pivot - 1, k);
}

int main()
{

    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
    int n = (a, sizeof(a) / sizeof(int));

    print_array(a, 0, n-1);

    std::cout << Find_k_Biggest(a, 0, n - 1, 2) << std::endl;;
    print_array(a, 0, n-1);

    return 0;
}