《算法导论》第九章《中位数和顺序统计量》对这个问题讲得很清楚。
方法一: 比较naive的办法就是首先对数组排序, 然后取第k个元素。
时间复杂度由我们所选择的排序算法的复杂度决定。
// simple solution
#include <iostream>
#include <algorithm>
using namespace std;
// Returns the k-th smallest value (k counted from 1) among the first n
// entries of arr.
// Side effect: arr[0..n) is left sorted in ascending order.
// k is not validated here; callers must pass 1 <= k <= n.
int kthSmallest(int arr[], int n, int k) {
    int *const first = arr;
    int *const last = first + n;
    std::sort(first, last);
    // Once sorted, rank k sits at zero-based index k - 1.
    const int index = k - 1;
    return first[index];
}
// Demo driver: prints the 2nd smallest value of a fixed sample array.
int main() {
    int arr[] = {12, 3, 4, 5, 7, 19};
    const int n = static_cast<int>(sizeof arr / sizeof *arr);
    const int k = 2;
    const int answer = kthSmallest(arr, n, k);
    std::cout << "kth smallest is " << answer;
    return 0;
}
方法二 (QuickSelect): 借用随机化快速排序 (randomized quick sort) 中的思想, 用一个 pivot x 将数组分成两部分。 我们记 x 的次序为 rank(x), 那么划分之后, 小于 x 的元素位于 x 的左边, 大于 x 的元素位于 x 的右边。 接下来, 判断 k 和 rank(x) 的关系: 如果 k < rank(x), 则在 x 的左边部分 [0, rank(x)) 中继续寻找第 k 小的数; 如果 k = rank(x), 则 x 就是我们要找的数, 直接返回; 如果 k > rank(x), 则在 x 的右边部分寻找第 k - rank(x) 小的数。 就这样递归下去, 直至找到为止。
1. Pick an element within current segment
and call it the pivot
2. Count elements that are smaller and
elements that are larger than the pivot
3. If number of elements smaller than the pivot
is larger than K, then move those elements
to the beginning of the array and run
the algorithm recursively only on that part of the array.
4. Otherwise, if number of elements smaller than the pivot
plus number of elements equal to the pivot is larger
than K, then Kth element is equal to pivot
so just return the pivot and finish.
5. Otherwise, move all elements larger than the pivot
to the beginning of the array and run the algorithm
recursively only on that part of the array.
程序如下:
#include <iostream>
#include <climits>
using namespace std;
int Partition(int arr[], int l, int r);
// Quickselect over arr[l..r] (both bounds inclusive): returns the k-th
// smallest element of that range, where k is counted from 1.
// Returns INT_MAX when k is not in [1, r - l + 1].
// The range is reordered in place by Partition().
// The original author states the assumption that all elements are distinct.
int kthSmallest(int arr[], int l, int r, int k) {
    // Reject ranks that do not exist in the range.
    if (k <= 0 || k > r - l + 1) {
        return INT_MAX;
    }

    int lo = l;
    int hi = r;
    int rank = k;  // rank still being searched for, relative to lo
    for (;;) {
        // Partition() returns the index where the pivot ended up.
        const int pivotAt = Partition(arr, lo, hi);
        const int leftCount = pivotAt - lo;  // elements placed before the pivot

        if (leftCount == rank - 1) {
            // The pivot itself has the requested rank.
            return arr[pivotAt];
        }
        if (leftCount > rank - 1) {
            // The answer lies before the pivot; the rank is unchanged.
            hi = pivotAt - 1;
        } else {
            // The answer lies after the pivot; discount the pivot and
            // everything before it.
            rank -= leftCount + 1;
            lo = pivotAt + 1;
        }
    }
}
// Exchanges the two ints that a and b point to.
void Swap(int *a, int *b) {
    const int held = *b;
    *b = *a;
    *a = held;
}
// Partitions arr[l..r] around the value stored in arr[r].
// On return, every element before the returned index is <= the pivot, the
// pivot sits at the returned index, and every element after it is greater.
int Partition(int arr[], int l, int r) {
    const int pivot = arr[r];
    int boundary = l;  // arr[l..boundary) holds the values <= pivot found so far
    for (int scan = l; scan < r; ++scan) {
        if (arr[scan] > pivot) {
            continue;
        }
        Swap(&arr[boundary], &arr[scan]);
        ++boundary;
    }
    // Drop the pivot between the two groups.
    Swap(&arr[boundary], &arr[r]);
    return boundary;
}
// Demo driver: prints the 3rd smallest value of a fixed sample array.
int main() {
    int arr[] = {12, 2, 4, 5, 14, 6, 19};
    const int n = static_cast<int>(sizeof arr / sizeof *arr);
    const int k = 3;
    const int answer = kthSmallest(arr, 0, n - 1, k);
    std::cout << "kth smallest element is " << answer;
    return 0;
}
该方法最坏情况下的时间复杂度是 O(n^2), 期望 (平均) 情况下可以在线性时间 O(n) 内找到结果。
下面引入随机选取 pivot 的程序, 使得期望的运行时间是线性的, 尽管最坏情况仍然是 O(n^2):
#include <iostream>
#include <cstdlib>
#include <climits>
using namespace std;
// Partitions A[l..r] around a pivot whose index is chosen with rand().
// The chosen element is first moved to A[r]; on return every element before
// the returned index is <= the pivot and every element after it is greater.
// Returns the final index of the pivot.
int RandomizedPivot(int *A,int l,int r)
{
    const int span = r - l + 1;
    const int chosen = l + std::rand() % span;
    std::swap(A[chosen], A[r]);

    const int pivot = A[r];  // A[r] is not written again until the final swap
    int store = l;           // next slot for a value <= pivot
    for (int scan = l; scan < r; ++scan)
    {
        if (A[scan] > pivot)
            continue;
        std::swap(A[store], A[scan]);
        ++store;
    }
    std::swap(A[store], A[r]);
    return store;
}
// Randomized quickselect: returns the k-th smallest element (k counted from
// 1) of A[l..r], both bounds inclusive. The range is reordered in place.
// Returns INT_MAX when k is not in [1, r - l + 1]. Without this check an
// out-of-range k could recurse into an empty range, where RandomizedPivot()
// would evaluate rand() % 0.
int KthSmallest(int *A,int l,int r,int k)
{
    if (k <= 0 || k > r - l + 1)
        return INT_MAX;

    // A single-element range can only answer k == 1 (guaranteed by the check).
    if (l == r)
        return A[l];

    const int q = RandomizedPivot(A, l, r);
    const int leftCount = q - l;  // elements placed before the pivot

    if (leftCount == k - 1)
        return A[q];
    if (leftCount > k - 1)
        return KthSmallest(A, l, q - 1, k);
    // Skip the pivot and everything before it, and adjust the rank to match.
    return KthSmallest(A, q + 1, r, k - leftCount - 1);
}
// Demo driver: prints the 4th smallest value of a sample array that
// contains a duplicate.
int main(void)
{
    int A[] = {1,2,2,5,6,3};
    const int N = static_cast<int>(sizeof A / sizeof *A);
    const int k = 4;
    const int answer = KthSmallest(A, 0, N - 1, k);
    std::cout << answer << std::endl;
    return 0;
}
方法三: 该算法采用 divide and conquer (分治) 的技术, 简称 median of medians (中位数的中位数)。 算法流程如下: 我们可以设置 C = 5, 即每 5 个元素分为一组。
1. Divide the array into N/C columns of elements,
for small odd C.
2. Find the median of each column by sorting it.
3. Take only the medians and repeat steps 1-2 recursively
until only one value remains. That value is picked as the pivot.
4. Iterate through the array and count number of elements
strictly smaller than the pivot (S), larger than the pivot (L)
and equal to the pivot (E=N-S-L).
5. If S>K, move all values smaller than the pivot
to the beginning of the array and recursively run
the whole algorithm on that sub-array.
6. If S+E>K, conclude that Kth element is equal
to the current pivot so return the pivot value
and terminate the algorithm.
7. Otherwise, move all values larger than the pivot
to the beginning of the array and recursively run
the whole algorithm on that sub-array.
最坏情况的时间复杂度O(n)。
// C++ implementation of worst case linear time algorithm
// to find k'th smallest element
#include<algorithm>
#include<climits>
#include<iostream>
#include<vector>
using namespace std;
int partition(int arr[], int l, int r, int k);
// Sorts arr[0..n) in place and returns the element at index n / 2
// (the upper median when n is even).
// The caller in this program passes groups of at most 5 elements.
// n must be at least 1; no check is made here.
int findMedian(int arr[], int n)
{
    int *const groupEnd = arr + n;
    std::sort(arr, groupEnd);
    const int middle = n / 2;
    return *(arr + middle);
}
// Median-of-medians selection: returns the k-th smallest element (k counted
// from 1) of arr[l..r], both bounds inclusive.
// Returns INT_MAX when k is not in [1, r - l + 1].
// The range is reordered in place (each group of 5 is sorted by findMedian(),
// then the range is partitioned around the chosen pivot).
// The original author states the assumption that all elements are distinct.
//
// The group medians are held in a std::vector rather than a variable-length
// array: VLAs are a compiler extension, not ISO C++, and use stack space
// proportional to n.
int kthSmallest(int arr[], int l, int r, int k)
{
    const int n = r - l + 1;  // number of elements in arr[l..r]
    if (k <= 0 || k > n)
        return INT_MAX;

    // One median per group of 5; ceil(n / 5) groups in total. n >= 1 here,
    // so the vector is never empty.
    const int groupCount = (n + 4) / 5;
    std::vector<int> median(groupCount);

    int i;
    for (i = 0; i < n / 5; i++)
        median[i] = findMedian(arr + l + i * 5, 5);
    if (i * 5 < n)  // trailing group with fewer than 5 elements
    {
        median[i] = findMedian(arr + l + i * 5, n % 5);
        i++;
    }

    // Pick the pivot: the single median if there is only one group, otherwise
    // the element of rank i / 2 among the medians, found recursively.
    const int medOfMed = (i == 1) ? median[0]
                                  : kthSmallest(&median[0], 0, i - 1, i / 2);

    // Partition around the median of medians and get the pivot's final index.
    const int pos = partition(arr, l, r, medOfMed);
    const int leftCount = pos - l;  // elements placed before the pivot

    if (leftCount == k - 1)
        return arr[pos];
    if (leftCount > k - 1)
        return kthSmallest(arr, l, pos - 1, k);
    // Skip the pivot and everything before it, and adjust the rank to match.
    return kthSmallest(arr, pos + 1, r, k - leftCount - 1);
}
// Exchanges the ints stored at the two given addresses.
void swap(int *a, int *b)
{
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
// Partitions arr[l..r] around the value x.
// The first occurrence of x in arr[l..r-1] (if any) is swapped into arr[r];
// if none is found there, arr[r] is left where it is. The range is then
// partitioned so that values <= x precede the returned index and greater
// values follow it. Returns the final index of the element taken from arr[r].
int partition(int arr[], int l, int r, int x)
{
    // Locate x; stops at r when x does not occur earlier in the range.
    int found = l;
    while (found < r && arr[found] != x)
        ++found;
    swap(&arr[found], &arr[r]);

    int boundary = l;  // next slot for a value <= x
    for (int scan = l; scan < r; ++scan)
    {
        if (arr[scan] > x)
            continue;
        swap(&arr[boundary], &arr[scan]);
        ++boundary;
    }
    swap(&arr[boundary], &arr[r]);
    return boundary;
}
// Demo driver: prints the 3rd smallest value of a fixed sample array.
int main()
{
    int arr[] = {12, 3, 5, 7, 4, 19, 26};
    const int n = static_cast<int>(sizeof arr / sizeof *arr);
    const int k = 3;
    const int answer = kthSmallest(arr, 0, n - 1, k);
    std::cout << "K'th smallest element is " << answer;
    return 0;
}
方法四(Using Min Heap – HeapSelect)
我们可以首先用给定的 n 个元素建立一个小顶堆, 建堆的时间复杂度为 O(n) (线性时间), 然后执行 k 次 extractMin 即可。
时间复杂度: O(n + k log(n))
程序如下:
// find kth smallest element using minheap
#include <iostream>
#include <climits>
using namespace std;
// Exchanges the two ints that x and y point to.
void swap(int *x, int *y) {
    const int saved = *y;
    *y = *x;
    *x = saved;
}
// Binary min-heap laid out over an int array supplied by the caller.
// Indices are zero-based: the children of slot i are 2i + 1 and 2i + 2.
class MinHeap {
    int *harr;      // heap storage
    int capacity;   // declared as the maximum heap size; not read by the inline members here
    int heap_size;  // number of slots of harr that currently belong to the heap
public:
    // Builds a heap from the n elements of arr (defined out of line).
    MinHeap(int arr[], int n);

    // Restores the heap property for the subtree rooted at i (defined out of line).
    void MinHeapify(int i);

    // Index helpers for the implicit binary tree.
    int parent(int i) {
        return (i - 1) / 2;
    }
    int left(int i) {
        return i * 2 + 1;
    }
    int right(int i) {
        return i * 2 + 2;
    }

    // Removes and returns the root element (defined out of line).
    int extractMin();

    // Returns the root element without removing it; no emptiness check.
    int getMin() {
        return *harr;
    }
};
// Builds a min-heap in place over the caller's array a of n elements.
// The heap does not copy the data: harr aliases a, so the caller's array is
// reordered. capacity is now initialized as well (the original left it
// indeterminate); it is set to n because the heap cannot grow beyond the
// array it was given.
MinHeap::MinHeap(int a[], int n) : harr(a), capacity(n), heap_size(n) {
    // Heapify every node from the middle of the array back to the root, so
    // that each subtree is already a heap when its parent is processed.
    for (int i = (heap_size - 1) / 2; i >= 0; --i) {
        MinHeapify(i);
    }
}
// Removes the smallest element from the heap and returns it.
// Returns INT_MAX when the heap is empty.
int MinHeap::extractMin() {
    if (heap_size == 0) {
        return INT_MAX;
    }

    const int smallest = harr[0];
    const int lastIndex = heap_size - 1;

    // With more than one element, the last one takes the root's place and is
    // sifted down. The size is reduced only after the sift-down, exactly as
    // in the original code.
    if (lastIndex > 0) {
        harr[0] = harr[lastIndex];
        MinHeapify(0);
    }
    --heap_size;
    return smallest;
}
// Restores the min-heap property for the subtree rooted at index i,
// assuming both child subtrees already satisfy it.
// Iterative form of the sift-down: the element moves toward the leaves
// until neither child is smaller.
void MinHeap::MinHeapify(int i) {
    int node = i;
    for (;;) {
        const int lhs = left(node);
        const int rhs = right(node);

        // Find the smallest of the node and its in-range children.
        int best = node;
        if (lhs < heap_size && harr[lhs] < harr[best]) {
            best = lhs;
        }
        if (rhs < heap_size && harr[rhs] < harr[best]) {
            best = rhs;
        }
        if (best == node) {
            return;  // heap property holds here
        }
        swap(&harr[node], &harr[best]);
        node = best;
    }
}
// Returns the k-th smallest element (k counted from 1) of arr[0..n) using a
// min-heap built in place over arr, so arr is reordered by the call.
// Returns INT_MAX when k is not in [1, n]. Previously k > n returned whatever
// was left in the root slot of an emptied heap, and k <= 0 silently returned
// the minimum.
int kthSmallest(int arr[], int n, int k) {
    if (k <= 0 || k > n) {
        return INT_MAX;
    }

    // Build a heap over all n elements.
    MinHeap mh(arr, n);

    // Discard the k - 1 smallest elements.
    for (int i = 0; i < k - 1; i++) {
        mh.extractMin();
    }

    // The root is now the k-th smallest.
    return mh.getMin();
}
// Demo driver: prints the 2nd smallest value of a fixed sample array.
int main() {
    int arr[] = {12, 3, 5, 7, 19};
    const int n = static_cast<int>(sizeof arr / sizeof *arr);
    const int k = 2;
    const int answer = kthSmallest(arr, n, k);
    std::cout << "kth smallest element is: " << answer;
    return 0;
}
方法五 (Using Max-Heap)
我们可以建立一个大顶堆来解决这个问题, 具体办法如下:
1) Build a Max-Heap MH of the first k elements (arr[0] to arr[k-1]) of the given array. O(k)
2) For each element, after the k’th element (arr[k] to arr[n-1]), compare it with root of MH.
……a) If the element is less than the root then make it root and call heapify for MH
……b) Else ignore it.
// The step 2 is O((n-k)*logk)
3) Finally, root of the MH is the kth smallest element.
Time complexity of this solution is O(k + (n-k)*Logk)
程序如下:
// find kth smallest element using max-heap
#include <iostream>
#include <climits>
using namespace std;
// Exchanges the ints stored at addresses x and y.
void swap(int *x, int *y) {
    const int fromX = *x;
    const int fromY = *y;
    *x = fromY;
    *y = fromX;
}
// Binary max-heap laid out over an int array supplied by the caller.
// Indices are zero-based: the children of slot i are 2i + 1 and 2i + 2.
class MaxHeap {
    int *harr;      // heap storage
    int capacity;   // declared as the maximum heap size; not read by the inline members here
    int heap_size;  // number of slots of harr that currently belong to the heap
public:
    // Builds a heap from the first n elements of arr (defined out of line).
    MaxHeap(int arr[], int n);

    // Restores the heap property for the subtree rooted at i (defined out of line).
    void MaxHeapify(int i);

    // Index helpers for the implicit binary tree.
    int parent(int i) {
        return (i - 1) / 2;
    }
    int left(int i) {
        return i * 2 + 1;
    }
    int right(int i) {
        return i * 2 + 2;
    }

    // Removes and returns the root element (defined out of line).
    int extractMax();

    // Returns the root element without removing it; no emptiness check.
    int getMax() {
        return *harr;
    }

    // Overwrites the root with x, then sifts it down from index 0.
    void replaceMax(int x) {
        harr[0] = x;
        MaxHeapify(0);
    }
};
// Builds a max-heap in place over the first k elements of the caller's
// array a. The heap does not copy the data: harr aliases a, so those k slots
// are reordered. capacity is now initialized as well (the original left it
// indeterminate); it is set to k, the number of slots the heap manages.
MaxHeap::MaxHeap(int a[], int k) : harr(a), capacity(k), heap_size(k) {
    // Heapify every node from the middle of the heap back to the root, so
    // that each subtree is already a heap when its parent is processed.
    for (int i = (heap_size - 1) / 2; i >= 0; --i) {
        MaxHeapify(i);
    }
}
// Removes the largest element from the heap and returns it.
// Returns INT_MAX when the heap is empty (the sentinel used by the original
// code).
int MaxHeap::extractMax() {
    if (heap_size == 0) {
        return INT_MAX;
    }

    const int largest = harr[0];
    const int lastIndex = heap_size - 1;

    // With more than one element, the last one takes the root's place and is
    // sifted down. The size is reduced only after the sift-down, exactly as
    // in the original code.
    if (lastIndex > 0) {
        harr[0] = harr[lastIndex];
        MaxHeapify(0);
    }
    --heap_size;
    return largest;
}
// Restores the max-heap property for the subtree rooted at index i,
// assuming both child subtrees already satisfy it.
//
// Fix: the original continued with MaxHeapify(harr[largest]), passing the
// element VALUE where an index is expected. The sift-down then either stopped
// early or resumed at an unrelated position, leaving the heap invalid. It
// must continue from the child INDEX that received the swapped element.
void MaxHeap::MaxHeapify(int i) {
    int node = i;
    for (;;) {
        const int l = left(node);
        const int r = right(node);

        // Find the largest of the node and its in-range children.
        int largest = node;
        if (l < heap_size && harr[l] > harr[largest]) {
            largest = l;
        }
        if (r < heap_size && harr[r] > harr[largest]) {
            largest = r;
        }
        if (largest == node) {
            return;  // heap property holds here
        }
        swap(&harr[node], &harr[largest]);
        node = largest;  // continue from the child's index
    }
}
// Returns the k-th smallest element (k counted from 1) of arr[0..n) using a
// max-heap of size k built in place over arr[0..k), so arr is reordered by
// the call.
// Returns INT_MAX when k is not in [1, n]. Previously k > n made the heap
// cover slots past the end of the array, and k <= 0 returned arr[0].
int kthSmallest(int arr[], int n, int k) {
    if (k <= 0 || k > n) {
        return INT_MAX;
    }

    // The heap holds the k smallest values seen so far; its root is the
    // largest of them.
    MaxHeap mh(arr, k);

    for (int i = k; i < n; ++i) {
        // A value below the current root belongs among the k smallest and
        // displaces the root. replaceMax() already sifts the new root down,
        // so the original's extra MaxHeapify(0) call is dropped.
        if (arr[i] < mh.getMax()) {
            mh.replaceMax(arr[i]);
        }
    }

    // The largest of the k smallest values is the k-th smallest overall.
    return mh.getMax();
}
// Demo driver: prints the 2nd smallest value of a fixed sample array.
int main() {
    int arr[] = {12, 3, 5, 7, 19};
    const int n = static_cast<int>(sizeof arr / sizeof *arr);
    const int k = 2;
    const int answer = kthSmallest(arr, n, k);
    std::cout << "kth smallest element is: " << answer;
    return 0;
}