线性时间选择求第k小数（分治）

最新推荐文章于 2023-03-25 12:12:31 发布

开心生活_

最新推荐文章于 2023-03-25 12:12:31 发布

阅读量2.8k

点赞数 5

分类专栏：算法 C++ 文章标签：算法快速排序排序算法

本文链接：https://blog.csdn.net/weixin_45174651/article/details/109480411

版权

C++ 同时被 2 个专栏收录

7 篇文章 0 订阅

订阅专栏

算法

5 篇文章 0 订阅

订阅专栏

线性时间选择

问题描述：给定线性序集中 n 个元素和一个整数 k（1 <= k <= n），要求找出这 n 个元素中第 k 小的元素。
RandomizedSelect算法：
该算法实际上是模仿快速排序算法设计的，基本思想是对输入的数组进行递归划分。与快速排序的算法不同的是，它只对划分的数组之一进行递归处理。

划分：以数组的第一个元素作为基准值，设置两个变量分别从前后往中间走，把大于基准值的放在基准值的右半部分，而小于基准值的放在基准值左侧。

// Partitions a[p..r] (inclusive bounds) around the pivot value a[p].
//
// On return the pivot sits at the returned index j, every element of
// a[p..j-1] is <= pivot and every element of a[j+1..r] is >= pivot.
//
// a : array to rearrange in place
// p : index of the first element of the range (also the pivot position)
// r : index of the last element of the range, r >= p
// Returns the final index of the pivot.
template <class Type>
int Partition(Type a[], int p, int r) {
	int i = p, j = r + 1;
	const Type x = a[p]; // keep the pivot in its own type; an int would truncate
	while (true) {
		// Bounds are checked before the element is read, so a[r + 1] is never touched.
		while (i < r && a[++i] < x) {}
		// a[p] == x stops this scan, so no explicit lower-bound check is needed.
		while (a[--j] > x) {}
		if (i >= j) break;
		std::swap(a[i], a[j]);
	}
	// Drop the pivot into its final slot.
	a[p] = a[j];
	a[j] = x;
	return j;
}
// Returns the k-th smallest element (k is 1-based) of a[p..r], rearranging
// the range in place.
//
// a : array to search; its elements are permuted
// p : index of the first element of the range
// r : index of the last element of the range, r >= p
// k : rank to find within a[p..r]; the caller must ensure 1 <= k <= r - p + 1
//
// Only the side of the partition that contains rank k is searched further.
template <class Type>
Type RandomizedSelect(Type a[], int p, int r, int k) {
	if (p == r) return a[p];
	const int pivotIdx = Partition(a, p, r);
	const int leftLen = pivotIdx - p + 1; // size of a[p..pivotIdx], pivot included
	if (k == leftLen) {
		// The pivot itself has rank k. Returning here also guarantees progress:
		// keeping the pivot inside the left range can recurse forever when the
		// range holds equal elements.
		return a[pivotIdx];
	}
	if (k < leftLen) {
		return RandomizedSelect(a, p, pivotIdx - 1, k);
	}
	return RandomizedSelect(a, pivotIdx + 1, r, k - leftLen);
}

在算法 RandomizedSelect 执行 Partition 后，数组 a[p:r] 被划分成两个子数组：a[p:ind] 和 a[ind + 1 : r]，使得 a[p:ind] 中的每个元素都不大于 a[ind + 1 : r] 中的任何元素。接着算法计算子数组 a[p:ind] 中的元素个数 j，判断第 k 小的元素落在哪一个区间里，然后按照分治的思想，只对这一个区间递归调用算法。
容易看出，在最坏的情况下该算法需要 $O(n^2)$ 计算时间。例如：在找最小元素时，总在最大元素处进行划分。尽管如此，该算法的平均性能比较好。

优化

对划分进行随机数改良

对划分算法 Partition 进行优化：使用一个随机数产生器 Random，随机地产生 p 到 r 之间的一个整数作为基准的下标，即划分基准是随机选取的。此时算法可以在 $O(n)$ 的平均时间内找到第 k 小的元素。

// Partitions a[p..r] (inclusive bounds) around a pivot chosen uniformly at
// random (via rand()) from the range.
//
// On return the pivot sits at the returned index j, every element of
// a[p..j-1] is <= pivot and every element of a[j+1..r] is >= pivot.
//
// a : array to rearrange in place
// p : index of the first element of the range
// r : index of the last element of the range, r >= p
// Returns the final index of the pivot (an index, hence int rather than Type).
template <class Type>
int RandomizedPartition(Type a[], int p, int r) {
	const int s = rand() % (r - p + 1) + p; // random pivot index in [p, r]
	// Move the pivot to the front first: the scans below start at p + 1 and
	// rely on a[p] holding the pivot value.
	std::swap(a[p], a[s]);
	const Type x = a[p];
	int i = p, j = r + 1;
	while (true) {
		// Advance to an element that is not smaller than x (bounds checked before the read).
		while (i < r && a[++i] < x) {}
		// Retreat to an element that is not larger than x; a[p] == x stops the scan.
		while (a[--j] > x) {}
		if (i >= j) {
			break; // scans have crossed, partition complete
		}
		std::swap(a[i], a[j]);
	}
	a[p] = a[j];
	a[j] = x;
	return j;
}

取中位数进行划分

如果能在线性时间内找到一个划分基准，使得按这个基准划分出的两个子数组的长度都至多是原数组长度的 $\alpha$ 倍（$0 < \alpha < 1$），那么在最坏情况下也可以用 $O(n)$ 时间完成选择。例如：如果 $\alpha = 9/10$，算法递归调用所产生的子数组的长度至少缩短 $1/10$。所以，在最坏的情况下，算法所需要的计算时间 $T(n)$ 满足递归式 $T(n) \le T(9n/10) + O(n)$。由此可得 $T(n) = O(n)$。
划分基准的步骤如下：
1：将 n 个输入的元素每 5 个划分为一组，除了最后一个可能不是 5 个元素的组外，用任意一种排序算法进行排序，找出每组的中位数。
2：递归调用Select找出所有中位数的中位数，以这个数作为基准。
在这里插入图片描述

// Optimization based on medians; the worst-case time complexity is also O(n).
// Forward declaration of the simple sort that Select calls for small ranges.
void bubbleSort(int a[],int p,int r);
// Partitions a[p..r] (inclusive bounds) around the pivot value val.
//
// The first occurrence of val in a[p..r] is moved to a[p] and used as the
// pivot. If val does not occur in the range, a[p] is used as the pivot
// instead, so the function never reads an uninitialised index.
//
// On return the pivot sits at the returned index j, every element of
// a[p..j-1] is <= pivot and every element of a[j+1..r] is >= pivot.
//
// a   : array to rearrange in place
// p   : index of the first element of the range
// r   : index of the last element of the range, r >= p
// val : pivot value to partition around
// Returns the final index of the pivot.
int Partition(int a[],int p,int r,int val){
	int pos = p; // fallback pivot position when val is absent
	for (int q = p; q <= r; ++q) {
		if (a[q] == val) {
			pos = q;
			break;
		}
	}
	std::swap(a[p], a[pos]);
	const int x = a[p];
	int i = p, j = r + 1;
	while (true) {
		// Bounds are checked before the element is read, so a[r + 1] is never touched.
		while (i < r && a[++i] < x) {}
		// a[p] == x stops this scan at the latest at index p.
		while (a[--j] > x) {}
		if (i >= j) break;
		std::swap(a[i], a[j]);
	}
	a[p] = a[j];
	a[j] = x;
	return j;
}
// Returns the k-th smallest element (k is 1-based) of a[p..r] using a
// median-of-medians pivot, rearranging the range in place.
//
// a : array to search; its elements are permuted
// p : index of the first element of the range
// r : index of the last element of the range, r >= p
// k : rank to find within a[p..r]; the caller must ensure 1 <= k <= r - p + 1
//
// Side effect: the file-level variable `ind` is set to the index in `a` at
// which the returned element is stored.
template <class Type>
Type Select(Type a[], int p, int r, int k) {
	if (r - p < 75) {
		// Small range: sort it directly and read off the k-th element.
		bubbleSort(a, p, r);
		ind = p + k - 1;
		return a[ind];
	}
	// Split into complete groups of 5 (a trailing partial group is ignored),
	// sort each group and collect the group medians at the front of the range.
	const int lastGroup = (r - p - 4) / 5;
	for (int g = 0; g <= lastGroup; g++) {
		const int s = p + 5 * g;
		std::sort(a + s, a + s + 5);
		std::swap(a[p + g], a[s + 2]);
	}
	// The medians now occupy a[p..p + lastGroup]; take their median as pivot.
	const int medianCount = lastGroup + 1;
	const Type x = Select(a, p, p + lastGroup, (medianCount + 1) / 2);
	const int i = Partition(a, p, r, x);
	const int j = i - p + 1; // size of a[p..i], pivot included
	if (k == j) {
		// The pivot itself has rank k.
		ind = i;
		return a[i];
	}
	if (k < j) {
		return Select(a, p, i - 1, k);
	}
	return Select(a, i + 1, r, k - j);
}

下面给出上述算法的验证截图：
在这里插入图片描述

全部代码

全部代码在下面：

#include <cstdio>
#include <iostream>
#include <stdlib.h>
#include <algorithm>
#include <ctime>
#include "windows.h"
#define MAX_N 1000000
#define LEN 30
using namespace std;
/* 线性时间选择， 找第 k 小的元素 */ 

int arr[MAX_N + 5];
int n, mod, ind;
// 利用随机数来取基准值， 平均时间复杂度为O(n) 
// Partitions a[p..r] (inclusive bounds) around a pivot chosen uniformly at
// random (via rand()) from the range.
//
// On return the pivot sits at the returned index j, every element of
// a[p..j-1] is <= pivot and every element of a[j+1..r] is >= pivot.
//
// a : array to rearrange in place
// p : index of the first element of the range
// r : index of the last element of the range, r >= p
// Returns the final index of the pivot (an index, hence int rather than Type).
template <class Type>
int RandomizedPartition(Type a[], int p, int r) {
	const int s = rand() % (r - p + 1) + p; // random pivot index in [p, r]
	// Move the pivot to the front first: the scans below start at p + 1 and
	// rely on a[p] holding the pivot value.
	std::swap(a[p], a[s]);
	const Type x = a[p];
	int i = p, j = r + 1;
	while (true) {
		// Advance to an element that is not smaller than x (bounds checked before the read).
		while (i < r && a[++i] < x) {}
		// Retreat to an element that is not larger than x; a[p] == x stops the scan.
		while (a[--j] > x) {}
		if (i >= j) {
			break; // scans have crossed, partition complete
		}
		std::swap(a[i], a[j]);
	}
	a[p] = a[j];
	a[j] = x;
	return j;
}

// Returns the k-th smallest element (k is 1-based) of a[p..r] using a
// randomly chosen pivot, rearranging the range in place.
//
// a : array to search; its elements are permuted
// p : index of the first element of the range
// r : index of the last element of the range, r >= p
// k : rank to find within a[p..r]; the caller must ensure 1 <= k <= r - p + 1
//
// Side effect: the file-level variable `ind` is set to the index in `a` at
// which the returned element is stored.
template <class Type>
Type RandomizedSelect(Type a[], int p, int r, int k) {
	if (p == r) {
		ind = p;
		return a[p];
	}
	const int i = RandomizedPartition(a, p, r); // pivot ends up at index i
	const int j = i - p + 1;                    // size of a[p..i], pivot included
	if (k == j) {
		// The pivot itself has rank k. Returning here also guarantees progress:
		// keeping the pivot inside the left range can recurse forever when the
		// range holds equal elements.
		ind = i;
		return a[i];
	}
	if (k < j)
		return RandomizedSelect(a, p, i - 1, k);
	return RandomizedSelect(a, i + 1, r, k - j);
}
// Applies color_index as the text attribute of the standard-output console
// by passing it to SetConsoleTextAttribute.
void set_console_color(unsigned short color_index)
{
    const auto console = GetStdHandle(STD_OUTPUT_HANDLE);
    SetConsoleTextAttribute(console, color_index);
}

// 利用中位数来进行优化，时间复杂度最大也是O(n) 
// Sorts a[p..r] (inclusive bounds) into ascending order in place.
// For each position `head`, every later element smaller than a[head] is
// swapped into it, so a[head] ends up holding the minimum of a[head..r].
void bubbleSort(int a[],int p,int r)
{
	int head = p;
	while (head < r)
	{
		int probe = head + 1;
		while (probe <= r)
		{
			if (!(a[probe] >= a[head]))
				swap(a[head], a[probe]);
			++probe;
		}
		++head;
	}
}

// Partitions a[p..r] (inclusive bounds) around the pivot value val.
//
// The first occurrence of val in a[p..r] is moved to a[p] and used as the
// pivot. If val does not occur in the range, a[p] is used as the pivot
// instead, so the function never reads an uninitialised index.
//
// On return the pivot sits at the returned index j, every element of
// a[p..j-1] is <= pivot and every element of a[j+1..r] is >= pivot.
//
// a   : array to rearrange in place
// p   : index of the first element of the range
// r   : index of the last element of the range, r >= p
// val : pivot value to partition around
// Returns the final index of the pivot.
int Partition(int a[],int p,int r,int val){
	int pos = p; // fallback pivot position when val is absent
	for (int q = p; q <= r; ++q) {
		if (a[q] == val) {
			pos = q;
			break;
		}
	}
	std::swap(a[p], a[pos]);
	const int x = a[p];
	int i = p, j = r + 1;
	while (true) {
		// Bounds are checked before the element is read, so a[r + 1] is never touched.
		while (i < r && a[++i] < x) {}
		// a[p] == x stops this scan at the latest at index p.
		while (a[--j] > x) {}
		if (i >= j) break;
		std::swap(a[i], a[j]);
	}
	a[p] = a[j];
	a[j] = x;
	return j;
}
template <class Type>
Type Select(Type a[], int p, int r, int k) {
	if (r - p < 75) {
		bubbleSort(a, p, r); // 用一个简单的算法排序，小于75时Select执行时间不超过一个常数 
		 ind = k - p + 1;
		 return a[k - p + 1];
	}
	for (int i = 0; i <= (r - p - 4) / 5; i++) {
		// 每 5 个元素分为一组
		int s=p+5*i,t=s+4;
		sort(a + s, a + s + t);
		swap(a[p+i],a[s+2]);//交换每组中的中位数到前面
	}
	int x = Select(a, p, (r - p - 4) / 4, r - p + 1); // 找中位数的中位数， 及全体的中位数
	int i = Partition(a, p, r, x), j = i - p + 1; // 以中位数作为划分 
	if (k <= j) {
		// 在左侧
		 return Select(a, p, i, k);
	} else {
		return Select(a, i + 1, k - j, k);
	}
}

int main() {
	srand(time(NULL)); /*根据当前时间设置“随机数种子”*/
	set_console_color(9);
	cout << "请输入数据规模 n 和最大数据 mod " << endl;
	set_console_color(7);
	cin >> n >> mod;
	set_console_color(9);
	cout << "请输入 k " << endl;
	set_console_color(7);
	int k;
	cin >> k;
	for (int i = 0; i < n; i++) {
		arr[i] = rand() % mod;
	}
	//int ans = RandomizedSelect(arr, 0, n - 1, k);
	int ans = Select(arr, 0, n - 1, k);
	set_console_color(2);
	//cout << "ans = " << ans << endl;
    cout << "数组中第 " << k << " 小的数是 " << arr[ind] << endl;
   	set_console_color(7); 
	cout << "*******排序后的结果如下**********" << endl;
	sort(arr, arr + n);
	for (int i = 0; i < n; i++) {
		if (i - ind)
			cout << arr[i] << " ";
		else {
			set_console_color(2);
    		cout << arr[i] << " ";
   			set_console_color(7); 	
   		}
		if (i && i % 30 == 0) cout << endl;
	}
	cout << endl;
	return 0;
}

参考链接-算法：线性时间选择

开心生活_

关注

5
点赞
踩
35

收藏

觉得还不错? 一键收藏
0
评论
线性时间选择求第k小数（分治）

**元素选择问题：**给定线性序集中 n 个元素和一个整数 k（1 <= k <= n），要求找出着 n 个元素中第 k 小的元素。RandomizedSelect算法：该算法实际上是模仿快速排序算法设计的，基本思想是对输入的数组进行递归划分。与快速排序的算法不同的是，它只对划分的数组之一进行递归处理。**划分：**以数组的第一个元素作为基准值，设置两个变量分别从前后往中间走，把大于基准值的放在基准值的右半部分，而小于基准值的放在基准值左侧。template<class Type&
复制链接

扫一扫