离散化的两种实现方式【sort或者map】

星空皓月

已于 2023-08-10 23:20:04 修改

阅读量938

点赞数 4

分类专栏：算法与数据结构文章标签：算法

于 2023-08-06 19:22:57 首次发布

本文链接：https://blog.csdn.net/qq_45249273/article/details/132125426

版权

算法与数据结构专栏收录该内容

15 篇文章 7 订阅

订阅专栏

本文探讨了离散化技术在处理大规模数据和优化算法效率中的策略，涉及区间查找、动态规划和数据压缩，举例了几个实际问题如市长海报、邮票问题和金发姑娘与牛问题的解决方案。

摘要由CSDN通过智能技术生成

离散化

定义

把无限空间中有限的个体映射到有限的空间中去，以此提高算法的时空效率。通俗的说，离散化是在不改变数据相对大小的条件下，对数据进行相应的缩小。
适用范围：数组中元素值域很大，但个数不是很多。
比如将a[]=[1,3,100,2000,500000]映射到[0,1,2,3,4]这个过程就叫离散化。

两种离散化方式

1.利用sort+unique进行数据离散化（适用于区间查找，及更新）

常与前缀和、树状数组、线段树和动态规范结合考查。

先来看一个金典题目：
题目
假定有一个无限长的数轴，数轴上每个坐标上的数都是 0。
现在，我们首先进行 n次操作，每次操作将某一位置 x 上的数加 c。
接下来，进行 m 次询问，每个询问包含两个整数 l 和 r，你需要求出在区间 [ l , r ] 之间的所有数的和。

输入格式：
第一行包含两个整数 n 和 m。
接下来 n行，每行包含两个整数 x 和 c。
再接下来 m 行，每行包含两个整数 l 和 r。

输出格式：
共 m 行，每行输出一个询问中所求的区间内数字和。

数据范围：
−10⁹ ≤ x ≤ 10⁹

1 ≤ n ≤ 10⁵

1 ≤ m ≤ 10⁵

−10⁹ ≤ l ≤ r ≤ 10⁹

− 10000 ≤ c ≤ 10000
输入样例
3 3
1 2
3 6
7 5
1 3
4 6
7 8
输出样例
8
0
5
思路：
由于1 ≤ n ≤ 10⁵ 和1 ≤ m ≤ 10⁵ 所调用的数字范围较小，而数轴范围较大，故可以将通过离散化处理，将−10⁹ ≤ x ≤ 10⁹
范围缩为 −10⁵ ≤ x ≤ 10⁵ 左右，大大提高效率。
在这里插入图片描述
代码

#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<algorithm>
#include<vector>
using namespace std;
typedef pair<int, int> PII;
const int N = 3 * 1e5 + 10;
int a[N], s[N];
vector<int> alls;
vector<PII> add, query;

// 二分查找
int find(int x)
{
	int l = 0, r = alls.size() - 1;
	while (l < r)
	{
		int mid = l + ((r - l) >> 1);
		if (alls[mid] >= x) r = mid;
		else l = mid + 1;
	}
	return l + 1; // 由于S是从1开始的，所以对应映射位置都往前提一位
}
int main()
{
	int n, m;
	cin >> n >> m;
	for (int i = 0; i < n; ++i)
	{
		int x, c;
		cin >> x >> c;
		add.push_back({x, c});
		alls.push_back(x);
	}
	for (int i = 0; i < m; ++i)
	{
		int l, r;
		cin >> l >> r;
		query.push_back({l, r});
		alls.push_back(l);
		alls.push_back(r);
	}
	sort(alls.begin(), alls.end());
	alls.erase(unique(alls.begin(), alls.end()), alls.end());
	for (auto& item : add)
	{
		int x = find(item.first);
		a[x] += item.second;
	}
	for (int i = 1; i <= alls.size(); ++i) s[i] = s[i - 1] + a[i];
	for (auto& item : query)
	{
		int l = find(item.first), r = find(item.second);
		cout << s[r] - s[l - 1] << endl;
	}
	return 0;
}

当然这里也可以用数组+lower_bound来做，下面会说

1.1 Mayor’s posters POJ - 2528

在这里插入图片描述
思路：
可以将问题转化为，求区间的最小值，让每个区间的海报标记为第i个放的，然后用线段树来做。
查询每个海报区间的最小值是否等于i，如果等于i则没有被完全覆盖。本题的数据量太大，区间是[1, 1e7]，
所以要用离散化，存取l和r的相对大小，因为n = 1e4，所以离散过后的区间为[1, 2 * n]。
时间复杂度O(nlogn)

#include<cstdio>
#include<map>
#include<iostream>
#include<algorithm>
#define ls 		rt << 1, l, m
#define rs 		rt << 1 | 1,  m + 1, r
using namespace std;
const int maxn = 2e4 + 5;
int mi[maxn << 2], a[maxn], add[maxn<<2];
struct node{
	int l, r, pos;
}arr[maxn];
int n;
void pushUp(int rt){
	mi[rt] = min(mi[rt << 1], mi[rt << 1 | 1]);
}
//下推标记 
void pushDown(int rt){
	add[rt << 1] = add[rt];
	add[rt << 1 | 1] = add[rt];
	mi[rt << 1] = add[rt];
	mi[rt << 1 | 1] = add[rt];
	add[rt] = 0;
}
//建树，此过程可用memset代替 
void build(int rt, int l, int r){
	if (l == r){
		mi[rt] = 0;
		return;
	}
	int m = (l + r) >> 1;
	build(ls);
	build(rs);
	pushUp(rt);
}
//区间修改 
void update(int rt, int l, int r, int L, int R, int C){
	if (L <= l && r <= R){
		mi[rt] = C;
		add[rt] = C;
		return;
	}
	if(add[rt] != 0)	pushDown(rt);
	int m = (l + r) >> 1;
	if (L <= m)		update(ls, L, R, C);
	if (m < R)		update(rs, L, R, C);
	pushUp(rt);
}
//查询区间最小值 
int query(int rt, int l, int r, int L, int R){
	if (L <= l && r <= R){
		return mi[rt];
	}
	if (add[rt] != 0)	pushDown(rt);
	int m = (l + r) >> 1;
	int res = maxn;
	if (L <= m)		res = min(res, query(ls, L, R));
	if (m < R)		res = min(res, query(rs, L, R));
	return res;
}
//离散化 
void disc(int cnt){
	sort(a, a + cnt);
	int lena = unique(a, a + cnt) - a;
	for (int i = 0; i < n; ++i){
		arr[i].l = lower_bound(a, a + lena, arr[i].l) - a + 1;
		arr[i].r = lower_bound(a, a + lena, arr[i].r) - a + 1;
	//	printf("l = %d   r = %d\n", arr[i].l, arr[i].r);
	}
}
void solve(){
	scanf("%d", &n);
	build(1, 1, n << 1);
	int cnt = 0;
	for (int i = 0; i < n; ++i){
		scanf("%d%d", &arr[i].l, &arr[i].r);
		arr[i].pos = i + 1;
		a[cnt++] = arr[i].l;
		a[cnt++] = arr[i].r;
	}
	disc(cnt);
	for (int i = 0; i < n; ++i){
		update(1, 1, n << 1, arr[i].l, arr[i].r, arr[i].pos);
	} 
	int ans = 0;
	for (int i = 0; i < n; ++i){
		int x = query(1, 1, n << 1, arr[i].l, arr[i].r);
		//printf("x = %d\n", x);
		if (x == arr[i].pos)	ans++;
	}
	printf("%d\n", ans);
}
int main(){
	int t;
	scanf("%d", &t);
	for (int i = 0; i < t; ++i) solve();
	return 0;
}

2.利用map或unordered_map进行数据离散化（使用与非区间操作）

常与差分、并查集、DFS等算法结合考察

2.1 邮票 FZU - 2227【DFS】

题目描述

在这里插入图片描述
思路
可以先做映射关系（离散化），然后从最小的数并且这个数出现的次数为1开始dfs，走到哪直接就存到哪。
代码

#include<cstdio>
#include<vector>
#include<map>
#include<cstring>
using namespace std;
const int maxn=1e5+5;
map<int,int>index,data,num;
vector<int>v[maxn];//邻接表，存相对大小的关系 
int cnt,pos;
int dis[maxn];
bool vis[maxn];
void dfs(int x)
{
	if(cnt==pos)	return;
	//int len=v[x].size();
	for(int i=0;i<2;i++) // 题目保证有解，说明一个顶点最多有两个出度
	{
		int y=v[x][i];
		if(!vis[y])
		{
			vis[y]=true;
			dis[pos++]=data[y];
			dfs(y);
		}
		if(cnt==pos)	return;
	}
}
int main()
{
	int n;
	while(scanf("%d",&n)!=EOF)
	{
		int ui,vi;
		cnt=0,pos=0;
		index.clear();
		data.clear();
		num.clear();
		memset(vis,false,sizeof(vis));
		for(int i=0;i<2*n;i++)	v[i].clear();
		while(n--)
		{
			scanf("%d%d",&ui,&vi);
			if(!num[ui]) 
			{
				index[ui]=cnt++;
				data[index[ui]]=ui;//建立映射关系 （1，100） （100，1） 
			}
			if(!num[vi])
			{
				index[vi]=cnt++;
				data[index[vi]]=vi;
			}
			v[index[ui]].push_back(index[vi]);//建立相对大小的对应关系 
			v[index[vi]].push_back(index[ui]);
			num[ui]++;
			num[vi]++;
		}
		int st=0;
		for(map<int,int>::iterator it=num.begin();it!=num.end();it++)
		{
			if(it->second==1) //判断是否唯一出现，第一个唯一出现的肯定是字典序最小且起始的位置	
			{
				st=index[it->first];
				break;
			}
		}
		vis[st]=true;
		dis[pos++]=data[st];
		dfs(st);
		for(int i=0;i<cnt;i++)
		{
			printf("%d",dis[i]);
			if(i<cnt-1)	printf(" ");
			else	printf("\n");
		}
	}
}

2.2 More is better HDU - 1856 【并查集】

题目描述
在这里插入图片描述
思路：
在集合合并的时，如果两个集合不同，那么在合并成同一个集合的时候，
将被合并的集合里的个数加到这个集合里面，并统计最大值。
坑点：就是当输入n为0时，答案为1，因为当没有朋友对时，各自在自己的房间，所以说答案为1。
代码：

#include<iostream>
#include<cstdio>
#include<map>
#include<utility>
#include<vector>
#include<algorithm>
using namespace std;
const int maxn = 1e5 + 5;
map<int, int> mp;
int fa[maxn], num[maxn], ans = 0;
void init(int n){
	for (int i = 1; i <= n; ++i){
		fa[i] = i;
		num[i] = 1;
	}
}
int find(int x){
	return fa[x] == x ? fa[x] : fa[x] = find(fa[x]);
}
void _union(int x, int y){
	x = find(x);
	y = find(y);
	if(x != y){
		num[y] += num[fa[x]];
		ans = max(ans, num[y]);
		fa[x] = y;
	}
}
void solve(){
	int n, x, y;
	while (scanf("%d", &n) != EOF){
		int cnt = 0;
		ans = 0;
		init(maxn); 
		mp.clear();
		if (n == 0){
			puts("1");
			continue;
		}
		while (n--){
			scanf("%d%d", &x, &y);
			if(!mp[x])	mp[x] = ++cnt;
			if(!mp[y])	mp[y] = ++cnt;
			_union(mp[x], mp[y]);
		}
		printf("%d\n", ans);
	}
}
int main(){
	solve();
	return 0;
}

2.3 金发姑娘和 N 头牛 - 1952. AcWing题库

题目描述
在这里插入图片描述
思路：

代码

#include <iostream>
#include <cstring>
#include <map>
#include <algorithm>

using namespace std;

const int INF = 2e9;

int n, x, y, z;

int main()
{
    map<int, int> b; // 离散化及差分数组
    
    cin >> n >> x >> y >> z;
    for (int i = 0; i < n; i ++ )
    {
        //给三个大区间 + c
        int l, r;
        cin >> l >> r;
        //[-INF,r)
        b[-INF] += x;
        b[l] -= x;
        //[l, r]
        b[l] += y;
        b[r + 1] -= y;
        //b(r, INF]
        b[r + 1] += z;
        b[INF] -= z;
    }
    int res = 0, sum = 0;
    // 枚举差分数组，求前缀和，更新最大值
    
    for(auto item : b)
    {
        sum += item.second;// 前缀和
        res = max(res, sum);
    }
    
    cout << res;
    
    return 0;
}

星空皓月

关注

4
点赞
踩
9

收藏

觉得还不错? 一键收藏
打赏
0
评论
离散化的两种实现方式【sort或者map】

把无限空间中有限的个体映射到有限的空间中去，以此提高算法的时空效率。通俗的说，离散化是在不改变数据相对大小的条件下，对数据进行相应的缩小。适用范围：数组中元素值域很大，但个数不是很多。比如将a[]=[1,3,100,2000,500000]映射到[0,1,2,3,4]这个过程就叫离散化。
复制链接

扫一扫