PAT_1063: Set Similarity

Given two sets of integers, the similarity of the sets is defined to be Nc/Nt*100%, where Nc is the number of distinct common numbers shared by the two sets, and Nt is the total number of distinct numbers in the two sets. Your job is to calculate the similarity of any given pair of sets.

Input Specification:

Each input file contains one test case. Each case first gives a positive integer N (<=50) which is the total number of sets. Then N lines follow, each gives a set with a positive M (<=10^4) followed by M integers in the range [0, 10^9]. After the input of sets, a positive integer K (<=2000) is given, followed by K lines of queries. Each query gives a pair of set numbers (the sets are numbered from 1 to N). All the numbers in a line are separated by a space.

Output Specification:

For each query, print in one line the similarity of the sets, in the percentage form accurate up to 1 decimal place.

Sample Input:
3
3 99 87 101
4 87 101 5 87
7 99 101 18 5 135 18 99
2
1 2
1 3
Sample Output:
50.0%
33.3%
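
Checking the first query by hand: set 1 has distinct elements {99, 87, 101} and set 2 has {87, 101, 5}, so Nc = |{87, 101}| = 2 and Nt = |{5, 87, 99, 101}| = 4, giving 2/4 = 50.0%.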
Note: this is really just computing the Jaccard coefficient of two sets (a coefficient commonly used in data mining) — all you need are the sizes of the intersection and the union. The solution that got accepted merges the two sorted sets while counting. I also tried a hash-based approach, but the last test case always times out; I don't understand why and would appreciate pointers from anyone who does.
Accepted code:
#include<stdio.h>
#include<vector>
#include<algorithm>
using namespace std;

// Merge the two sorted lists while counting: countCom = distinct values
// present in both sets, countTotal = distinct values present in either.
float ComputeSim(const vector<int>& s1, const vector<int>& s2)
{
	int countCom = 0, countTotal = 0;
	size_t i = 0, j = 0;

	while(i < s1.size() && j < s2.size())
	{
		// Skip duplicates so every distinct value is counted exactly once.
		while(i + 1 < s1.size() && s1[i + 1] == s1[i])
			i++;
		while(j + 1 < s2.size() && s2[j + 1] == s2[j])
			j++;
		if(s1[i] > s2[j])
		{
			countTotal++;
			j++;
		}
		else if(s1[i] < s2[j])
		{
			countTotal++;
			i++;
		}
		else
		{
			countCom++;
			countTotal++;
			i++;
			j++;
		}
	}
	// Count the distinct values left in whichever list is unfinished.
	// (The j == 0 / i == 0 checks guard the s2[j-1] / s1[i-1] look-back
	// on the very first leftover element.)
	while(j < s2.size())
	{
		if(j == 0 || s2[j] != s2[j - 1])
			countTotal++;
		j++;
	}
	while(i < s1.size())
	{
		if(i == 0 || s1[i] != s1[i - 1])
			countTotal++;
		i++;
	}

	return (float)countCom / (float)countTotal;
}

int main()
{
	int n_sets,n_num;
	int n_queries;
	vector<vector<int>> sets;
	
	scanf("%d",&n_sets);

	for(int i=0;i<n_sets;i++)
	{
		scanf("%d",&n_num);
		vector<int> num_list;
		for(int j=0;j<n_num;j++)
		{
			int temp_num;			
			scanf("%d",&temp_num);
			num_list.push_back(temp_num);
		}
		sort(num_list.begin(),num_list.end());	// ComputeSim relies on the lists being sorted
		sets.push_back(num_list);
	}

	scanf("%d",&n_queries);
	for(int i=0;i<n_queries;i++)
	{
		int i1,i2;
		scanf("%d %d",&i1,&i2);
		// calculate similarity
		float sim = ComputeSim(sets[i1-1],sets[i2-1]);
		// output result
		printf("%.1f%%\n",sim*100);
	}

	return 0;
}
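
For reference, the cost profile of the accepted solution: each set is sorted once at input time (O(M log M)), and each of the K <= 2000 queries is answered by a single linear merge over at most 2*10^4 elements, which fits the time limit comfortably.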

The hash-based code that was not accepted (the last test case times out):
#include<stdio.h>
#include<vector>
#include<algorithm>
#include<map>
using namespace std;

map<int,int> Mymap;

// Count distinct values across both sets by inserting everything into a
// global std::map (cleared in main before each query). Every insertion
// costs O(log n) plus a tree-node allocation, and the map is rebuilt from
// scratch for each of the K queries.
int CountTotalNums(const vector<int>& s1, const vector<int>& s2)
{
	for(size_t i=0;i<s1.size();i++)
		Mymap[s1[i]] = 1;
	for(size_t i=0;i<s2.size();i++)
		Mymap[s2[i]] = 1;

	// Every stored value is 1, so the map's size is the distinct count.
	return (int)Mymap.size();
}

float ComputeSim(const vector<int>& s1, const vector<int>& s2)
{
	int total = CountTotalNums(s1,s2);
	// The lists were deduplicated when read, so their sizes are distinct
	// counts and inclusion-exclusion gives the intersection size.
	int com = (int)(s1.size()+s2.size())-total;

	return (float)com/(float)total;
}


int main()
{
	int n_sets,n_num;
	int n_queries;
	vector<vector<int>> sets;
	scanf("%d",&n_sets);
	map<int,int> Mymap1;

	for(int i=0;i<n_sets;i++)
	{
		scanf("%d",&n_num);
		Mymap1.clear();
		vector<int> num_list;
		for(int j=0;j<n_num;j++)
		{
			int temp_num;			
			scanf("%d",&temp_num);
			if(Mymap1[temp_num]!=1)
			{
				num_list.push_back(temp_num);
				Mymap1[temp_num]=1;
			}

		}
		
		sets.push_back(num_list);
	}

	scanf("%d",&n_queries);
	for(int i=0;i<n_queries;i++)
	{
		int i1,i2;
		scanf("%d %d",&i1,&i2);
		Mymap.clear();
		// calculate similarity
		float sim = ComputeSim(sets[i1-1],sets[i2-1]);
		// output result
		printf("%.1f%%\n",sim*100);
	}

	return 0;
}
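
As for why the hash version times out: the per-query std::map is the prime suspect. Each query rebuilds a balanced tree from up to 2*10^4 elements (O(log n) per insertion plus a heap allocation per node), and the original version also copied both vectors by value on every call. Hashing does work if each set is stored once as a hash set and each query only probes membership. Below is a minimal sketch of that idea, assuming the judge accepts C++11's unordered_set; it has not been run against the actual PAT judge:

#include<cstdio>
#include<vector>
#include<unordered_set>
using namespace std;

int main()
{
	int n_sets;
	scanf("%d",&n_sets);
	vector<unordered_set<int>> sets(n_sets);

	for(int i=0;i<n_sets;i++)
	{
		int m,x;
		scanf("%d",&m);
		while(m--)
		{
			scanf("%d",&x);
			sets[i].insert(x);	// duplicates are dropped automatically
		}
	}

	int n_queries;
	scanf("%d",&n_queries);
	while(n_queries--)
	{
		int i1,i2;
		scanf("%d %d",&i1,&i2);
		const unordered_set<int>& a = sets[i1-1];
		const unordered_set<int>& b = sets[i2-1];
		// Probe the smaller set against the larger: O(min(|a|,|b|)) on average.
		const unordered_set<int>& smaller = (a.size()<b.size()) ? a : b;
		const unordered_set<int>& bigger  = (a.size()<b.size()) ? b : a;
		int common = 0;
		for(int v : smaller)
			if(bigger.count(v))
				common++;
		int total = (int)(a.size()+b.size()) - common;	// inclusion-exclusion
		printf("%.1f%%\n",100.0*common/total);
	}

	return 0;
}

Storing each set once and keeping each query to a linear membership scan is the same trick the accepted merge solution uses in spirit: pay the preprocessing cost once, keep the per-query work linear.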


                
