Given two sets of integers, the similarity of the sets is defined to be Nc/Nt*100%, where Nc is the number of distinct common numbers shared by the two sets, and Nt is the total number of distinct numbers in the two sets. Your job is to calculate the similarity of any given pair of sets.
Input Specification:
Each input file contains one test case. Each case first gives a positive integer N (<=50) which is the total number of sets. Then N lines follow, each gives a set with a positive M (<=104) and followed by M integers in the range [0, 109]. After the input of sets, a positive integer K (<=2000) is given, followed by K lines of queries. Each query gives a pair of set numbers (the sets are numbered from 1 to N). All the numbers in a line are separated by a space.
Output Specification:
For each query, print in one line the similarity of the sets, in the percentage form accurate up to 1 decimal place.
Sample Input:3 3 99 87 101 4 87 101 5 87 7 99 101 18 5 135 18 99 2 1 2 1 3Sample Output:
50.0% 33.3%
备注:其实就是求两个集合的Jaccard系数(数据挖掘里常用的一个系数),算出两个集合的交集和并集大小即可。A过的是将两个集合边merge边统计个数的算法。还尝试了一下用hash的思想求解,但是最后一个case总是超时,不解原因,求高人指点。
A过的code:
#include<stdio.h> #include<vector> #include<algorithm> using namespace std; float ComputeSim(vector<int> s1,vector<int> s2) { float sim = 0; int countCom = 0, countTotal = 0; int i=0,j=0; while(i<s1.size() && j<s2.size()) { while(i+1<s1.size() && s1[i+1]==s1[i]) i++; while(j+1<s2.size() && s2[j+1]==s2[j]) j++; if(s1[i]>s2[j]) { countTotal++; j++; } else if(s1[i]<s2[j]) { countTotal++; i++; } else { countCom++; countTotal++; i++; j++; } } if(i==s1.size()) { while(j<s2.size()) { if(s2[j]!=s2[j-1]) { countTotal++; j++; } else j++; } } else if(j==s2.size()) { while(i<s1.size()) { if(s1[i]!=s1[i-1]) { countTotal++; i++; } else i++; } } sim = (float)countCom/(float)countTotal; return sim; } int main() { int n_sets,n_num; int n_queries; vector<vector<int>> sets; scanf("%d",&n_sets); for(int i=0;i<n_sets;i++) { scanf("%d",&n_num); vector<int> num_list; for(int j=0;j<n_num;j++) { int temp_num; scanf("%d",&temp_num); num_list.push_back(temp_num); } sort(num_list.begin(),num_list.end()); sets.push_back(num_list); } scanf("%d",&n_queries); for(int i=0;i<n_queries;i++) { int i1,i2; scanf("%d %d",&i1,&i2); //calculate similartiy float sim = ComputeSim(sets[i1-1],sets[i2-1]); //output result printf("%.1f%%\n",sim*100); } return 0; }
没有A过的超时的代码,用的hash思想:
#include<stdio.h> #include<vector> #include<algorithm> #include<map> using namespace std; map<int,int> Mymap; int CountTotalNums(vector<int> s1,vector<int> s2) { int count = 0; for(int i=0;i<s1.size();i++) Mymap[s1[i]] = 1; for(int i=0;i<s2.size();i++) Mymap[s2[i]] = 1; for(map<int,int>::iterator iter = Mymap.begin();iter!=Mymap.end();iter++) { if(iter->second==1) count++; } return count; } float ComputeSim(vector<int> s1,vector<int> s2) { float sim = 0; int total = CountTotalNums(s1,s2); int com = s1.size()+s2.size()-total; sim = (float)com/(float)total; return sim; } int main() { int n_sets,n_num; int n_queries; vector<vector<int>> sets; scanf("%d",&n_sets); map<int,int> Mymap1; for(int i=0;i<n_sets;i++) { scanf("%d",&n_num); Mymap1.clear(); vector<int> num_list; for(int j=0;j<n_num;j++) { int temp_num; scanf("%d",&temp_num); if(Mymap1[temp_num]!=1) { num_list.push_back(temp_num); Mymap1[temp_num]=1; } } sets.push_back(num_list); } scanf("%d",&n_queries); for(int i=0;i<n_queries;i++) { int i1,i2; scanf("%d %d",&i1,&i2); Mymap.clear(); //calculate similartiy float sim = ComputeSim(sets[i1-1],sets[i2-1]); //output result printf("%.1f%%\n",sim*100); } return 0; }