题目:
Junk-Mail Filter
Time Limit: 15000/8000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 4120 Accepted Submission(s): 1254
1) Extract the common characteristics from the incoming email.
2) Use a filter matching the set of common characteristics extracted to determine whether the email is a spam.
We want to extract the set of common characteristics from the N sample junk emails available at the moment, and thus having a handy data-analyzing tool would be helpful. The tool should support the following kinds of operations:
a) “M X Y”, meaning that we think that the characteristics of spam X and Y are the same. Note that the relationship defined here is transitive, so
relationships (other than the one between X and Y) need to be created if they are not present at the moment.
b) “S X”, meaning that we think spam X had been misidentified. Your tool should remove all relationships that spam X has when this command is received; after that, spam X will become an isolated node in the relationship graph.
Initially no relationships exist between any pair of the junk emails, so the number of distinct characteristics at that time is N.
Please help us keep track of any necessary information to solve our problem.
Each test case starts with two integers, N and M (1 ≤ N ≤ 10^5, 1 ≤ M ≤ 10^6), the number of email samples and the number of operations. M lines follow, each line is one of the two formats described above.
Two successive test cases are separated by a blank line. A case with N = 0 and M = 0 indicates the end of the input file, and should not be processed by your program.
Sample Input:
5 6
M 0 1
M 1 2
M 1 3
S 1
M 1 2
S 3

3 1
M 1 2

0 0

Sample Output:
Case #1: 3
Case #2: 2
题目链接:
http://acm.hdu.edu.cn/showproblem.php?pid=2473
题意:
(同学写得挺好的,就copy过来了)
N 是 标号为 0~(N-1)的邮件 M是有M行数据。
第二行开始为数据 当输入为M时之后跟着的两个编号表示这两封邮件都为一种垃圾邮件。
当输入为S时跟着的一个标号表示这封邮件被误判,这不是封垃圾邮件,而之前与这封邮件同时被判为垃圾邮件的那封邮件还是垃圾邮件。
输出为有多少种邮件(垃圾邮件也分很多种)。
坑爹:
1.当要将一封垃圾邮件变为普通邮件时,如果在这个垃圾邮件的树中作为根的话,那么变为普通邮件时要将剩余的垃圾邮件重新合并起来。
2.当要搜索有多少个集合(邮件种类)的时候要查找 father[i] == i 这种有多少个,但如果是 (0-1-2-3-4)(假设根为0),将01234这些点都删除了,
用 father[i] == i 来找集合的话会把前面的 (0邮件)也会算上的。
解法:
用一个代理的数组(起始跟输入的值一样),访问、删除的时候将index数组的值改变就行了(删除就将index[i]等于代理数组下标为n以后的位置),而合并就用代理数
组进行操作就行了。
/*#include <iostream>
using namespace std ;
const int maxn = 1100000 +10;
int father[maxn]; //father[x]表示x的父节点
int index[maxn];
bool used[maxn];
void make_set()
{
for(int i=0;i<maxn;i++)
{
father[i]=i; //初始化一开始每个节点的父节点都为本身
index[i]=i;
}
}
int findroot(int x)// 寻找x元素所在的集合也就是找子节点的根节点
{
while(father[x] != x)
{
x=father[x]; // iterative walk up the parent chain; this draft performs no path compression
}
return x;
}
void Union(int x,int y) //合并两个不相交的集合,x,y分别为两个不同的集合
{
x = findroot(index[x]);
y = findroot(index[y]);
if(x != y)
{
father[x] = y;
}
}
*/
#include <iostream>
using namespace std ;
// Capacity of every table below: room for the N real mail ids plus one fresh
// proxy slot for each "S" operation (N <= 10^5, M <= 10^6 per the statement).
const int maxn = 1100000 + 10;

// Disjoint-set state shared by make_set / findroot / Union and by main.
//
// NOTE(review): the file has `using namespace std;`, so an unqualified `rank`
// is ambiguous with std::rank on C++11+ toolchains; the functions below
// therefore spell the array as `::rank`. `index` may likewise collide with the
// POSIX ::index function when <strings.h> is visible -- renaming it would also
// require touching main, so it is left as is here.
int father[maxn]; // father[s] is the parent slot of s; a root satisfies father[s] == s
int index[maxn];  // index[id] is the slot that currently stands in for mail id
bool used[maxn];  // scratch flags; main uses them to mark roots it has already counted
int rank[maxn];   // union-by-rank value of the tree rooted at a slot
int save[maxn];   // scratch list of the slots findroot passed on its way to the root

// Resets every slot to a singleton set: each slot is its own parent, every id
// maps to the slot with the same number, and all ranks are zero.
void make_set()
{
    for (int slot = 0; slot < maxn; ++slot)
    {
        father[slot] = slot;
        index[slot] = slot;
        ::rank[slot] = 0;
    }
}

// Returns the root of the tree containing slot `a` and re-points every slot
// visited on the way directly at that root (path compression).
//
// `save` lives at file scope rather than inside this function; the original
// note says it must not be declared here (presumably because an array of
// maxn ints is too large for the stack).
int findroot(int a)
{
    int visited = 0;
    while (a != father[a])
    {
        save[visited++] = a;
        a = father[a];
    }
    for (int i = 0; i < visited; ++i)
    {
        father[save[i]] = a;
    }
    return a;
}

/*
Earlier variant kept for reference: walks up to the root iteratively and does
NOT compress the path.
int findroot(int x)
{
    while(father[x] != x)
    {
        x=father[x];
    }
    return x;
}
*/

// Merges the sets that mail ids `x` and `y` currently belong to.
//
// Both ids are first translated through `index`, so an id that was detached
// earlier takes part via its replacement slot. Nothing happens when the two
// ids are already in the same set.
void Union(int x, int y)
{
    x = findroot(index[x]);
    y = findroot(index[y]);
    if (x == y)
    {
        return;
    }

    if (::rank[x] > ::rank[y])
    {
        // The higher-ranked tree stays the root.
        father[y] = x;
        return;
    }

    // rank[x] <= rank[y]: hang x under y; only a tie raises y's rank.
    if (::rank[x] == ::rank[y])
    {
        ::rank[y]++;
    }
    father[x] = y;
}
int main()
{
int n,m,i;
int ans = 0;
while(cin>>n>>m , n+m)
{
ans++;
memset(used, 0 ,sizeof(used));
int flag = n;
make_set();
char ch;
int a,b;
for(i=0; i<m; i++)
{
cin>>ch;
if(ch == 'M')
{
cin>>a>>b;
Union(a,b);
}
else if(ch == 'S')
{
int c;
cin>>c;
index[c] = flag;
flag++;
}
}
int count=0;
for(i=0; i<n; i++)
{
int p;
p = findroot(index[i]);
if(!used[p])
{
used[p] = 1;
count++;
}
}
cout<<"Case #"<<ans<<": "<<count<<endl;
}
}
//并查集要优化 不然会超时