The 1st Universal Cup Stage 8: Slovenia, March 18-19, 2023(Differences-字符串hash)

nike0good

已于 2023-03-25 19:47:44 修改

阅读量278

点赞数

文章标签：哈希算法算法

于 2023-03-25 19:25:58 首次发布

本文链接：https://blog.csdn.net/nike0good/article/details/129770888

版权

给定一个包含N个长度为M的字符串列表，字符串仅包含字符A,B,C,D。列表中存在一个特殊字符串，它与其他所有字符串的哈明距离为K。程序需找到这个特殊字符串。文章描述了一种利用哈明距离和随机数的方法来解决此问题的算法思路。

摘要由CSDN通过智能技术生成

We have a list of N strings Si. All strings have length M and consist only of characters A, B, C and D.
Let us define the distance between two strings X and Y as the number of indices j, where the strings have
different characters (Xj ≠ Yj). We know that the list of strings Si contains precisely one special string
that has distance K to all other strings. Note that there might be other pairs of strings with a distance
of K. We are experiencing problems finding this special string, so please write a program to help us out.
Input
The first line contains space-separated integers N , M and K. Strings Si are given in the following N lines.
Constraints
• 2 ≤ N, M ≤ 10^5
• 1 ≤ K ≤ M
• N · M ≤ 2 · 10^7
Output
Output the index i of the special string. Strings are numbered from 1 to N as given in the input.
Examples
standard input standard output
5 10 2
DCDDDCCADA
ACADDCCADA
DBADDCCBDC
DBADDCCADA
ABADDCCADC
4
4 6 5
AABAAA
BAABBB
ABAAAA
ABBAAB
2

题意：给 $n$ 个长度为 $m$ 的串，串的字符集为 $A, B, C, D$ 。
找出一个串，和其他任何一个串的 Hamming 距离均为 $k$ 。

考虑hash，给每个串随机分配一个随机数 $p$ ，
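$f[l][c]$ 表示第 $l$ 个位置为字母 $c$ 的所有串的 $p$ 之和。
若答案为第 $i$ 个串，则有 $k\sum_{j\ne i} p_j=\sum_{l=1}^{m}\sum_{c \ne s[i][l]}f[l][c]$ ：右边按串 $j$ 重新分组后等于 $\sum_{j} p_j\cdot \mathrm{dist}(s_i,s_j)$ ，而答案串到其他每个串的距离都是 $k$ 。
该等式只是必要条件，为了降低误判概率可以多随机几次（例如同时使用多个模数），并对通过检验的候选串再做一次暴力验证。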

#include<bits/stdc++.h> 
using namespace std;
// ---- Loop shorthands -------------------------------------------------------
// For/Fork/ForD/ForkD/RepD use inclusive bounds; Rep is the usual half-open
// 0..n-1 loop. Each macro declares its own int loop variable.
#define For(i,n) for(int i=1;i<=n;i++)
#define Fork(i,k,n) for(int i=k;i<=n;i++)
#define ForkD(i,k,n) for(int i=n;i>=k;i--)
#define Rep(i,n) for(int i=0;i<n;i++)
#define ForD(i,n) for(int i=n;i;i--)
#define RepD(i,n) for(int i=n;i>=0;i--)
// Forp/Forpiter start at pre[x] / iter[x] and follow next[p] until p becomes 0.
// Forpiter binds p by reference, so iter[x] itself is advanced.
// They expect arrays named pre, next and iter, none of which is declared in
// the code visible here.
#define Forp(x) for(int p=pre[x];p;p=next[p])
#define Forpiter(x) for(int &p=iter[x];p;p=next[p])  
// 2*o and 2*o+1 for a variable named o that must be in scope at the use site.
#define Lson (o<<1)
#define Rson ((o<<1)+1)
// ---- memset helpers --------------------------------------------------------
// These expand to a full statement (note the trailing ';') and rely on
// sizeof(a), so `a` must be a real array, not a pointer.
// MEMI fills every byte with 0x3f, MEMi with 128 (0x80).
#define MEM(a) memset(a,0,sizeof(a));
#define MEMI(a) memset(a,0x3f,sizeof(a));
#define MEMi(a) memset(a,128,sizeof(a));
#define MEMx(a,b) memset(a,b,sizeof(a));
// Matches the int pattern produced by MEMI (every byte 0x3f).
#define INF (0x3f3f3f3f)
// ---- Container / pair abbreviations ----------------------------------------
#define pb push_back
#define mp make_pair
#define fi first
#define se second
#define vi vector<int> 
#define pi pair<int,int>
#define SI(a) ((a).size())
// ---- Output helpers --------------------------------------------------------
// Pr prints "Case #<kcase>: <ans>" using %lld for ans.
// PRi prints a[1..n] on one line separated by spaces; it expands to TWO
// statements, so it is not safe as the body of an unbraced if/loop.
// PRi2D prints a[1..n][1..m], one row per line.
#define Pr(kcase,ans) printf("Case #%d: %lld\n",kcase,ans);
#define PRi(a,n) For(i,n-1) cout<<a[i]<<' '; cout<<a[n]<<endl;
#define PRi2D(a,n,m) For(i,n) { \
						For(j,m-1) cout<<a[i][j]<<' ';\
						cout<<a[i][m]<<endl; \
						} 
// MSVC-style linker directive asking for a larger stack.
// NOTE(review): presumably ignored by non-MSVC toolchains — confirm for the target judge.
#pragma comment(linker, "/STACK:102400000,102400000")
// Expands to the begin/end iterator pair of x, for passing to algorithms.
#define ALL(x) (x).begin(),(x).end()
// In-place max/min update of a; both expand to a full statement (trailing ';').
#define gmax(a,b) a=max(a,b);
#define gmin(a,b) a=min(a,b);
// Integer / floating-point aliases used throughout the solution.
typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;

// The two moduli (1e9+7 and 1e9+9) under which values are reduced.
ll F[2]={1000000007,1000000009};

// Returns a*b reduced modulo F. The product a*b must fit in a long long.
// Note: the parameter F intentionally hides the global array of the same name.
ll mul(ll a,ll b,ll F)
{
	const ll product=a*b;
	return product%F;
}

// Returns a+b reduced modulo F (the sign follows C++ '%' for negative sums).
ll add(ll a,ll b,ll F)
{
	const ll total=a+b;
	return total%F;
}

// Returns a-b modulo F, shifted by F once so that a negative remainder
// becomes non-negative.
ll sub(ll a,ll b,ll F)
{
	ll diff=(a-b)%F;
	diff+=F;
	return diff%F;
}

// In-place modular accumulate: a becomes (a mod F + b mod F) mod F.
void upd(ll &a,ll b,ll F)
{
	const ll lhs=a%F;
	const ll rhs=b%F;
	a=(lhs+rhs)%F;
}

// Reads the next decimal integer from stdin via getchar().
//
// Skips every non-digit character before the number; if a '-' is seen while
// skipping, the result is negated. Consumes the digits plus the single
// character that terminates them.
//
// Returns the parsed value, or 0 if end of input is reached before any digit.
// No overflow detection is performed: the caller must ensure the value fits
// in an int.
inline int read()
{
	int x=0,sign=1;
	// Keep the result of getchar() in an int: EOF is outside the range of
	// char, and without an explicit EOF test the skip loop below would never
	// terminate once the input is exhausted.
	int ch=getchar();
	while(ch!=EOF&&!isdigit(ch)) {
		if (ch=='-') sign=-1;
		ch=getchar();
	}
	while(ch!=EOF&&isdigit(ch)) {
		x=x*10+(ch-'0');
		ch=getchar();
	}
	return x*sign;
}
// The input strings, in input order (0-based; main appends one per line read).
vector<string> v;
// Not referenced in the code visible here.
#define MAXN (2000000+10)
// f[j][c][l]: sum, modulo F[l], of the weights p[i][l] of every string i whose
// character at position j is 'A'+c. Sized m x 4 x 2 by main.
vector<vector<vector<ll> > > f;
// Not referenced in the code visible here.
#define MAXM (2000000+10)
// p[i][l]: hash weight attached to string i under modulus F[l]; filled in by main.
// The fixed first dimension (101010) bounds the number of strings supported.
ll p[101010][2];
// n = number of strings, m = length of each string, k = required distance
// (all three are read from the first input line by main).
int n,m,k;
bool ck(int i) {
	Rep(j,n) {
		if(i!=j) {
			int p=0;
			Rep(l,m) p+=v[i][l]!=v[j][l];
			if(p^k) return 0;	
		}
	}return 1;
}
int main()
{
//	freopen("F.in","r",stdin);
//	freopen(".out","w",stdout);
	p[0][0]=p[0][1]=1;
	cin>>n>>m>>k;
	For(i,n) Rep(l,2) {
		p[i][l]=((ll)RAND_MAX*rand()+rand())%F[l];
	}
	Rep(i,n) {
		string s;
		cin>>s;
		v.pb(s);
	}
	f.resize(m);
	Rep(i,m) {
		f[i].resize(4);
		Rep(j,4) {
			f[i][j].assign(2,0);
		}
	}
	Rep(i,n) {
		Rep(j,m) {
			int t=v[i][j]-'A';
			Rep(l,2) upd(f[j][t][l],p[i][l],F[l]);
		}
	}
	ll s[2]={0,0};
	Rep(i,n) Rep(l,2) upd(s[l],mul(p[i][l],k,F[l]),F[l]);
	Rep(i,n) {
		ll ans[2]={};
		Rep(j,m) {
			int t=v[i][j]-'A';
			Rep(k,4) {
				if(t!=k) Rep(l,2) upd(ans[l],f[j][k][l],F[l]);
			}
		}
		ll s2[2]={};
		Rep(l,2) s2[l] = sub(s[l],mul(p[i][l],k,F[l]),F[l]);
		bool fl=1;
		Rep(l,2) if(s2[l]!=ans[l]) fl=0;
		
		if(fl) {
			if(ck(i)){
				cout<<i+1<<endl;
				return 0;
			}
		}
	}
	
	return 0;
}