一、概述
取一个特定的进制(base),将字符串中的每个字符都转化为唯一对应的一个数字,再像十进制数那样按位权计算出该字符串对应的数值,就能得到这个字符串的哈希值。例如只含小写字母的字符串,直观上可以看作一个"26 进制"数(实际选取进制时的要求见下文)。
首先不要把任意字符对应到数字0,比如假如把a对应到数字0,那么将不能只从Hash结果上区分ab和b(虽然可以额外判断字符串长度,但不把任意字符对应到数字0更加省事且没有任何副作用),一般而言,把a-z对应到数字1-26比较合适。
进制的选择实际上非常自由,只需满足两点:一是大于所有字符对应数字的最大值;二是不含模数的质因子,即与模数互质(否则取模就失去了意义)。
二、模板
P3370 【模板】字符串哈希 - 洛谷 | 计算机科学教育新生态 (luogu.com.cn)
1.自然溢出哈希
unsigned long long 溢出时会自动对 $2^{64}$ 取模,因此不需要手动取模;只要出题人不专门构造数据卡自然溢出,这种写法就可以通过。
#include<bits/stdc++.h>
using namespace std;
// Fast-I/O helper macro; note that the expansion already ends in ';'.
#define qio ios::sync_with_stdio(0), cin.tie(0), cout.tie(0);
typedef unsigned long long ull;

const int N = 1e4 + 10;  // capacity of the 1-based arrays below
int n, ans = 1;          // n: number of input strings; ans: distinct-hash counter, starts at 1
ull a[N];                // a[i]: hash of the i-th input string
std::string s[N];        // s[i]: the i-th input string
ull base = 131;          // radix of the polynomial hash

// Polynomial hash of `s` in radix `base`, relying on natural overflow:
// unsigned 64-bit arithmetic wraps, so the result is implicitly reduced
// modulo 2^64 and no explicit modulus is needed.
//
// The string is taken by const reference so that hashing does not copy it.
// Returns 0 for the empty string.
ull hashs(const std::string& s) {
    ull res = 0;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        // Each char is converted straight to ull (same conversion as before).
        res = res * base + static_cast<ull>(s[i]);
    }
    return res;
}
int main() {
cin >> n;
for (int i = 1; i <= n; i++) {
cin >> s[i];
a[i] = hashs(s[i]);
}
sort(a + 1, a + 1 + n);
for (int i = 2; i <= n; i++) if (a[i] != a[i - 1]) ans++;
cout << ans;
}
2.单哈希
取一个很大的质数作为模数 mod,计算过程中每一步都对哈希值取模;模数越大,不同字符串的哈希值相同(发生冲突)的概率就越小。
ull mod = 212370440130137957ll;
#include<bits/stdc++.h>
using namespace std;
// Fast-I/O helper macro; note that the expansion already ends in ';'.
#define qio ios::sync_with_stdio(0), cin.tie(0), cout.tie(0);
typedef unsigned long long ull;

const int N = 1e4 + 10;  // capacity of the 1-based arrays below
ull base = 131;          // radix of the polynomial hash
// Large modulus for the single-modulus hash. The literal must fit in 64 bits:
// a 20-digit value ending in "11" (a mangled "ll" suffix) is larger than
// ULLONG_MAX and is rejected or silently truncated depending on the compiler.
ull mod = 212370440130137957ULL;
int n, ans = 1;          // n: number of input strings; ans: distinct-hash counter, starts at 1
ull a[N];                // a[i]: hash of the i-th input string
std::string s[N];        // s[i]: the i-th input string

// Polynomial hash of `s` in radix `base`, reduced by `mod` after every step.
//
// The string is taken by const reference so that hashing does not copy it.
// Returns 0 for the empty string; the result is always < mod.
//
// NOTE: `res * base` can exceed 2^64 because mod * base does, so the product
// may wrap before the reduction. The hash stays deterministic, which is all
// the equality-based counting needs.
ull hashs(const std::string& s) {
    ull res = 0;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        res = (res * base + static_cast<ull>(s[i])) % mod;
    }
    return res;
}
int main() {
cin >> n;
for (int i = 1; i <= n; i++) {
cin >> s[i];
a[i] = hashs(s[i]);
}
sort(a + 1, a + 1 + n);
for (int i = 2; i <= n; i++) if (a[i] != a[i - 1]) ans++;
cout << ans;
}
3.双哈希
用两个不同的 mod 分别取模,得到两个哈希值;只有两个哈希值都相同时,才认为两个字符串相同。
#include<bits/stdc++.h>
using namespace std;
// Fast-I/O helper macro; note that the expansion already ends in ';'.
#define qio ios::sync_with_stdio(0), cin.tie(0), cout.tie(0);
typedef unsigned long long ull;

const int N = 1e4 + 10;  // capacity of the 1-based arrays below
ull base = 131;          // radix shared by both polynomial hashes
// Modulus of the first hash. The literal must fit in 64 bits: a 20-digit
// value ending in "11" (a mangled "ll" suffix) is larger than ULLONG_MAX.
ull mod = 212370440130137957ULL;
// Modulus of the second hash. It has to differ from `mod`, otherwise both
// hashes are the same function and the second one adds no information.
ull mod2 = 1000000007ULL;
int n, ans = 1;          // n: number of input strings; ans: distinct-pair counter, starts at 1

// Pair of hash values (x from hash1, y from hash2) for one string.
struct num {
    ull x, y;
    // Orders by x first and by y on ties, so that after sorting, entries with
    // identical (x, y) are guaranteed to be adjacent.
    bool operator < (const num& u) const {
        if (x != u.x) return x < u.x;
        return y < u.y;
    }
} a[N];
std::string s[N];        // s[i]: the i-th input string

// First hash: polynomial in radix `base`, reduced by `mod` after every step.
// Takes the string by const reference to avoid copying it.
// NOTE: `res * base` can exceed 2^64 because mod * base does, so the product
// may wrap before the reduction; the result is still deterministic.
ull hash1(const std::string& s) {
    ull res = 0;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        res = (res * base + static_cast<ull>(s[i])) % mod;
    }
    return res;
}

// Second hash: same polynomial, reduced by the independent modulus `mod2`.
// Takes the string by const reference to avoid copying it.
ull hash2(const std::string& s) {
    ull res = 0;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        res = (res * base + static_cast<ull>(s[i])) % mod2;
    }
    return res;
}
// Reads n strings, stores the (hash1, hash2) pair of each, and prints how
// many different pairs were produced.
int main() {
    cin >> n;
    // 1-based storage: the i-th string goes to s[i], its hash pair to a[i].
    for (int i = 1; i <= n; i++) {
        cin >> s[i];
        a[i].x = hash1(s[i]);
        a[i].y = hash2(s[i]);
    }
    // Sorting (via num::operator<) groups entries; the scan below relies on
    // equal pairs ending up next to each other.
    sort(a + 1, a + 1 + n);
    for (int i = 2; i <= n; i++) {
        // Two pairs are different as soon as EITHER component differs.
        // Requiring both to differ would merge strings that collide in only
        // one of the two hashes, which defeats the point of double hashing.
        if (a[i].x != a[i - 1].x || a[i].y != a[i - 1].y) ans++;
    }
    cout << ans;
    return 0;
}