参考博客:点击打开链接
字符串 hash 典例。这里用的是 BKDRHash 法,也是最常用、冲突最少的一种。原理:把字符串和数值对应。这里用 base=31(一般用质数)。
先从后往前扫一遍,处理出每个位置到结尾构成的后缀的 hash 值(倒过来算的),然后长度为 l 的子串的 hash 值就好算了:hash[i] - hash[i+l]*nbase[l]。
之后枚举前 l 个起点,每个起点每次向后滚动 l 个字符:先插入 M 个块,再滑动约 S.size()/L 次,总复杂度约为 O(L*M + S.size()) 次 map 操作,可以过;这里用 map 判重。若对每个起点都重新扫一遍 M 个块,姿势不够优越,过不了:极限情况如 m=50000、l=1,复杂度为 O(s.size()*m),会超时。
关键一:求 hash 值的时候每个字符要 +1(即 s[i]-'a'+1),否则 'a' 对应 0,就像数字里的 100 和 10 去掉零以后无法区分一样:例如 "b" 和 "ba" 的 hash 值会相同。
开始担心这样相减会因为溢出而出现负值。其实不然。其一:unsigned long long 的运算自动对 2^64 取模,不会出现负值。
其二:hash 是从后向前递推的:hash[i] = hash[i+1]*base + s[i]-'a'+1(每一步都自动取模),展开后有 hash[i] = H(s[i..i+l-1]) + hash[i+l]*nbase[l](模 2^64),其中 H(s[i..i+l-1]) 是长度为 l 的子串自身的 hash 值。
所以 hash[i] - hash[i+l]*nbase[l] 在模 2^64 意义下恰好等于 H(s[i..i+l-1]),即使中间结果溢出也不影响结果。
WA:
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Number of entries in each fixed-size table declared below.
const int maxn = 100050;

// Multiplier of the polynomial string hash.
const int seed = 31;

// The two integers read at the start of every test case.
int l;
int m;

// base[k] is filled by main with seed^k, using unsigned 64-bit wraparound.
unsigned long long base[maxn];

// h[i] is the hash of the suffix of str that starts at index i.
unsigned long long h[maxn];

// seq[i] stores the hash main computes for the substring starting at index i.
unsigned long long seq[maxn];

// ans[i] is the per-position counter that main accumulates and then sums.
unsigned long long ans[maxn];

// Input string of the current test case.
char str[maxn];
// Hash of the len-character substring of str that starts at index `left`.
//
// h[] holds suffix hashes (h[i] = h[i+1]*seed + str[i]-'a'+1), so subtracting
// the suffix that begins right after the substring, scaled by base[len],
// leaves the substring's own hash. All arithmetic is unsigned 64-bit and
// wraps modulo 2^64.
//
// The value is returned at full 64-bit width: returning it as `int` would
// throw away the upper half of the hash and make collisions far more likely.
//
// Preconditions (not checked): h[] and base[] are already filled, and
// left + len does not exceed the length of the hashed string.
unsigned long long calc(int left,int len)
{
    const int past_end = left + len;   // first index after the substring
    return h[left] - h[past_end] * base[len];
}
// Driver of the first hashing attempt.
//
// The notes accompanying this program label it "WA", so the counting logic
// below is not known to be correct; it is kept exactly as written. Only the
// memory-safety and input-handling defects are repaired.
//
// For every test case it reads two integers (m, l) and a string, builds the
// suffix hashes, and prints one number per case.
int main()
{
    // Powers of the hash multiplier. The table has maxn entries, so the last
    // valid index is maxn - 1; writing base[maxn] would run past the array.
    base[0] = 1;
    for (int i = 1; i < maxn; i++)
        base[i] = base[i-1] * seed;

    // Require both integers to be parsed. Testing only for EOF would spin
    // forever on malformed input, because scanf then returns 0 without
    // consuming anything.
    while (scanf("%d%d", &m, &l) == 2)
    {
        getchar();
        // NOTE(review): the read is unbounded; assumes the string is shorter
        // than maxn characters.
        if (scanf("%s", str) != 1)
            break;
        const int len = static_cast<int>(strlen(str));

        // Suffix hashes, built right to left. The +1 keeps 'a' from mapping
        // to the digit 0.
        h[len] = 0;
        for (int i = len - 1; i >= 0; i--)
            h[i] = h[i+1] * seed + str[i] - 'a' + 1;

        memset(seq, 0, sizeof(seq));
        memset(ans, 0, sizeof(ans));
        // NOTE(review): assumes m < maxn; not checked.
        for (int i = 0; i < m; i++)
            ans[i] = 1;

        for (int i = 0; i < len - m; i++)
        {
            // Store the hash of the length-m substring at i directly in the
            // 64-bit table, without narrowing it through an int first.
            seq[i] = calc(i, m);
            if (i < m)
                continue;
            // Extend the counter when this substring differs from the one
            // exactly m positions earlier.
            if (seq[i] != seq[i-m])
                ans[i] = ans[i-m] + 1;
        }

        int sum = 0;
        for (int i = 0; i < len; i++)
            if (ans[i] >= l)
                sum += ans[i] - l + 1;
        printf("%d\n", sum);
    }
    return 0;
}
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<map>
#include<string>
// 64-bit unsigned type used for every hash value; overflow wraps modulo 2^64.
typedef unsigned long long ull;

using namespace std;

// Number of entries in each fixed-size table declared below.
const int maxn = 100050;

// Multiplier of the polynomial string hash.
const ull base = 31;

// nbase[k] is filled by main with base^k.
ull nbase[maxn];

// hash[i] is the hash of the suffix of the input string starting at index i.
// NOTE(review): because of the using-directive above, an unqualified use of
// this name can clash with std::hash on C++11 and later compilers.
ull hash[maxn];

// The two integers read at the start of every test case.
int m;
int l;

// Occurrence count of each piece hash inside the window being examined.
map<ull, int> mp;
int main()
{
ull tmp;
nbase[0] = 1;
for (int i = 1;i<maxn; i++)
{
nbase[i]=nbase[i-1]*base;
}
while (~scanf("%d%d",&m,&l))
{
string s;
cin>>s;
int slen=s.size();
hash[slen] = 0;
for (int i = slen-1; i >= 0; i--)
hash[i] = hash[i+1]*base+s[i]-'a'+1; //关键1
int ans = 0;
for (int i = 0; i<l&&i+m*l<=slen; i++)
{
mp.clear();
for (int j = i; j<i+m*l; j += l)
{
tmp = hash[j] - hash[j+l]*nbase[l];
mp[tmp]++;
}
if (mp.size() ==m)
ans++;
for (int j=i+m*l; j+l<=slen; j +=l)
{
tmp = hash[j-m*l] - hash[j-(m-1)*l]*nbase[l];
mp[tmp]--;
if (mp[tmp] == 0)
mp.erase(tmp);
tmp = hash[j] - hash[j+l]*nbase[l];
mp[tmp]++;
if (mp.size() == m)
ans++;
}
}
printf("%d\n", ans);
}
return 0;
}