OI 字符串常用哈希方法

最新推荐文章于 2023-05-04 20:34:59 发布

HfCloud

最新推荐文章于 2023-05-04 20:34:59 发布

阅读量2.9k

点赞数 4

分类专栏：算法文章标签：算法

本文链接：https://blog.csdn.net/u013632138/article/details/52760459

版权

算法专栏收录该内容

23 篇文章 1 订阅

订阅专栏

(同步个人博客 http://sxysxy.org/blogs/44 到csdn

下面这个方法来自算法竞赛入门经典-训练指南

方法

对于一个字符串s，令h[i] = h[i+1]*x + s[i] ，其中x是你自选的一个常数。令xp[i] = xp[i-1]*x

这样之后定义s的起点为下标i，长度为len的子串的哈希值为 h[i] - h[i+len]*xp[len] 。这个值与子串的位置，子串的内容，还有你自选的常数都有关系。哈希值使用unsigned long long(如果不是oi，可以使用int128的话当然更好)　不同子串的哈希值一定不会相同吗？不一定，但是相同的概率非常非常小。如果觉得不够保险可以分别选定两次x常数，双哈希来做。

应用

1 找最长公共子串 http://syzoj.com/problem/186

dp超时，后缀数组RE了?(orz Fmuckss神犇)，后缀自动机不会写？来写易于理解速度也相当不错的哈希吧。

这里有两个字符串，分别给两个字符串做哈希，然后二分直接二分答案len，计算两个字符串每个起点开始长度为len的子串的哈希(O(n)的)。然后给第两个字符串的长度为len的子串的哈希值排序。枚举第一个字符串的哈希，在第二个字符串的哈希里面用lower_bound找(这也是之前给它排序的原因，排好序就可以lower_bound)，找到了就说明存在这样长度的最长公共子串。于是AC代码如下(最后总共用时约1800ms，约是后缀自动机的9倍):

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <list>
#include <queue>
#include <algorithm>
using namespace std;
typedef unsigned long long ULL;
#define MAXN 200003 
ULL xp[MAXN];
ULL hasha[MAXN];
ULL hashb[MAXN];
ULL ha[MAXN];
ULL hb[MAXN];

char A[MAXN], B[MAXN];
int x = 233;
int la, lb;
int mlen;
int minlen;
bool check(int len)
{
    for(int i = 0; i < la-len+1; i++)
        hasha[i] = ha[i] - ha[i+len]*xp[len];
    for(int i = 0; i < lb-len+1; i++)
        hashb[i] = hb[i] - hb[i+len]*xp[len];
    sort(hashb, hashb+lb-len+1);
    for(int i = 0; i < la-len+1; i++)
    {
        ULL h = hasha[i];
        int p = lower_bound(hashb, hashb+lb-len+1, h)-hashb;
        if(p < lb-len+1 && h == hashb[p])return true;
    }
    return false;
}

int main()
{
    scanf("%s %s", A, B);
    la = strlen(A);
    lb = strlen(B);
    mlen = max(la, lb);
    minlen = min(la, lb);
    for(int i = la-1; i >= 0; i--)
        ha[i] = ha[i+1]*x + A[i];
    for(int i = lb-1; i >= 0; i--)
        hb[i] = hb[i+1]*x + B[i];
    xp[0] = 1;
    for(int i = 1; i <= mlen; i++)
        xp[i] = xp[i-1]*x;
    int l = 0, r = minlen;
    int ans;
    while(l <= r)
    {
        int m = (l+r)>>1;
        if(check(m))
        {
            ans = m;
            l = m+1;
        }else
            r = m-1;
    }
    printf("%d\n", ans);
    return 0;
}

2 出现次数超过m次的最长的子串 (uva 12206 https://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&page=show_problem&category=505&problem=3358&mosmsg=Submission+received+with+ID+18140839

最长这个条件优先，因此还是二分长度。每次检查是否存在这样一个出现次数超过m次的子串即可。将哈希值排序可以把哈希值相同的子串的哈希值排在一起，这样之后就能O(n)时间判断出超过次数是否超过m次。代码

/*
submition url:
https://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&category=505&page=show_problem&problem=3358
*/
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cstring>
#include <list>
#include <queue>
#include <vector>
#include <algorithm>
#include <string>
using namespace std;
int m;
char str[40002];
typedef unsigned long long ULL;
ULL h[40002];
ULL hhash[40002];
ULL xp[40002];
int rrank[40002];
int x = 123;
int slen;
int pos;
bool cmp(int a, int b)
{
    return hhash[a] < hhash[b] || (hhash[a] == hhash[b] && a < b);
}
void calc()
{
    slen = strlen(str);
    h[slen] = 0;
    for(int i = slen-1; i >= 0; i--)
        h[i] = h[i+1]*x + str[i];
    xp[0] = 1;
    for(int i = 1; i <= slen; i++)
        xp[i] = xp[i-1]*x;
}
bool check(int len)
{
    for(int i = 0; i < slen-len+1; i++)
    {
        rrank[i] = i;
        hhash[i] = h[i] - h[i+len]*xp[len];
    }
    sort(rrank, rrank+slen-len+1, cmp);
    int times = 0;
    pos = -1;
    for(int i = 0; i < slen-len+1; i++)
    {
        if(i == 0 || hhash[rrank[i]] != hhash[rrank[i-1]])times = 0;
        if(++times >= m)
            pos = max(pos, rrank[i]);
    }
    return pos >= 0;
}
void solve()
{
    if(!check(1))
    {
        puts("none");
        return;
    }
    int l = 0, r = slen;
    int ans = 0;
    while(l <= r)
    {
        int m = (l+r)>>1;
        if(check(m))
        {
            ans = m;
            l = m+1;
        }else
            r = m-1;
    }
    check(ans);
    if(ans)
        printf("%d %d\n", ans, pos);
    else
        puts("none");
}
int main()
{
    while(true)
    {
        scanf("%d", &m);
        if(!m)break;
        scanf("%s", str);
        calc();
        solve();
    }
    return 0;
}

哈希用途广泛，是个非常好的东西。

HfCloud

关注

4
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
OI 字符串常用哈希方法

(同步个人博客 http://sxysxy.org/blogs/44 到csdn下面这个方法来自算法竞赛入门经典-训练指南方法对于一个字符串s，令h[i] = h[i+1]*x + s[i] ，其中x是你自选的一个常数。令xp[i] = xp[i-1]*x这样之后定义s的起点为下标i，长度为len的子串的哈希值为 h[i] - h[i+len]*xp[len] 。这个值与子串的位置，子串的内容，
复制链接

扫一扫