@(ACM题目)[字符串]
KMP
A - Number Sequence
描述:给定A和B两个数列,问B在A中第一次出现的位置(从1开始计数),若B未在A中出现则输出-1。
分析:B为A子串,KMP模板题。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem A - Number Sequence.
// For every test case: reads n and m, then the n integers of sequence a and
// the m integers of sequence b, and prints the 1-based index at which b first
// appears in a as a contiguous run, or -1 when it never appears.
const int maxn = 1000000 + 5;  // capacity of a (and of fail)
const int maxm = 10000 + 5;    // capacity of b
int fail[maxn], a[maxn], b[maxm];
int n, m;  // lengths of a and b for the current test case
int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;  // no test count: nothing to do
    while (T--)
    {
        // Use the file-scope n and m; re-declaring them here only shadowed them.
        if (scanf("%d%d", &n, &m) != 2) break;
        for (int i = 0; i < n; ++i) scanf("%d", a + i);
        for (int i = 0; i < m; ++i) scanf("%d", b + i);

        // fail[len] = length of the longest proper border of b[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < m; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[border] != b[pos]) border = fail[border];
            if (b[border] == b[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        int answer = -1;  // stays -1 when b does not occur in a
        int matched = 0;  // number of leading elements of b currently matched
        for (int pos = 0; pos < n; ++pos)
        {
            while (matched != 0 && a[pos] != b[matched]) matched = fail[matched];
            if (a[pos] == b[matched]) ++matched;
            if (matched == m)
            {
                // The match ends at pos; convert its start to a 1-based index.
                answer = pos - m + 2;
                break;
            }
        }
        printf("%d\n", answer);
    }
    return 0;
}
B - Oulipo
描述:给定字符串a和b,问b在a中出现的次数(各匹配的子串可以重叠)。
分析:KMP中,每次匹配后沿fail指针跳一下。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem B - Oulipo.
// For every test case: reads a pattern followed by a text and prints how many
// times the pattern occurs in the text. Occurrences are allowed to overlap.
const int maxn = 1000000 + 5;  // capacity of the text buffer a (and of fail)
const int maxm = 10000 + 5;    // capacity of the pattern buffer b
int fail[maxn];
char a[maxn], b[maxm];
int n, m;  // lengths of the text a and of the pattern b
int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;  // no test count: nothing to do
    while (T--)
    {
        // The pattern is read first, the text second.
        if (scanf("%s%s", b, a) != 2) break;
        // Use the file-scope n and m; re-declaring them here only shadowed them.
        n = static_cast<int>(strlen(a));
        m = static_cast<int>(strlen(b));

        // fail[len] = length of the longest proper border of b[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < m; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[border] != b[pos]) border = fail[border];
            if (b[border] == b[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        int occurrences = 0;
        int matched = 0;  // number of leading pattern characters currently matched
        for (int pos = 0; pos < n; ++pos)
        {
            while (matched != 0 && a[pos] != b[matched]) matched = fail[matched];
            if (a[pos] == b[matched]) ++matched;
            if (matched == m)
            {
                ++occurrences;
                // Fall back along the failure link so that an occurrence
                // overlapping this one can still be found.
                matched = fail[matched];
            }
        }
        printf("%d\n", occurrences);
    }
    return 0;
}
C - 剪花布条
描述:给定字符串a和b,问b在a中出现的次数(各匹配的子串不可以重叠)。
分析:KMP中,每次匹配后不是fail指针跳一下,而是再从模式串开始位置0进行匹配。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem C - counting non-overlapping occurrences.
// Repeatedly reads a text and a pattern and prints how many non-overlapping
// occurrences of the pattern the text contains. Input ends at end-of-file or
// at a text whose first character is '#'.
const int maxn = 1000 + 5;  // capacity of the text buffer a (and of fail)
const int maxm = 1000 + 5;  // capacity of the pattern buffer b
int fail[maxn];
char a[maxn], b[maxm];
int n, m;  // lengths of the text a and of the pattern b
int main()
{
    while (scanf("%s", a) == 1 && a[0] != '#')
    {
        if (scanf("%s", b) != 1) break;  // text without a pattern: stop
        // Use the file-scope n and m; re-declaring them in main only shadowed them.
        n = static_cast<int>(strlen(a));
        m = static_cast<int>(strlen(b));

        // fail[len] = length of the longest proper border of b[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < m; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[border] != b[pos]) border = fail[border];
            if (b[border] == b[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        int pieces = 0;
        int matched = 0;  // number of leading pattern characters currently matched
        for (int pos = 0; pos < n; ++pos)
        {
            while (matched != 0 && a[pos] != b[matched]) matched = fail[matched];
            if (a[pos] == b[matched]) ++matched;
            if (matched == m)
            {
                ++pieces;
                // Restart from the beginning of the pattern so the next
                // occurrence cannot reuse characters of this one.
                matched = 0;
            }
        }
        printf("%d\n", pieces);
    }
    return 0;
}
D - Cyclic Nacklace
描述:给定一个字符串,问最少在末尾添加几个字符,能使它成为周期字符串(由循环节重复至少两次构成)。
分析:KMP求循环节模板题,循环节长度为 `n-fail[n]`。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem D - Cyclic Nacklace.
// For every test case: reads a string and prints the minimum number of
// characters that must be appended so that the whole string consists of a
// cycle repeated at least twice.
const int maxn = 100000 + 5;  // capacity of s and fail
char s[maxn];
int fail[maxn];
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases--)
    {
        scanf("%s", s);
        const int n = static_cast<int>(strlen(s));

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        const int border = fail[n];
        const int cycle = n - border;  // length of the shortest cycle of s
        int extra;
        if (cycle == n || n % cycle != 0)
        {
            // Either no repetition at all or a trailing partial cycle:
            // pad the partial part up to one full cycle.
            extra = cycle - border % cycle;
        }
        else
        {
            // Already made of two or more complete cycles.
            extra = 0;
        }
        printf("%d\n", extra);
    }
    return 0;
}
E - Period
描述:对字符串的每个前缀,若它由某个循环节重复至少两次构成,则输出该前缀的长度和重复次数(周期数)。
分析:KMP求循环节模板题,前缀长度除以循环节长度即为周期数。注意要对每个前缀分别求解。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem E - Period.
// Repeatedly reads a length n and a string; n == 0 (or end-of-file) ends the
// input. For every prefix of length 2..n that is a cycle repeated at least
// twice, prints the prefix length and the number of repetitions. Each test
// case is preceded by a "Test case #k" header and followed by a blank line.
const int maxn = 1e6 + 5;  // capacity of s and fail
char s[maxn];
int fail[maxn];
int main()
{
    int n;
    int caseNo = 1;
    while (scanf("%d", &n) != EOF && n)
    {
        scanf("%s", s);

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        printf("Test case #%d\n", caseNo++);
        for (int prefixLen = 2; prefixLen <= n; ++prefixLen)
        {
            const int cycle = prefixLen - fail[prefixLen];
            // Skip prefixes with no border or with a trailing partial cycle.
            if (cycle == prefixLen || prefixLen % cycle != 0) continue;
            printf("%d %d\n", prefixLen, prefixLen / cycle);
        }
        puts("");
    }
    return 0;
}
F - The Minimum Length
描述:将一个字符串A重复若干次得到字符串B=AAA…,取出B的一个子串C。现给定C,问A最短是多少
分析:KMP循环节模板题。模板求出的循环节本身就允许末尾有残缺,如 `abcabcab` 中,`n-fail[n]` 的结果为3,允许末尾有残缺的 `ab`。
代码:
#include<bits/stdc++.h>
using namespace std;
// Problem F - The Minimum Length.
// Repeatedly reads a length n and a string; n == 0 (or end-of-file) ends the
// input. Prints n - fail[n], the length of the shortest cycle of the string
// (the last repetition of the cycle may be incomplete).
// NOTE(review): the "length, then string" input layout mirrors problem E's
// reader - confirm it against the actual problem statement.
const int maxn = 1e6 + 5;  // capacity of s and fail
char s[maxn];
int fail[maxn];
int main()
{
    int n;
    while (scanf("%d", &n) != EOF && n)
    {
        scanf("%s", s);

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        const int shortestCycle = n - fail[n];
        printf("%d\n", shortestCycle);
    }
    return 0;
}
G - Power Strings
描述:给定周期字符串,求周期。
分析:KMP求循环节模板题。先判断是否是周期大于1的周期字符串(字符串长度是循环节的倍数),是则输出周期,不是则周期为1。
代码:
#include<cstdio>
#include<cstring>
using namespace std;
// Problem G - Power Strings.
// Repeatedly reads a string until end-of-file or a string starting with '.'.
// Prints how many times the shortest cycle repeats to form the string, or 1
// when the string is not a whole number of cycles.
const int maxn = 1e6 + 5;  // capacity of s and fail
char s[maxn];
int fail[maxn];
int main()
{
    int n;
    while (scanf("%s", s) != EOF && s[0] != '.')
    {
        n = static_cast<int>(strlen(s));

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        const int cycle = n - fail[n];
        // Only a cycle that divides the length yields a repetition count > 1.
        const int repeats = (n % cycle == 0) ? n / cycle : 1;
        printf("%d\n", repeats);
    }
    return 0;
}
H - Seek the Name, Seek the Fame
描述:给定一个字符串,找出所有“既是该字符串前缀,又是该字符串后缀”的字符串。
分析:KMP中,从 `fail[n]` 开始一直沿着fail指针跳到字符串开头即可。
代码:
#include<cstdio>
#include<cstring>
#include<stack>
using namespace std;
// Problem H - Seek the Name, Seek the Fame.
// For every input string prints, in increasing order and separated by single
// spaces, every length L such that the first L characters equal the last L
// characters (the full length is always included).
const int maxn = 400000 + 5;  // capacity of s and fail
char s[maxn];
int fail[maxn];
std::stack<int> st;  // border lengths; pushed longest first, popped shortest first
int main()
{
    while (scanf("%s", s) != EOF)
    {
        while (!st.empty()) st.pop();
        const int n = static_cast<int>(strlen(s));

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // Walk the failure chain from the whole string down to length 0.
        st.push(n);
        for (int border = fail[n]; border != 0; border = fail[border])
            st.push(border);

        // The stack reverses the chain, so lengths come out ascending.
        printf("%d", st.top());
        st.pop();
        while (!st.empty())
        {
            printf(" %d", st.top());
            st.pop();
        }
        puts("");
    }
    return 0;
}
I - Blue Jeans
描述:给定最多10个字符串,每个字符串长度均为60,求它们的最长公共子串中字典序最小的。最长公共子串长度小于3认为无解。
分析:枚举第一个字符串的所有子串,对每个子串对每个字符串跑KMP。
代码:
#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
const int LEN = 60;    // length of every input sequence
const int maxn = 100;  // capacity of one sequence / pattern buffer and of fail
const int maxm = 15;   // number of rows available in s
char s[maxm][maxn];    // the input sequences
char p[maxn];          // scratch buffer for the candidate substring
int fail[maxn];        // failure table of the most recently prepared pattern

// Builds the KMP failure table of the NUL-terminated pattern `s` into the
// global `fail`: fail[len] is the length of the longest proper border of the
// first `len` characters. Writes fail[0..max(1, strlen(s))].
// Precondition: strlen(s) < maxn, otherwise the table would overflow.
void getFail(const char *s)
{
    fail[0] = fail[1] = 0;
    const int n = static_cast<int>(strlen(s));
    for (int i = 1; i < n; ++i)
    {
        int j = fail[i];
        while (j && s[j] != s[i]) j = fail[j];
        fail[i + 1] = (s[j] == s[i]) ? j + 1 : 0;
    }
}

// Returns true when pattern `P` occurs as a contiguous substring of text `T`.
// Both arguments are NUL-terminated and are only read. The empty pattern is
// treated as occurring in every text, including the empty one.
// Overwrites the global `fail` with the table for `P`.
// Precondition: strlen(P) < maxn.
bool finda(const char *P, const char *T)
{
    const int n = static_cast<int>(strlen(T));
    const int m = static_cast<int>(strlen(P));
    if (m == 0) return true;  // previously true only when T was non-empty
    getFail(P);
    int j = 0;  // number of leading pattern characters currently matched
    for (int i = 0; i < n; ++i)
    {
        while (j && P[j] != T[i]) j = fail[j];
        if (P[j] == T[i]) ++j;
        if (j == m) return true;
    }
    return false;
}
// Best answer found so far for the current data set. "Z" (length 1) is the
// "nothing found" marker: every real candidate has at least 3 characters.
std::string res = "Z";

// Problem I - Blue Jeans.
// For every data set: reads n sequences and prints the longest substring of
// the first sequence (length >= 3) that occurs in all of them, preferring the
// lexicographically smallest among equally long ones, or the message
// "no significant commonalities" when there is none.
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases--)
    {
        res = "Z";
        int n;
        scanf("%d", &n);
        for (int row = 0; row < n; ++row) scanf("%s", s[row]);

        for (int len = 3; len <= LEN; ++len)
        {
            // strncpy below copies exactly len characters and adds no
            // terminator, so place it once per length.
            p[len] = '\0';
            for (int start = 0; start + len <= LEN; ++start)
            {
                strncpy(p, s[0] + start, len);

                bool inAll = true;
                for (int row = 0; row < n && inAll; ++row)
                    inAll = finda(p, s[row]);
                if (!inAll) continue;

                const std::string cur = p;
                if (cur.length() > res.length())
                    res = cur;
                else if (cur.length() == res.length() && cur < res)
                    res = cur;
            }
        }

        if (res == "Z") res = "no significant commonalities";
        printf("%s\n", res.c_str());
    }
    return 0;
}
J - Simpsons’ Hidden Talents
描述:给定两个字符串A和B,求一个字符串C,它是A的前缀,B的后缀。求最长的C。
分析:
- 解法一:将A与B拼接,求fail[n],并不断沿着fail指针跳转,直到长度小于字符串A和B的长度。
- 解法二:将A与B拼接,中间加一个两个字符串中都不会出现的特殊字符,如 `@`,此时 `fail[n]` 不会跨过分隔符,直接就是答案。
- 解法三:将A作为模式串,跑KMP,记录能到达的最大前缀即可。
代码:
下面代码中使用解法一。
#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
// Problem J - Simpsons' Hidden Talents.
// Repeatedly reads two strings A and B and prints the longest string that is
// both a prefix of A and a suffix of B followed by its length, or just 0 when
// no such non-empty string exists.
const int maxn = 50000 + 50000 + 5;  // capacity for the concatenation A + B
std::string s;                       // A, later A + B
char s1[maxn];                       // input buffer, reused for A and then B
int fail[maxn];
int main()
{
    while (scanf("%s", s1) != EOF)
    {
        s = s1;
        const int firstLen = static_cast<int>(s.size());
        scanf("%s", s1);
        const int secondLen = static_cast<int>(strlen(s1));
        // The answer cannot be longer than the shorter of the two strings.
        const int bound = secondLen < firstLen ? secondLen : firstLen;
        s += s1;

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = 0;
        fail[1] = 0;
        const int n = static_cast<int>(s.size());
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // Shrink along the failure chain until the border fits inside both
        // A and B (bound is never negative, so this also stops at 0).
        int best = fail[n];
        while (best > bound) best = fail[best];

        if (best != 0) printf("%.*s %d\n", best, s.c_str(), best);
        else puts("0");
    }
    return 0;
}
K - Count the string
描述:给定一个字符串,求它的所有前缀在该字符串中出现的次数之和,即对于前缀 $i$,它在字符串中出现 $s_i$ 次,求 $\sum s_i$(结果对10007取模)。
分析:考察每个前缀 $i$,统计它的后缀中是前缀的个数 $t_i$,则 $\sum s_i = \sum t_i$。fail数组的一种含义为:fail[i]=j代表字符串的前i个字符中,最大的j(j小于i),使得前j个字符等于后j个字符。那么 $t_i$ 就等于 $t_{fail_i}$ 加上1(该字符串本身)。
代码:
#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
// Problem K - Count the string.
// For every test case: reads the length n and the string, then prints, modulo
// 10007, the total number of occurrences of all prefixes inside the string.
const int maxn = 2e5 + 5;      // capacity of s, fail and dp
char s[maxn];
int fail[maxn];
long long dp[maxn];            // dp[len]: prefixes that are suffixes of s[0..len)
const long long MOD = 10007;
int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;  // no test count: nothing to do
    while (T--)
    {
        int n;
        // "%s" needs a char*; pass the array itself rather than &s, whose
        // type is char (*)[maxn].
        if (scanf("%d%s", &n, s) != 2) break;

        // fail[len] = length of the longest proper border of s[0..len).
        fail[0] = fail[1] = 0;
        for (int i = 1; i < n; ++i)
        {
            int j = fail[i];
            while (j && s[j] != s[i]) j = fail[j];
            fail[i + 1] = (s[j] == s[i]) ? j + 1 : 0;
        }

        // A prefix of length len ends with itself plus everything its longest
        // border ends with. dp[0] = 0 also covers the border-less case.
        dp[0] = 0;
        long long res = 0;
        for (int len = 1; len <= n; ++len)
        {
            dp[len] = (dp[fail[len]] + 1) % MOD;
            res = (res + dp[len]) % MOD;
        }
        printf("%lld\n", res);
    }
    return 0;
}