SPOJ - DISUBSTR Distinct Substrings(后缀数组)

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题意: 给出一个字符串, 询问其中本质不同的子串的个数。

思路: 我们知道字符串的每一个后缀的所有前缀即是该字符串的所有子串, 所以不考虑重复即有n * (n + 1) / 2个子串

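考虑重复: 求出 height 数组, 其中 height[i] 表示排名为 i 的后缀与排名为 i - 1 的后缀的最长公共前缀长度。排名为 i 的后缀的前 height[i] 个前缀已经作为排名为 i - 1 的后缀的前缀出现过, 所以每个后缀只贡献 (后缀长度 - height[i]) 个新的子串, 总数中需要减去所有的 height[i]。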

$$ans = \frac{n(n + 1)}{2} - \sum_{i = 2}^{n} height[i]$$

#include <iostream>
//#include <unordered_map>
#include <time.h>
#include <algorithm>
#include <stdio.h>
#include <string.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;

#ifdef LOCAL
// debug(x): print "[<enclosing function>: <expression text> = <value>]" to cout.
// __FUNCTION__ is streamed rather than pasted next to the string literals,
// because on GCC and Clang it is a variable, not a literal, and cannot be
// concatenated by the preprocessor.
#define debug(x) cout << "[" << __FUNCTION__ << ": " #x " = " << (x) << "]\n"
// TIME: print the processor time used so far, converted from clock() ticks to
// milliseconds, and evaluate to 0 so that `return TIME;` works in main().
#define TIME (cout << "RuningTime: " << clock() * 1000.0 / CLOCKS_PER_SEC << "ms\n", 0)
#else
// Outside LOCAL builds both helpers compile away.
#define debug(x) ((void)0)
#define TIME 0
#endif
// Numeric constant 1e9 + 9 (not referenced by the code visible in this file).
#define hash_ 1000000009
// Continue(x) / Break(x): run statement x, then `continue` / `break` the
// enclosing loop. They expand to a bare brace block on purpose: wrapping them
// in do { ... } while (0) would make continue/break bind to that inner loop.
#define Continue(x) { x; continue; }
#define Break(x) { x; break; }
// Modulus 1e9 + 7, written out as an exact integer literal.
const int mod = 1000000007;
// Capacity of every fixed-size buffer in this file (5e4 + 10).
const int N = 50010;
// Large "infinity" values built from the repeating byte 0x3f; neither is
// referenced by the code visible in this file.
const int INF = 0x3f3f3f3f;
const ll LINF = 0x3f3f3f3f3f3f3f3fLL;
// gc: yield the next byte of stdin through read()'s static buffer, refilling it
// with fread() when it runs dry, or EOF when no more input is available.
// It only makes sense inside read(), which declares buf, p1 and p2.
// Note that the expansion already ends with its own ';'.
#define gc p1 == p2 && (p2 = (p1 = buf) + fread(buf, 1, 1000000, stdin), p1 == p2) ? EOF : *p1++;
// Read the next decimal integer from stdin.
// Skips every byte up to the first '-' or digit, accepts one optional leading
// '-', then consumes consecutive digits (plus the single byte that ends them).
// Returns 0 when the input ends before any digit is found.
// Overflow of int is not detected.
inline int read()
{
	static char buf[1000000], *p1 = buf, *p2 = buf;
	// Keep the byte in an int so that EOF stays distinguishable from data;
	// without the EOF test the skip loop below would never terminate at end of input.
	int ch = gc;
	while (ch != EOF && ch != '-' && (ch < '0' || ch > '9'))
		ch = gc;
	bool sgn = false;
	if (ch == '-')
	{
		sgn = true;
		ch = gc;
	}
	int x = 0;
	while (ch >= '0' && ch <= '9')
	{
		x = x * 10 + (ch - '0');
		ch = gc;
	}
	return sgn ? -x : x;
}
// Modular exponentiation by repeated squaring: returns a^b modulo mod.
//   a   - base; any 64-bit value, it is reduced into [0, mod) first
//   b   - exponent; values <= 0 are treated as 0
//   mod - modulus; must be positive
// The result always lies in [0, mod), so mod == 1 gives 0.
long long fpow(long long a, int b, int mod)
{
	// Reduce the base up front: otherwise a * a overflows for |a| >= 2^32,
	// and a negative base would produce a negative result.
	a %= mod;
	if (a < 0)
		a += mod;
	long long res = 1 % mod;
	for (; b > 0; b >>= 1)
	{
		if (b & 1)
			res = res * a % mod;
		a = a * a % mod;
	}
	return res;
}
// File-scope scratch variables. n, k and MX are not referenced by the code
// visible in this file (inside SA, `n` refers to the struct's own member).
int n;
int k;
int MX;
// Input buffer: main() reads each test string into s + 1, so the text is 1-indexed.
char s[N];
struct SA{
	int str[N]; // 原串信息
	int n, r; //n字符串长度 r基数
	int sa[N]; //排名为i的后缀位置+1 i取值1~n
	int cnt[N]; //基数排序辅助数组
	int rak[N]; //第i个后缀的排名
	int tmp[N]; //rak的辅助数组
	int heig[N]; //后缀排序相邻LCP
	void radix_sort(int *rk, int *tp)
	{
		memset(cnt, 0, sizeof cnt);
		for (int i = 1; i <= n; i++)
			cnt[rk[tp[i]]]++;
		for (int i = 1; i <= r; i++)
			cnt[i] += cnt[i - 1];
		for (int i = n; i >= 1; i--)
			sa[cnt[rk[tp[i]]]--] = tp[i];
	}
	void suffix()
	{
		int *rk = rak, *tp = tmp;
		for (int i = 1; i <= n; i++)
			rk[i] = str[i], tp[i] = i; // 默认初始顺序 
		r = 127; // 0 ~ 127
		radix_sort(rk, tp);
		for (int l = 1, p = 1, i; p < n; l <<= 1, r = p)
		{
			for (p = 0, i = n - l + 1; i <= n; i++)
				tp[++p] = i;
			for (i = 1; i <= n; i++)
				if (sa[i] > l)
					p++, tp[p] = sa[i] - l;
			radix_sort(rk, tp);
			swap(rk, tp);
			rk[sa[1]] = p = 1;
			for (i = 2; i <= n; i++)
			{
				if (tp[sa[i]] != tp[sa[i - 1]] || tp[sa[i] + l] != tp[sa[i - 1] + l])
					p++;
				rk[sa[i]] = p;
			}
		}
	}
	void get_height()//heig[i]为str[sa[i-1]]与str[sa[i]]的最长公共前缀
	{
		for (int i = 1; i <= n; i++) //*
			rak[sa[i]] = i;
		int k = 0;
		for (int i = 1; i <= n; i++)
		{
			if (k)   // heig[i] >= heig[i - 1] - 1
				k--;
			int j = sa[rak[i] - 1];
			while (str[i + k] == str[j + k])
				k++;
			heig[rak[i]] = k;
		}
	}
}sa;
// Reads the number of test cases, then one string per test case, and prints
// the number of distinct substrings of each string:
//   n * (n + 1) / 2  -  sum of heig[i] for i = 2..n
// i.e. all (start, length) pairs minus the prefixes each suffix shares with
// its predecessor in sorted order.
int main()
{
#ifdef LOCAL
	freopen("D:/input.txt", "r", stdin);
#endif
	int t;
	if (!(cin >> t))
		return TIME;
	while (t--)
	{
		// Stop on truncated input rather than re-processing the previous string.
		if (scanf("%s", s + 1) != 1)
			break;
		sa.n = static_cast<int>(strlen(s + 1));
		for (int i = 1; i <= sa.n; i++)
			sa.str[i] = s[i];
		// Reset per-test state so nothing from an earlier, longer test case
		// can be read past the end of the current string.
		sa.str[sa.n + 1] = 0;
		memset(sa.rak, 0, sizeof sa.rak);
		memset(sa.tmp, 0, sizeof sa.tmp);
		sa.suffix();
		sa.get_height();
		// Widen before multiplying: n * (n + 1) does not fit in int for n >= 46341.
		ll ans = static_cast<ll>(sa.n) * (sa.n + 1) / 2;
		for (int i = 2; i <= sa.n; i++)
			ans -= sa.heig[i];
		cout << ans << '\n';
	}
	return TIME;
}

 

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值