题意:对于给定的n和p,求对于所有的0<=i<p,满足C(n,k)%p=i的k的个数。
注:p虽然要输入,但是题目标注了所以测试点的p是固定的。
首先需要用正确的姿势理解lucas定理,比如求C(n,r)%p,就是将n和r分别转换为p进制,然后依次算组合数乘起来。n是一个高精度数,求C(n,r)的过程中,n不断模p得到的数(即n的p进制表示)是固定的。
就是长这样:n0!/((n0-k0)!*k0!) * n1!/((n1-k1)!*k1!)。。。其中n0,n1...都是定值,k0,k1...都是枚举的值,最终的k是由这些k0,k1...决定。这样容易让人想到生成函数。由于是在模意义下做,每个数都在0到p-1之间。而分母是乘起来而不是加起来,我们可以将每个数取离散对数,将乘法变成加法,但是注意是不能对0取对数的,好在0我们可以特殊处理,即Lucas定理的乘起来的若干个组合数中如果有ki>ni则为0。
然后对分母FFT一下就可以了。注意答案是模29,基本操作是在模51061下做,两个模数不要搞混了,答案模的29比较小可以直接用double来FFT,卷积起来不会溢出的。。(因为如果模998244353就太明显了)
#include<iostream>
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<vector>
#define rep(i,a,b) for(int i=a;i<=b;++i)
#define erp(i,a,b) for(int i=a;i>=b;--i)
#define LL long long
#define DB double
using namespace std;
const DB PI = acos(-1);
const int mo = 51061; // primitive root of which is 7.
const int mn = 65540;
const int lim = 100000000ll;
struct bign
{
LL a[50];
int len;
bign() { memset(a, 0, sizeof a); len = 1; }
void operator /= (LL b)
{
erp(i, len, 2) a[i-1] += a[i]%b*lim, a[i] /= b;
for (a[1]/=b; len>1&&!a[len]; --len);
}
bign operator / (LL b)
{
bign t = *this; t /= b; return t;
}
LL operator % (LL b)
{
LL res = 0, tl = lim%b;
erp(i, len, 1) res = (res*tl%b + a[i]%b) % b;
return res;
}
bool zero() { return len==1&&!a[1]; }
void readstr(const char*s)
{
len = strlen(s);
for (int i = 0, j; j=(len-i-1)/8+1, i<len; ++i)
a[j] = a[j]*10 + s[i]-'0';
len = (len+7) / 8;
}
};
int jc[mn], rjc[mn], lnd[mn], pw[mn];
void math_table()
{
jc[0] = jc[1] = rjc[0] = rjc[1] = 1;
rep(i, 2, mo) jc[i]=1ll*jc[i-1]*i%mo, rjc[i]=1ll*(mo-mo/i)*rjc[mo%i]%mo;
rep(i, 2, mo) rjc[i] = 1ll*rjc[i-1]*rjc[i]%mo;
pw[0]=1; rep(i,1,mo-1) pw[i] = 7ll*pw[i-1]%mo;
rep(i,0,mo-1) lnd[pw[i]]=i;
}
int fenmu(int n, int k)
{
return lnd[1ll*rjc[k]*rjc[n-k]%mo];
}
struct cpx
{
DB r, i;
cpx() {}
cpx(DB a, DB b):r(a), i(b){};
inline cpx operator + (cpx b) { return cpx(r+b.r, i+b.i); }
inline cpx operator - (cpx b) { return cpx(r-b.r, i-b.i); }
inline cpx operator * (cpx b) { return cpx(r*b.r - i*b.i, r*b.i+i*b.r); }
};
namespace FFT
{
int r[mn*2];
cpx a[mn*2], b[mn*2];
void fft(cpx*a, int flag, int N)
{
rep(i, 0, N-1) if (i<r[i]) swap(a[i], a[r[i]]);
for (int i = 1; i<N; i<<=1)
{
cpx wn(cos(PI/i), flag*sin(PI/i));
for (int j = 0; j<N; j+=i<<1)
{
cpx w(1, 0);
for (int k = 0; k<i; ++k, w=w*wn)
{
cpx x = a[j+k], y = w*a[j+k+i];
a[j+k] = x+y, a[j+k+i] = x-y;
}
}
}
if (flag<0) rep(i, 0, N-1) a[i].r /= N;
}
void polymul(int*p1, int*p2, int m) //卷积后保存在p1中
{
rep(i, 0, mo-1) a[i]=cpx(p1[i], 0), b[i]=cpx(p2[i], 0);
rep(i, mo, m) a[i] = b[i] = cpx(0, 0);
fft(a, 1, m); fft(b, 1, m);
rep(i, 0, m-1) a[i] = a[i]*b[i];
fft(a, -1, m);
rep(i, 0, m-1) p1[i] = a[i].r+0.5;
}
}
int res[mn*2], a[mn*2], ans[mn];
void calc(vector<int> num, int tot)
{
int m = 131072, tmp = 1;
sort(num.begin(), num.end());
for (int i = 0; i<(int)num.size(); ++i)
tmp = tmp*(num[i]+1)%29;
ans[0] = (tot+29-tmp)%29;
rep(j, 0, num[0])
(++res[fenmu(num[0], j)]) %= 29;
rep(i, 0, m) FFT::r[i] = (FFT::r[i>>1]>>1)|((i&1)<<(17-1));
for (int i = 1; i<(int)num.size(); ++i)
{
memset(a, 0, sizeof a);
rep(j, 0, num[i]) (++a[fenmu(num[i], j)]) %= 29;
FFT::polymul(res, a, m);
for (int j = mo-1; j<m; res[j++]=0)
res[j%(mo-1)] += res[j]%29;
for (int j = 0; j<mo-1; res[j++]%=29);
}
int fz = 1;
for (int i = 0; i<(int)num.size(); ++i)
fz = 1ll*fz*jc[num[i]]%mo;
fz = lnd[fz];
for (int i = 0; i<mo-1; ++i)
ans[pw[(i+fz)%(mo-1)]] = res[i];
}
char mystr[2333];
char to29(int x) { x%=29; return x<10?'0'+x:'A'+x-10; }
int main()
{
scanf("%s%*d", mystr);
math_table();
vector<int> num;
bign tmp; tmp.readstr(mystr);
int tot = tmp%29 + 1;
while (!tmp.zero()) num.push_back(tmp%mo), tmp /= mo;
calc(num, tot);
for (int i = 0; i<mo; ++i) putchar(to29(ans[i]));
puts("");
return 0;
}