#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <bits/stdc++.h>
#include <queue>
#include <algorithm>
#include <cmath>
#include <map>
#include <stack>
using namespace std;
#define N 200200
#define M 1000020
#define LL long long
#define inf 0x3f3f3f3f
int P, G;
int rev[N];
LL t0[N], t1[N], t2[N], t3[N];
LL wnPow[N];
LL qpow(LL x, LL k) {
int ret = 1;
while(k) {
if(k & 1) ret = ret * x % P;
k >>= 1;
x = x * x % P;
}
return ret;
}
void change(LL y[], int len) {
for(int i = 1; i < len; ++i) {
rev[i] = (rev[i>>1] >> 1) + (i & 1) * (len / 2);
if(i < rev[i]) swap(y[i], y[rev[i]]);
}
}
void FFT(LL y[], int len, int on) {
change(y, len);
for(int h = 2; h <= len; h <<= 1) {
LL wn = qpow(G, (P - 1) / h);
if(on == -1) wn = qpow(wn, P - 2);
wnPow[0] = 1;
for(int j = 1; j < h / 2; ++j) wnPow[j] = wnPow[j-1] * wn % P;
for(int j = 0; j < len; j += h) {
LL u, t;
for(int k = j; k < j + h / 2; ++k) {
u = y[k];
t = y[k+h/2] * wnPow[k-j] % P;
y[k] = u + t; if(y[k] >= P) y[k] -= P;
y[k+h/2] = u - t; if(y[k+h/2] < 0) y[k+h/2] += P;
}
}
}
if(on == -1) {
LL inv = qpow(len, P - 2);
for(int i = 0; i < len; ++i) y[i] = y[i] * inv % P;
}
}
void getInv(LL A[], LL A0[], int t) {
if(t == 1) { A0[0] = qpow(A[0], P - 2); return; }
getInv(A, A0, (t + 1) / 2);
int len = 1;
while(len <= t * 2) len <<= 1;
for(int i = 0; i < len; ++i) {
if(i < t) t2[i] = A[i];
else t2[i] = 0;
}
for(int i = (t + 1) / 2; i < len; ++i) A0[i] = 0;
FFT(A0, len, 1); FFT(t2, len, 1);
for(int i = 0; i < len; ++i) {
t2[i] = 2 - A0[i] * t2[i] % P;
if(t2[i] < 0) t2[i] += P;
A0[i] = A0[i] * t2[i] % P;
}
FFT(A0, len, -1);
}
void getDiv(LL A[], LL B[], LL D[], LL R[], int n, int m) {
int len = 1;
while(len <= 2 * n) len <<= 1;
for(int i = 0; i <= n - m; ++i) D[i] = A[n-i];
for(int i = n - m + 1; i < len; ++i) D[i] = 0;
for(int i = 0; i <= m; ++i) t0[i] = B[m-i];
for(int i = m + 1; i < n - m + 1; ++i) t0[i] = 0;
getInv(t0, t1, n - m + 1);
for(int i = n - m + 1; i < len; ++i) t1[i] = 0;
FFT(t1, len, 1); FFT(D, len, 1);
for(int i = 0; i < len; ++i) D[i] = D[i] * t1[i] % P;
FFT(D, len, -1);
len /= 2;
for(int i = 0; i < (n - m + 1) / 2; ++i) swap(D[i], D[n-m-i]);
for(int i = 0; i <= n - m; ++i) t1[i] = D[i];
for(int i = n - m + 1; i < len; ++i) D[i] = t1[i] = 0;
for(int i = 0; i <= m; ++i) t0[i] = B[i];
for(int i = m + 1; i < len; ++i) t0[i] = 0;
FFT(t0, len, 1); FFT(t1, len, 1);
for(int i = 0; i < len; ++i) t0[i] = t0[i] * t1[i] % P;
FFT(t0, len, -1);
for(int i = 0; i < m; ++i) {
R[i] = A[i] - t0[i];
if(R[i] < 0) R[i] += P;
}
}
int n;
int sqr;
LL pool[N*40];
int sz;
LL x0[N], x1[N], x2[N];
LL ans[N];
LL *root[N];
LL *newp(int t) {
LL *ret = pool + sz;
sz += t;
return ret;
}
void bfCalc(LL a[], LL x[], LL y[], int n, int m) {
for(int i = 0; i <= n + m; ++i) a[i] = 0;
for(int i = 0; i <= n; ++i) {
for(int j = 0; j <= m; ++j) {
a[i+j] += x[i] * y[j] % P;
}
}
for(int i = 0; i <= n + m; ++i) a[i] %= P;
}
LL *calc(int l, int r) {
if(l == r) {
LL *ret = newp(2);
ret[0] = l;
ret[1] = 1;
return ret;
}
int mid = (l + r) / 2;
int tmpSz = sz;
LL *x = calc(l, mid);
LL *y = calc(mid + 1, r);
if(r - l + 1 <= 100) {
LL *ret = newp(r - l + 2);
bfCalc(ret, x, y, mid - l + 1, r - mid);
return ret;
}
int len = 1;
while(len <= (r - l + 2)) len <<= 1;
for(int i = 0; i < len; ++i) {
if(i <= mid - l + 1) x0[i] = x[i];
else x0[i] = 0;
if(i <= r - mid) x1[i] = y[i];
else x1[i] = 0;
}
FFT(x0, len, 1); FFT(x1, len, 1);
for(int i = 0; i < len; ++i) x0[i] = x0[i] * x1[i] % P;
FFT(x0, len, -1);
sz = tmpSz;
LL *ret = newp(r - l + 2);
for(int i = 0; i <= r - l + 1; ++i) {
ret[i] = x0[i];
}
return ret;
}
void calc1(int l, int r, int i) {
if(l == r) {
root[i] = newp(2);
root[i][0] = (P - sqr * l) % P;
root[i][1] = 1;
return;
}
int mid = (l + r) / 2;
calc1(l, mid, i << 1);
calc1(mid + 1, r, i << 1 | 1);
if(r - l + 1 <= 100) {
root[i] = newp(r - l + 2);
bfCalc(root[i], root[i<<1], root[i<<1|1], mid - l + 1, r - mid);
return;
}
int len = 1;
while(len <= (r - l + 2)) len <<= 1;
for(int j = 0; j < len; ++j) {
if(j <= mid - l + 1) x0[j] = root[i<<1][j];
else x0[j] = 0;
if(j <= r - mid) x1[j] = root[i<<1|1][j];
else x1[j] = 0;
}
FFT(x0, len, 1); FFT(x1, len, 1);
for(int j = 0; j < len; ++j) x0[j] = x0[j] * x1[j] % P;
FFT(x0, len, -1);
root[i] = newp(r - l + 2);
for(int j = 0; j <= r - l + 1; ++j) root[i][j] = x0[j];
}
void calc2(int l, int r, int i, LL *p, int degP) {
LL *y = newp(r - l + 1);
getDiv(p, root[i], x1, y, degP, r - l + 1);
if(l == r) {
ans[l] = y[0];
return;
}
int mid = (l + r) / 2;
int tmpSz = sz;
calc2(l, mid, i << 1, y, r - l);
sz = tmpSz;
calc2(mid + 1, r, i << 1 | 1, y, r - l);
sz = tmpSz;
}
bool check(int k) {
int tmp = P - 1;
for(int i = 2; i * i <= tmp; ++i) {
if(tmp % i) continue;
if(qpow(k, i) == 1 || qpow(k, tmp / i) == 1) return 0;
}
return 1;
}
int bf() {
int ret = 1;
for(int i = 1; i <= n; ++i) {
ret = 1LL * ret * i % P;
}
if(n & 1) ret = 1LL * ret * qpow(2, P - 2) % P;
return ret;
}
int main() {
scanf("%d%d", &n, &P);
clock_t x = clock();
if(n >= P) {
puts("0");
return 0;
}
G = 2;
while(!check(G)) ++G;
sqr = sqrt(n + 0.5);
LL res = 1;
for(int i = sqr * sqr + 1; i <= n; ++i)
res = res * i % P;
sz = 0;
LL *p = calc(1, sqr);
calc1(0, sqr - 1, 1);
calc2(0, sqr - 1, 1, p, sqr);
for(int i = 0; i < sqr; ++i) {
res = 1LL * res * ans[i] % P;
}
if(n & 1) res = res * qpow(2, P - 2) % P;
printf("%d\n", (int)res);
clock_t y = clock();
return 0;
}
51nod 1387 移数字(多项式除法)
最新推荐文章于 2022-02-26 16:52:46 发布