涉及知识点:多项式求逆,多项式除法,多点求值,阶乘取模。
对于 $N! \bmod P$,复杂度为 $O(\sqrt{N}\,\log^{2}\sqrt{N})$。
但常数巨大,和暴力算实际复杂度只相差常数= =
这个是可以扩展到组合数取模的~
my code:
#include <stdio.h>
#include <string.h>
#include <map>
#include <math.h>
#include <vector>
#include <algorithm>
using namespace std ;
// 64-bit integer aliases used for intermediate products before reduction by `mod`.
typedef long long LL ;
typedef long long Int ;
// Fill an array with the byte value x.
#define clr( a , x ) memset ( a , x , sizeof a )
// Indices of the left / right child of tree node `o`.
#define ls ( o << 1 )
#define rs ( o << 1 | 1 )
// Argument lists for recursing into the left / right child; they expand to
// the locals `o`, `l`, `r` and `m` of the enclosing function.
#define lson ls , l , m
#define rson rs , m + 1 , r
// Argument list for the root call: node 1 covering the index range [1, Sqrt].
#define root 1 , 1 , Sqrt
// Midpoint of the current range [l, r].
#define mid ( ( l + r ) >> 1 )
// Capacity of every fixed-size work array below.
const int MAXN = 300005 ;
// M[o]: coefficients (lowest degree first) of the product of ( x - a[i] )
// over the index range of tree node o, as filled in by build().
vector < int > M[MAXN << 2] ;
// F[o]: build() stores the product of ( x + i ) over the node's range here;
// go() later overwrites the children's entries with remainders.
vector < int > F[MAXN << 2] ;
// Scratch buffers shared by deal(), brute_deal(), DIV() and INV().
int x1[MAXN] , x2[MAXN] , x3[MAXN] , tmp[MAXN] ;
// Buffers go() passes to DIV(): dividend, divisor and remainder.
int A[MAXN] , B[MAXN] , R[MAXN] ;
// Evaluation points a[1..Sqrt], assigned in calc().
int a[MAXN] ;
// mod: modulus read by solve(); g: value chosen by preprocess() and used by
// DFT() to derive its roots of unity.
int mod , g ;
// S[0..top): distinct prime factors collected by preprocess().
int S[MAXN] , top ;
// Input value read by solve().
int n ;
// Running product accumulated by go() and calc(); printed by solve().
int ans ;
// Integer part of sqrt(n), set by calc(); also the number of evaluation points.
int Sqrt ;
// Extended Euclidean algorithm.
//
// Computes integers x and y such that a * x + b * y == gcd ( a , b ) and
// returns that gcd.
//
// a, b : the two operands.
// x, y : outputs receiving the Bezout coefficients.
//
// The original version was declared to return int but had no return
// statement, which is undefined behaviour; the gcd is now returned.
int exgcd ( int a , int b , int& x , int& y ) {
    if ( b == 0 ) {
        // gcd ( a , 0 ) == a == a * 1 + 0 * 0
        x = 1 ;
        y = 0 ;
        return a ;
    }
    // Solve for ( b , a % b ) with the output roles swapped, then fold the
    // quotient back in so the identity holds for ( a , b ).
    int d = exgcd ( b , a % b , y , x ) ;
    y -= a / b * x ;
    return d ;
}
// Modular inverse of a with respect to the global `mod`, obtained from the
// Bezout coefficient produced by exgcd(). A negative coefficient is shifted
// up by `mod` once.
int inv ( int a ) {
    int coefA = 0 , coefMod = 0 ;
    exgcd ( a , mod , coefA , coefMod ) ;
    return coefA < 0 ? coefA + mod : coefA ;
}
int powmod ( int a , int b ) {
int res = 1 , tmp = a ;
while ( b ) {
if ( b & 1 ) res = ( LL ) res * tmp % mod ;
tmp = ( LL ) tmp * tmp % mod ;
b >>= 1 ;
}
return res ;
}
void DFT ( int y[] , int n , int rev ) {
for ( int i = 1 , j , k , t ; i < n ; ++ i ) {
for ( j = 0 , k = n >> 1 , t = i ; k ; k >>= 1 , t >>= 1 ) j = j << 1 | t & 1 ;
if ( i < j ) swap ( y[i] , y[j] ) ;
}
for ( int s = 2 , ds = 1 ; s <= n ; ds = s , s <<= 1 ) {
int wn = powmod ( g , ( mod - 1 ) / s ) ;
if ( rev ) wn = inv ( wn ) ;
for ( int k = 0 ; k < n ; k += s ) {
LL w = 1 , t ;
for ( int i = k ; i < k + ds ; ++ i , w = w * wn % mod ) {
y[i + ds] = ( y[i] - ( t = w * y[i + ds] % mod ) + mod ) % mod ;
y[i] = ( y[i] + t ) % mod ;
}
}
}
}
void INV ( int A[] , int B[] , int n ) {
B[0] = inv ( A[0] ) ;
int i , n1 , t , vn , s , ds ;
for ( s = 2 , ds = 1 ; ds < n ; ds = s , s <<= 1 ) {
n1 = ( s << 1 ) , t = min ( s , n ) , vn = inv ( n1 ) ;
for ( i = 0 ; i < t ; ++ i ) tmp[i] = A[i] ;
for ( i = t ; i < n1 ; ++ i ) tmp[i] = 0 ;
DFT ( tmp , n1 , 0 ) ;
DFT ( B , n1 , 0 ) ;
for ( i = 0 ; i < n1 ; ++ i ) B[i] = B[i] * ( 2 - ( LL ) tmp[i] * B[i] % mod + mod ) % mod ;
DFT ( B , n1 , 1 ) ;
for ( i = 0 ; i < t ; ++ i ) B[i] = ( LL ) B[i] * vn % mod ;
for ( i = t ; i < n1 ; ++ i ) B[i] = 0 ;
}
}
void DIV ( int A[] , int B[] , int R[] , int n , int m ) {
int n1 = 1 , n2 = n - m + 1 , i ;
while ( n1 <= n * 2 ) n1 <<= 1 ;
for ( i = 0 ; i < n ; ++ i ) x1[i] = A[n - i - 1] ;
for ( i = 0 ; i < m ; ++ i ) x2[i] = B[m - i - 1] ;
for ( i = m ; i < n2 ; ++ i ) x2[i] = 0 ;
for ( i = n2 ; i < n1 ; ++ i ) x1[i] = x2[i] = 0 ;
for ( i = 0 ; i < n1 ; ++ i ) x3[i] = 0 ;
INV ( x2 , x3 , n2 ) ;
DFT ( x1 , n1 , 0 ) ;
DFT ( x3 , n1 , 0 ) ;
for ( i = 0 ; i < n1 ; ++ i ) x1[i] = ( LL ) x1[i] * x3[i] % mod ;
DFT ( x1 , n1 , 1 ) ;
int vn = inv ( n1 ) ;
for ( i = 0 ; i < n2 ; ++ i ) x2[n2 - i - 1] = ( LL ) x1[i] * vn % mod ;
for ( i = n2 ; i < n1 ; ++ i ) x2[i] = 0 ;
for ( i = m ; i < n1 ; ++ i ) B[i] = 0 ;
DFT ( x2 , n1 , 0 ) ;
DFT ( B , n1 , 0 ) ;
for ( i = 0 ; i < n1 ; ++ i ) x2[i] = ( LL ) x2[i] * B[i] % mod ;
DFT ( x2 , n1 , 1 ) ;
for ( i = 0 ; i < m - 1 ; ++ i ) {
R[i] = A[i] - ( LL ) x2[i] * vn % mod ;
if ( R[i] < 0 ) R[i] += mod ;
}
}
// Factorises n and selects the global `g`.
//
// n : value to factorise; solve() passes mod - 1.
//
// The distinct prime factors of n are stored in S[0..top). `g` is then set
// to the smallest positive integer for which
// powmod ( g , ( mod - 1 ) / p ) != 1 holds for every stored prime p - the
// usual primitive-root test when n == mod - 1 and mod is prime.
//
// Fix over the original: the trial-division bound was written `i * i <= n`,
// which overflows a signed int (undefined behaviour) once i reaches 46341
// for n close to INT_MAX. `i <= n / i` is equivalent and cannot overflow.
void preprocess ( int n ) {
    top = 0 ;
    int i , flag ;
    for ( i = 2 ; i <= n / i ; ++ i ) {
        if ( n % i == 0 ) {
            S[top ++] = i ;
            while ( n % i == 0 ) n /= i ;
        }
    }
    // Whatever is left above 1 is a single prime factor larger than sqrt ( n ).
    if ( n > 1 ) S[top ++] = n ;
    for ( g = 1 ; ; ++ g ) {
        flag = 1 ;
        for ( i = 0 ; i < top ; ++ i ) {
            if ( powmod ( g , ( mod - 1 ) / S[i] ) == 1 ) {
                flag = 0 ;
                break ;
            }
        }
        if ( flag ) return ;
    }
}
void deal ( vector < int > & F , vector < int > & F1 , vector < int > & F2 , int sz ) {
int n = F1.size () , m = F2.size () , n1 = 1 , i ;
while ( n1 < n + m ) n1 <<= 1 ;
for ( i = 0 ; i < n ; ++ i ) x1[i] = F1[i] ;
for ( i = 0 ; i < m ; ++ i ) x2[i] = F2[i] ;
for ( i = n ; i < n1 ; ++ i ) x1[i] = 0 ;
for ( i = m ; i < n1 ; ++ i ) x2[i] = 0 ;
DFT ( x1 , n1 , 0 ) ;
DFT ( x2 , n1 , 0 ) ;
for ( i = 0 ; i < n1 ; ++ i ) x1[i] = ( LL ) x1[i] * x2[i] % mod ;
DFT ( x1 , n1 , 1 ) ;
LL vn = inv ( n1 ) ;
for ( i = 0 ; i < sz ; ++ i ) F.push_back ( x1[i] * vn % mod ) ;
}
// Schoolbook (quadratic) polynomial multiplication: appends the lowest sz
// coefficients of F1 * F2, reduced modulo `mod`, to F.
//
// F      : destination; coefficients are appended with push_back.
// F1, F2 : factors, lowest degree first.
// sz     : number of product coefficients to append.
//
// Only x1[0..sz) is cleared before accumulating, so sz is expected to cover
// every coefficient that will be read back.
void brute_deal ( vector < int > & F , vector < int > & F1 , vector < int > & F2 , int sz ) {
    const int lenA = F1.size () ;
    const int lenB = F2.size () ;

    int k = 0 ;
    while ( k < sz ) x1[k ++] = 0 ;

    for ( int p = 0 ; p < lenA ; ++ p ) {
        for ( int q = 0 ; q < lenB ; ++ q ) {
            const int at = p + q ;
            x1[at] = ( x1[at] + ( LL ) F1[p] * F2[q] ) % mod ;
        }
    }

    for ( k = 0 ; k < sz ; ++ k ) F.push_back ( x1[k] ) ;
}
// Builds the product tree over the index range [l, r] rooted at node o.
//
// Leaf l stores the linear factors
//   M[o] = x - a[l]   (constant term reduced modulo `mod`)
//   F[o] = x + l
// An inner node stores the product of its two children, r - l + 2
// coefficients each. Products of at most 1400 coefficients use the
// quadratic routine, larger ones the transform-based routine.
void build ( int o , int l , int r ) {
    if ( l != r ) {
        const int m = ( l + r ) >> 1 ;
        const int left = o << 1 ;
        const int right = o << 1 | 1 ;
        build ( left , l , m ) ;
        build ( right , m + 1 , r ) ;

        const int terms = r - l + 2 ;
        if ( terms <= 1400 ) {
            brute_deal ( M[o] , M[left] , M[right] , terms ) ;
            brute_deal ( F[o] , F[left] , F[right] , terms ) ;
        } else {
            deal ( M[o] , M[left] , M[right] , terms ) ;
            deal ( F[o] , F[left] , F[right] , terms ) ;
        }
        return ;
    }

    // Leaf: two monic linear polynomials, constant term first.
    M[o].push_back ( ( mod - a[l] ) % mod ) ;
    M[o].push_back ( 1 ) ;
    F[o].push_back ( l ) ;
    F[o].push_back ( 1 ) ;
}
// Copies the first n entries of the vector F into the plain array A.
// Entries of A at index n and beyond are left untouched.
void get ( int A[] , vector < int > & F , int n ) {
    int * out = A ;
    for ( int idx = 0 ; idx < n ; ++ idx ) {
        * out = F[idx] ;
        ++ out ;
    }
}
void go ( int o , int l , int r ) {
int m = mid , i , j ;
int n = r - l + 2 , nL = F[ls].size () , nR = F[rs].size () ;
get ( A , F[o] , n ) ;
if ( n <= 500 ) {
for ( i = l ; i <= r ; ++ i ) {
LL x = 0 , y = 1 ;
for ( j = 0 ; j < n ; ++ j ) {
x = ( x + A[j] * y ) % mod ;
y = y * a[i] % mod ;
}
ans = ans * x % mod ;
}
return ;
}
get ( B , M[ls] , nL ) ;
DIV ( A , B , R , n , nL ) ;
for ( i = 0 ; i < nL ; ++ i ) F[ls][i] = R[i] ;
F[ls][nL - 1] = 0 ;
get ( B , M[rs] , nR ) ;
DIV ( A , B , R , n , nR ) ;
for ( i = 0 ; i < nR ; ++ i ) F[rs][i] = R[i] ;
F[rs][nR - 1] = 0 ;
go ( lson ) ;
go ( rson ) ;
}
// Multiplies n! (modulo the global `mod`) into the global `ans`.
//
// With s = floor ( sqrt ( n ) ), the product 1 * 2 * ... * s*s is obtained by
// evaluating prod_{j=1..s} ( x + j ) at the points 0, s, 2s, ..., ( s-1 ) * s
// through build() / go(); the remaining factors s*s + 1 .. n are multiplied
// in directly.
//
// n : the factorial argument. The caller must set `ans` beforehand.
//
// Fix over the original: for n == 0 the tree phase was entered with the
// empty range [1, 0], so build() never reached l == r and recursed without
// end. The tree phase is now skipped when there are no evaluation points,
// and sqrt() is no longer called on a negative n.
void calc ( int n ) {
    Sqrt = n > 0 ? ( int ) sqrt ( 1.0 * n ) : 0 ;
    if ( Sqrt > 0 ) {
        for ( int i = 0 ; i < Sqrt ; ++ i ) a[i + 1] = Sqrt * i % mod ;
        build ( root ) ;
        go ( root ) ;
    }
    // Leftover factors that the square block does not cover.
    for ( int i = Sqrt * Sqrt + 1 ; i <= n ; ++ i ) ans = ( LL ) ans * i % mod ;
}
// Reads n and mod from standard input and prints one line with the result.
//
// When n >= mod the output is 0. Otherwise the generator is prepared with
// preprocess(), calc() accumulates the factorial into `ans`, and the value
// is printed.
// NOTE(review): for odd n the result is additionally multiplied by the
// inverse of 2. Nothing in this file explains that step - presumably it
// belongs to the original problem statement; confirm before reusing this as
// a plain factorial routine.
void solve () {
    scanf ( "%d%d" , &n , &mod ) ;
    if ( n < mod ) {
        ans = 1 ;
        preprocess ( mod - 1 ) ;
        calc ( n ) ;
        if ( ( n & 1 ) != 0 ) {
            ans = ( LL ) ans * inv ( 2 ) % mod ;
        }
        printf ( "%d\n" , ans ) ;
    } else {
        printf ( "0\n" ) ;
    }
}
// Program entry point: handles a single test case and exits with status 0.
int main ( void ) {
    solve () ;
    return 0 ;
}