给定两个多项式\(f(x),g(x)\),求\(h(x)=f(x)*g(x)\)
对\(p\)取模,\(p\)不保证可以分解成\(a*2^k+1\)
三模NTT
由于模数\(p\)不一定是形如\(a*2^k+1\)的质数,无法直接在模\(p\)意义下做\(ntt\),我们可以找几个满足条件(形如\(a*2^k+1\)且有原根)的模数,分别求出结果再用\(crt\)合并一下
考虑卷积后数字最大能达到\(p*p*len\),一般是\(10^9*10^9*10^5=10^{23}\),所以我们选择模数之积应该大于\(10^{23}\)
一般考虑\(998244353,1004535809,469762049\),因为原根都是\(3\),在\(int\)范围内且乘积较大
然后跑三个\(ntt\)(天体常数)得到
\[\begin{cases}ret\equiv a_1 (mod\ p_1)\\ret\equiv a_2 (mod\ p_2)\\ret\equiv a_3 (mod\ p_3)\\ret\equiv x (mod\ p)\end{cases}
\]
我们现在就是求\(x\)
由于前三个式子直接合并爆\(long\ long\),我们用一些技巧:
先合并前两个,定义\(inv(x,p)\)为\(x\)在模\(p\)意义下的逆元
\[ret\equiv a_1*p_2*inv(p_2,p_1)+a_2*p_1*inv(p_1,p_2) (mod\ p_1*p_2)
\]
记\(M=p_1*p_2\),上式记作
\[ret\equiv d (mod\ M)
\]
则存在整数\(x,y\)(这里的\(x\)是待定系数,与上面方程组中的答案\(x\)无关)使得
\[ret=x*M+d=y*p_3+a_3
\]
\[x\equiv (a_3-d)*M^{-1} (mod\ p_3)
\]
设\(q=(a_3-d)*M^{-1}\),那么
\[x=k*p_3+q
\]
代入\(ret\)得到
\[ret=k*p_1*p_2*p_3+q*M+d
\]
然而\(ret\in [0,p_1*p_2*p_3)\),而\(q\in[0,p_3),d\in[0,M)\)时\(q*M+d<p_1*p_2*p_3\),所以\(k=0\),\(ret=q*M+d\)就得出了,最后再对\(p\)取模即可
#include <algorithm>
#include <cstdio>
using namespace std;
/**
 * Three-modulus NTT.
 *
 * Multiplies two polynomials and reduces every coefficient modulo an
 * arbitrary p (p does not have to be an NTT-friendly prime).  The product is
 * computed modulo three NTT-friendly primes and the exact integer
 * coefficient is then rebuilt with the Chinese Remainder Theorem before the
 * final reduction modulo p.
 */
namespace red{
/// 64-bit signed integer used for all arithmetic in this namespace.
/// A type alias is used instead of redefining the keyword `int` with a macro.
using i64=long long;

/// Reads one (optionally negative) decimal integer from stdin.
/// Returns 0 if the input ends before any digit is seen.
inline i64 read()
{
    int ch=getchar();
    // Skip everything up to the first digit or minus sign; stop at EOF so a
    // truncated input cannot loop forever.
    while(ch!=EOF&&ch!='-'&&(ch<'0'||ch>'9')) ch=getchar();
    bool negative=false;
    if(ch=='-') negative=true,ch=getchar();
    i64 value=0;
    while(ch>='0'&&ch<='9')
    {
        value=value*10+(ch-'0');
        ch=getchar();
    }
    return negative?-value:value;
}

/// Capacity of every work array; the transform length must not exceed it.
constexpr i64 N=400010;
/// The three NTT-friendly primes (3 is a primitive root of each).
i64 mod[3]={469762049,998244353,1004535809};
/// Degrees of the two inputs and the target modulus.
i64 n,m,p;
/// Input coefficients f[0..n], g[0..m]; pos[] is the bit-reversal permutation.
i64 f[N],g[N],pos[N];
/// b[] is scratch space for the second operand, ret[] receives the answer.
i64 b[N],ret[N];
/// Transform length (a power of two) and its base-2 logarithm.
i64 limit,len;

/// Returns x^k mod p by binary exponentiation.
/// x*x must fit in 64 bits, so callers pass x already reduced modulo p.
inline i64 fast(i64 x,i64 k,i64 p)
{
    i64 ret=1;
    while(k)
    {
        if(k&1) ret=ret*x%p;
        x=x*x%p;
        k>>=1;
    }
    return ret;
}

/// Returns x*k mod p using repeated doubling, so no intermediate value
/// exceeds 2*p.  Used where p is about 4.7e17 and a direct product would
/// overflow.  Expects 0 <= x < p.
inline i64 slow(i64 x,i64 k,i64 p)
{
    i64 ret=0;
    while(k)
    {
        if(k&1) ret=(ret+x)%p;
        x=(x+x)%p;
        k>>=1;
    }
    return ret;
}

/// One NTT instance: prime modulus p, primitive root g and a coefficient
/// buffer a[].
struct poly
{
    i64 g=3,p,a[N];
    /// In-place number-theoretic transform of a[0..limit).
    /// @param limit transform length (power of two; pos[] must be prepared)
    /// @param a     buffer to transform
    /// @param inv   non-zero: forward transform; zero: inverse transform,
    ///              including the final division by limit
    inline void ntt(i64 limit,i64 *a,i64 inv)
    {
        for(i64 i=0;i<limit;++i)
            if(i<pos[i]) swap(a[i],a[pos[i]]);
        for(i64 mid=1;mid<limit;mid<<=1)
        {
            // (p+1)/g is the inverse of g because p+1 is a multiple of 3 for
            // all three primes used here.
            const i64 Wn=fast(inv?g:(p+1)/g,(p-1)/(mid<<1),p);
            for(i64 r=mid<<1,j=0;j<limit;j+=r)
            {
                i64 w=1;
                for(i64 k=0;k<mid;++k,w=w*Wn%p)
                {
                    i64 x=a[j+k],y=w*a[j+k+mid]%p;
                    a[j+k]=x+y;
                    if(a[j+k]>=p) a[j+k]-=p;
                    a[j+k+mid]=x-y;
                    if(a[j+k+mid]<0) a[j+k+mid]+=p;
                }
            }
        }
        if(inv) return;
        const i64 scale=fast(limit,p-2,p);
        for(i64 i=0;i<limit;++i) a[i]=a[i]*scale%p;
    }
}fft[3];

/// Modular inverse of x modulo the prime p (Fermat's little theorem).
inline i64 inv(i64 x,i64 p)
{
    return fast(x%p,p-2,p);
}

/// Combines the three residues fft[0..2].a[i] into ret[i] = value mod p
/// for every i in [0, n+m].
inline void crt()
{
    const i64 top=n+m;
    const i64 M=mod[0]*mod[1];
    const i64 inv1=inv(mod[1],mod[0]);
    const i64 inv0=inv(mod[0],mod[1]);
    const i64 invM=inv(M%mod[2],mod[2]);
    for(i64 i=0;i<=top;++i)
    {
        const i64 r0=fft[0].a[i],r1=fft[1].a[i],r2=fft[2].a[i];
        // t = value mod (mod[0]*mod[1]); slow() keeps the products in range.
        const i64 t=(slow(r0*mod[1]%M,inv1,M)+slow(r1*mod[0]%M,inv0,M))%M;
        // value = k*M + t with 0 <= k < mod[2].
        const i64 k=((r2-t%mod[2])%mod[2]+mod[2])%mod[2]*invM%mod[2];
        ret[i]=((k%p)*(M%p)%p+t%p)%p;
    }
}

/// Multiplies f[0..n] by g[0..m] modulo p and stores the n+m+1 result
/// coefficients in ret[].  Coefficients are first normalised into [0, p).
/// May be called repeatedly: all per-call state is reset here.
/// Requires n, m >= 0, p > 0 and a transform length that fits in N.
inline void convolve()
{
    len=0;
    for(limit=1;limit<=n+m+2;limit<<=1) ++len;
    pos[0]=0;
    for(i64 i=1;i<limit;++i) pos[i]=(pos[i>>1]>>1)|((i&1)<<(len-1));
    for(i64 k=0;k<=2;++k)
    {
        fft[k].p=mod[k];
        for(i64 i=0;i<limit;++i)
        {
            i64 fv=0,gv=0;
            if(i<=n){fv=f[i]%p;if(fv<0) fv+=p;}
            if(i<=m){gv=g[i]%p;if(gv<0) gv+=p;}
            fft[k].a[i]=fv%mod[k];
            b[i]=gv%mod[k];
        }
        fft[k].ntt(limit,fft[k].a,1);
        fft[k].ntt(limit,b,1);
        for(i64 i=0;i<limit;++i) fft[k].a[i]=fft[k].a[i]*b[i]%mod[k];
        fft[k].ntt(limit,fft[k].a,0);
    }
    crt();
}

/// Reads n, m, p and both coefficient lists from stdin, then prints the
/// n+m+1 coefficients of the product modulo p separated by spaces.
inline void main()
{
    n=read(),m=read(),p=read();
    for(i64 i=0;i<=n;++i) f[i]=read();
    for(i64 i=0;i<=m;++i) g[i]=read();
    convolve();
    for(i64 i=0;i<=n+m;++i) printf("%lld ",ret[i]);
}
}
// Program entry point: delegates all work to red::main().
// `signed` is spelled out instead of `int` so the return type stays a plain
// int even when `int` has been redefined by a macro earlier in the file.
signed main()
{
red::main();
return 0;
}
MTT
毛爷爷数论变换
对于多项式\(P=\sum\limits_{i=0}^{n}P_ix^i,Q=\sum\limits_{i=0}^{n}Q_ix^i\),求\(P*Q\)
考虑直接大力\(FFT\),发现\(10^{23}\)精度炸飞
考虑降低精度,我们设:
\[A=\sum\limits_{i=0}^{n}(P_i>>15)x^i,B=\sum\limits_{i=0}^{n}(P_i\&32767)x^i
\]
\[C=\sum\limits_{i=0}^{n}(Q_i>>15)x^i,D=\sum\limits_{i=0}^{n}(Q_i\&32767)x^i
\]
那么
\[P*Q=AC*2^{30}+(AD+BC)*2^{15}+BD
\]
需要\(8\)次\(FFT\)
我们设\(F=A+iB,G=C+iD\)
\[T1=F*G=AC-BD+i(AD+BC)
\]
再设\(F^{'}=A-iB\)
\[T2=F^{'}*G=AC+BD+i(AD-BC)
\]
其中
\[T1+T2=2(AC+iAD)
\]
\[T2-T1=2(BD-iBC)
\]
我们需要\(3\)次\(DFT\),\(2\)次\(IDFT\),一共\(5\)次\(FFT\)
其实还能优化!
令
\[P(x)=A(x)+iB(x)
\]
\[Q(x)=A(x)-iB(x)
\]
设\(P_t\)是\(P\)的\(DFT\),\(conj(x)\)表示\(x\)的共轭复数,\(A_i\)为\(A(x)\)的\(i\)次项系数
\[P_t(k)=A(\omega _n^k)+i B(\omega _n^k)
\]
\[=\sum\limits_{j=0}^{n-1}A_j\omega _n^{jk}+iB_j\omega _n^{jk}
\]
\[=\sum\limits_{j=0}^{n-1}(A_j+iB_j)\omega _n^{jk}
\]
同理
\[Q_t(k)=A(\omega _n^k)-i B(\omega _n^k)
\]
\[=\sum\limits_{j=0}^{n-1}A_j\omega _n^{jk}-iB_j\omega _n^{jk}
\]
\[=\sum\limits_{j=0}^{n-1}(A_j-iB_j)\omega _n^{jk}
\]
\[=\sum\limits_{j=0}^{n-1}(A_j-i*B_j)(cos(\frac{2\pi jk}{n})+i*sin(\frac{2\pi jk}{n}))
\]
\[=\sum\limits_{j=0}^{n-1}(A_jcos(\frac{2\pi jk}{n})+B_jsin(\frac{2\pi jk}{n}))+i(A_jsin(\frac{2\pi jk}{n})-B_jcos(\frac{2\pi jk}{n}))
\]
\[=conj(\sum\limits_{j=0}^{n-1}(A_jcos(\frac{2\pi jk}{n})+B_jsin(\frac{2\pi jk}{n}))-i(A_jsin(\frac{2\pi jk}{n})-B_jcos(\frac{2\pi jk}{n})))
\]
\[=conj(\sum\limits_{j=0}^{n-1}(A_jcos(\frac{-2\pi jk}{n})-B_jsin(\frac{-2\pi jk}{n}))+i(A_jsin(\frac{-2\pi jk}{n})+B_jcos(\frac{-2\pi jk}{n})))
\]
\[=conj(\sum\limits_{j=0}^{n-1}(A_j+iB_j)(cos(\frac{-2\pi jk}{n})+i*sin(\frac{-2\pi jk}{n})))
\]
\[=conj(\sum\limits_{j=0}^{n-1}(A_j+iB_j)\omega _n^{-jk})
\]
\[=conj(\sum\limits_{j=0}^{n-1}(A_j+iB_j)\omega _n^{j(n-k)})
\]
\[=conj(P_t(n-k))
\]
所以我们可以用\(P\)的点值得到\(Q\),也就是说上面的\(F^{'}\)不需要进行\(FFT\)
注意\(n-k\)当\(k=0\)时需要特殊处理,因为是循环卷积,需要移到第\(0\)项
得到这个结论我们只要\(2\)次\(DFT\),\(2\)次\(IDFT\),一共\(4\)次\(FFT\)
碾爆垃圾\(9\)次\(NTT\)
注意单位根会乘爆精度,考虑预处理,或者开\(long double\)
#include <algorithm>
#include <cmath>
#include <cstdio>
using namespace std;
/**
 * MTT (split-coefficient FFT).
 *
 * Multiplies two polynomials modulo an arbitrary p with floating-point FFT.
 * Every coefficient v is split as v = hi*2^15 + lo and stored as the complex
 * number hi + i*lo, so two forward and two inverse transforms are enough.
 */
namespace red{
/// 64-bit signed integer used for all integer arithmetic in this namespace.
/// A type alias is used instead of redefining the keyword `int` with a macro.
using i64=long long;

/// Reads one (optionally negative) decimal integer from stdin.
/// Returns 0 if the input ends before any digit is seen.
inline i64 read()
{
    int ch=getchar();
    // Skip everything up to the first digit or minus sign; stop at EOF so a
    // truncated input cannot loop forever.
    while(ch!=EOF&&ch!='-'&&(ch<'0'||ch>'9')) ch=getchar();
    bool negative=false;
    if(ch=='-') negative=true,ch=getchar();
    i64 value=0;
    while(ch>='0'&&ch<='9')
    {
        value=value*10+(ch-'0');
        ch=getchar();
    }
    return negative?-value:value;
}

/// Capacity of every work array; the transform length must not exceed it.
constexpr i64 N=1<<18;
const double pi=acos(-1.0);
/// Degrees of the two inputs and the target modulus.
i64 n,m,p;
/// Bit-reversal permutation for the current transform length.
i64 pos[N];
/// Transform length (a power of two) and its base-2 logarithm.
i64 limit,len;
/// Result coefficients modulo p, filled by convolve().
i64 ans[N];

/// Minimal complex number with the operations the transform needs.
struct complex
{
    double x,y;
    complex(double tx=0,double ty=0){x=tx,y=ty;}
    inline complex operator + (const complex &t) const
    {
        return complex(x+t.x,y+t.y);
    }
    inline complex operator - (const complex &t) const
    {
        return complex(x-t.x,y-t.y);
    }
    inline complex operator * (const complex &t) const
    {
        return complex(x*t.x-y*t.y,x*t.y+y*t.x);
    }
    inline complex operator * (const double &t) const
    {
        return complex(x*t,y*t);
    }
    /// Adds t to this value in place and returns *this.
    inline complex& operator += (const complex &t)
    {
        x+=t.x,y+=t.y;
        return *this;
    }
    /// Complex conjugate.
    inline complex operator ~ () const
    {
        return complex(x,-y);
    }
}f[N],g[N],a[N],b[N],w[N];

/// In-place forward FFT of a[0..limit) using the twiddle table w[], where
/// w[mid+k] = exp(i*pi*k/mid).
/// @param inv unused: callers get the inverse transform by reversing
///            a[1..limit) before the call and scaling by 1/limit afterwards
inline void fft(i64 limit,complex *a,i64 inv)
{
    (void)inv;
    for(i64 i=0;i<limit;++i)
        if(i<pos[i]) swap(a[i],a[pos[i]]);
    for(i64 mid=1;mid<limit;mid<<=1)
    {
        for(i64 r=mid<<1,j=0;j<limit;j+=r)
        {
            for(i64 k=0;k<mid;++k)
            {
                complex x=a[j+k],y=w[mid+k]*a[j+k+mid];
                a[j+k]=x+y;
                a[j+k+mid]=x-y;
            }
        }
    }
}

/// Normalises v into [0, p) and packs it as (v>>15) + i*(v&32767).
/// Uses the global modulus p, which must already be set and positive.
inline complex split(i64 v)
{
    v%=p;
    if(v<0) v+=p;
    return complex((double)(v>>15),(double)(v&32767));
}

/// Multiplies the packed polynomials f[0..n] and g[0..m] and stores the
/// n+m+1 product coefficients modulo p in ans[].  f, g, a and b are
/// overwritten.  May be called repeatedly: all per-call state is reset here.
/// Requires n, m >= 0, p > 0 and a transform length that fits in N.
inline void convolve()
{
    len=0;
    for(limit=1;limit<=n+m;limit<<=1) ++len;
    // pos[0] is always 0; starting at 1 also avoids a negative shift count
    // when limit == 1 (len == 0).
    pos[0]=0;
    for(i64 i=1;i<limit;++i) pos[i]=(pos[i>>1]>>1)|((i&1)<<(len-1));
    // Twiddle table: recompute from cos/sin every 32 entries and multiply up
    // in between, which bounds the accumulated rounding error.
    for(i64 i=1;i<limit;i<<=1)
    {
        w[i]=complex(1,0);
        for(i64 j=1;j<i;++j)
            w[i+j]=((j&31)==1?complex(cos(pi*j/i),sin(pi*j/i)):w[i+j-1]*w[i+1]);
    }
    // Zero-pad so that data left over from an earlier call cannot leak in.
    for(i64 i=n+1;i<limit;++i) f[i]=complex();
    for(i64 i=m+1;i<limit;++i) g[i]=complex();
    fft(limit,f,1);fft(limit,g,1);
    for(i64 i=0;i<limit;++i)
    {
        // conj(F[(limit-i) mod limit]) is the transform of hi - i*lo, which
        // lets the transforms of hi and lo be separated without extra FFTs.
        const i64 mirror=i?limit-i:0;
        const complex qf=~f[mirror],qg=~g[mirror];
        const complex f0=(f[i]-qf)*complex(0,-0.5),f1=(f[i]+qf)*0.5;
        const complex g0=(g[i]-qg)*complex(0,-0.5),g1=(g[i]+qg)*0.5;
        a[i]=f1*g1;                             // hi*hi
        b[i]=f1*g0+f0*g1+f0*g0*complex(0,1);    // cross terms + i*(lo*lo)
    }
    reverse(a+1,a+limit);reverse(b+1,b+limit);
    fft(limit,a,-1),fft(limit,b,-1);
    const double k=1.0/limit;
    for(i64 i=0;i<=n+m;++i)
    {
        // Reduce each part modulo p before shifting so the sum stays well
        // inside 64 bits.
        const i64 hh=(i64)(a[i].x*k+.5)%p;
        const i64 hl=(i64)(b[i].x*k+.5)%p;
        const i64 ll=(i64)(b[i].y*k+.5)%p;
        ans[i]=((hh<<30)+(hl<<15)+ll)%p;
    }
}

/// Reads n, m, p and both coefficient lists from stdin, then prints the
/// n+m+1 coefficients of the product modulo p separated by spaces.
inline void main()
{
    n=read(),m=read(),p=read();
    for(i64 i=0;i<=n;++i) f[i]=split(read());
    for(i64 i=0;i<=m;++i) g[i]=split(read());
    convolve();
    for(i64 i=0;i<=n+m;++i) printf("%lld ",ans[i]);
}
}
// Program entry point: delegates all work to red::main().
// `signed` is spelled out instead of `int` so the return type stays a plain
// int even when `int` has been redefined by a macro earlier in the file.
signed main()
{
red::main();
return 0;
}