有限域上多项式乘法的快速实现

问题

多项式环 R = G F [ p d ] [ x ] / ( x n − 1 ) R=GF[p^d][x]/(x^n-1) R=GF[pd][x]/(xn1),其中有限域 G F ( p d ) ≅ Z p [ x ] / ( m ( x ) ) GF(p^d) \cong Z_p[x]/(m(x)) GF(pd)Zp[x]/(m(x)) m ( x ) m(x) m(x) Z p Z_p Zp d d d次不可约多项式。对于任意 f , g ∈ R f,g \in R f,gR,如何快速计算 f ⋅ g f\cdot g fg

NTL

代码
#include <iostream>
#include "tools.h"

#include <NTL/ZZ_p.h> // integers mod p
#include <NTL/ZZ_pX.h> // polynomials over ZZ_p
#include <NTL/ZZ_pE.h> // ring/field extension of ZZ_p
#include <NTL/ZZ_pEX.h> // polynomials over ZZ_pE
#include <NTL/ZZ_pXFactoring.h>
#include <NTL/ZZ_pEXFactoring.h>

using namespace std;
using namespace NTL;

#pragma comment(lib, "NTL")

int main()
{
	ZZ p(2); //初始化为2

	//群Z_p
	ZZ_p::init(p);

	//随机生成Z_p[x]中的d次不可约多项式
	int d = 149;
	ZZ_pX m;
	//BuildIrred(m, d);
	SetCoeff(m, d);
	SetCoeff(m, 10);
	SetCoeff(m, 9);
	SetCoeff(m, 7);
	SetCoeff(m, 0);

	//域GF(p^d) = Z_p[x]/m(x)
	ZZ_pE::init(m);

	//GF(p^d)[x]中的多项式
	ZZ_pEX f, g, h, P;
	int n = 53;

	//P(x)=x^n-1
	SetCoeff(P, 0, -1);
	SetCoeff(P, n);

	// f(x) = x^8 - 1
	SetCoeff(f, 8); //将 x^8 系数置为 1
	SetCoeff(f, 0, -1); //将 x^0 系数置为 -1

	//随机生成n长多项式g(x)
	random(g, n-1);

	// 环上多项式的运算
	h = g * g;

	cout << "p = " << p << endl;
	cout << "d = " << d << endl;
	cout << "n = " << n << endl;
	cout << "m(x) = " << m << endl;
	cout << "P(x) = " << P << endl;

	//cout << "f = " << f << endl;
	//cout << "g = " << g << endl;
	//cout << "h = " << h << endl;

	int loop = 10;

	printf("%d rounds, NTL mul: ", loop);
	Loop(10, f*g);

	printf("%d rounds, NTL mod: ", loop);
	Loop(10, h%P);


	return 0;
}
结果
p = 2
d = 149
n = 53
m(x) = [1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
P(x) = [[1] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [1]]
10 rounds, NTL mul: 5.20162 s
10 rounds, NTL mod: 17.3201 s

NTT+Barrett

代码
IncompleteNTT
#include "IncompleteNTT.h"


int32 brv(int32 b, int32 l)
{
	int32 bb = 0;

	for (int32 i = 0; i < l; i++)
	{
		bb <<= 1;
		bb |= (b & 1);
		b >>= 1;
	}

	return bb;
}

//int64 exgcd(int64* x, int64* y, int64 a, int64 b)
//{
//	if (b == 0)
//	{
//		*x = 1;
//		*y = 0;
//		return a;
//	}
//	int64 ret = exgcd(x, y, b, a%b);
//	int64 tmp = *x;
//	*x = *y;
//	*y = tmp - (a / b) * (*y);
//	return ret;
//}

//int32 deg(Poly& f)
//{
//	int32 d = f.size() - 1;
//	int64* pf = f.data();
//
//	while (pf[d] == 0)
//		d--;
//	return d;
//}

ostream& operator<<(ostream& cout, Poly& f)
{
	int32 d = deg(f);
	int64* pf = f.data();

	printf("[ ");
	for (int32 i = 0; i < d; i++)
		printf("%lld, ", pf[i]);
	printf("%lld ]", pf[d]);

	return cout;
}

ostream& print_vector(int64* f, int32 len)
{
	len--;

	printf("[ ");
	for (int32 i = 0; i < len; i++)
		printf("%lld, ", f[i]);
	printf("%lld ]", f[len]);

	return std::cout;
}


//############################# 分割线 ###############################


void IncompleteNTT::init(int64 p, int64 w, int32 k, int32 h)
{
	this->p = p;
	this->w = w;
	this->k = k;
	this->h = h;
	this->m = (1L << k);
	this->n = this->h * this->m;

	int64 minv, pinv;
	int64 gcd = exgcd(&minv, &pinv, m, p);
	if (gcd != 1)
		ErrorInfo("%s\n", "gcd(2^k, p) != 1");
	this->factor = minv < 0 ? minv + p : minv;
	this->factor_pre = PreCompute(this->factor, p);
	this->one_pre = PreComputeMod(p);

	this->W.resize(this->m + 1); //重复存储:w^{0} = w^{m}
	this->W_pre.resize(this->m + 1);

	int64 *pW = W.data();
	int64 *pW_pre = W_pre.data();

	int64 wi = 1;	//单位根的幂次
	for (int32 i = 0; i <= this->m; i++)
	{
		pW[i] = wi;
		pW_pre[i] = PreCompute(wi, p);
		wi = (wi * w) % p;
	}

	brvlist.resize(k + 1);
	for (int j = 0; j <= k; j++)
	{
		auto& list = brvlist[j];
		int32 len = (1L << j);
		for (int i = 0; i < len; i++)
			list.push_back(brv(i, j));
	}
}




void IncompleteNTT::FwdNTT(NTTRep& F, Poly& f)
{
	int32 d = deg(f);
	if (d >= this->n)
		ErrorInfo("%s\n", "deg(f) > n - 1");

	if (F.size() < this->n)
		F.resize(this->n);

	int64* pF = F.data();
	int64* pf = f.data();

	FwdNTT(pF, pf, f.size());
}

void IncompleteNTT::FwdNTT(int64* pF, int64* pf, int32 flen)
{
	if (pF != pf)
	{
		int32 nn = min(flen, n);
		for (int32 i = 0; i < nn; i++)
			pF[i] = BarrettMod(pf[i], one_pre, p);
	}

	int64* pW = W.data();
	int64* pW_pre = W_pre.data();

	int64 P = 2 * p;
	for (int32 j = 0; j < k; j++)
	{
		int32 blocknum = 1 << j; //对j层分块
		int32 blocksize = this->n >> j; //对j层块大小
		int32 offset = blocksize >> 1; //计算第j+1层时蝴蝶的偏移量(块大小的一半)
		int32* list = brvlist[j].data();

		if (offset >= 4 * h)
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 X, WY;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < offset; k += 4)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
		else if (offset == 2 * h)
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 X, WY;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < h; k++)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
		else
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 X, WY;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < h; k++)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
	}

	//迭代结束,结果是m=2^k个h长多项式
	//约束范围到[0,2p)
	if (k >= 2)
		for (int32 i = 0; i < n; i += 4)
		{
			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;
		}
	else
		for (int32 i = 0; i < n; i++)
		{
			*pF -= ((P - *pF) >> 63)&P;
			pF++;
		}
}



void IncompleteNTT::InvNTT(Poly& f, NTTRep& F)
{
	int32 d = deg(F);
	if (d >= this->n)
		ErrorInfo("%s\n", "deg(F) > n - 1");

	if (f.size() < this->n)
		f.resize(this->n);

	int64* pF = F.data();
	int64* pf = f.data();

	InvNTT(pf, pF);
}

void IncompleteNTT::InvNTT(int64* pf, int64* pF)
{
	if (pF != pf)
	{
		for (int32 i = 0; i < n; i++)
			pf[i] = pF[i];
	}

	int64* pW = W.data();
	int64* pW_pre = W_pre.data();

	int64 P = 2 * p;
	for (int32 j = k - 1; j >= 0; j--)
	{
		int32 blocknum = 1 << j; //对j+1层分块
		int32 blocksize = this->n >> j; //第j+1层块大小
		int32 offset = blocksize >> 1; //计算第j层时蝴蝶的偏移量(块大小的一半)
		int32* list = brvlist[j].data();

		if (offset >= 4 * h)
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 XY, T;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);

				for (int32 k = 0; k < offset; k += 4)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
		else if (offset == 2 * h)
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 XY, T;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);

				for (int32 k = 0; k < h; k++)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
		else
			for (int32 i = 0; i < blocknum; i++)
			{
				int64 XY, T;
				int32 s = i * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);

				for (int32 k = 0; k < h; k++)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
	}

	//迭代结束,再整理一下
	int64 tmp;
	if (k >= 2)
		for (int32 i = 0; i < n; i += 4)
		{
			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;
		}
	else
		for (int32 i = 0; i < n; i++)
		{
			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;
		}
}


void IncompleteNTT::convolution(int64* FG, int64* F, int64* G, int64 w)
{
	for (int32 i = 0; i < h; i++)
	{
		FG[i] = 0;
		int32 s = h + i;
		for (int32 j = 0; j <= i; j++)
			FG[i] = BarrettMod(FG[i] + G[j] * F[i - j], one_pre, p); //冗余,[0,2p)
		for (int32 j = i + 1; j < h; j++)
		{
			int64 tmp = FG[i] + BarrettMod(w * G[j], one_pre, p) * F[s - j];
			FG[i] = BarrettMod(tmp, one_pre, p); //冗余,[0,2p)
		}
	}
}


void IncompleteNTT::NTTMul(NTTRep& FG, NTTRep& F, NTTRep& G)
{
	if (F.size() < n || G.size() < n)
		ErrorInfo("%s\n", "F.size < n or G.size < n");

	if (FG.size() < n)
		FG.resize(n);

	int64* pFG = FG.data();
	int64* pF = F.data();
	int64* pG = G.data();

	NTTMul(pFG, pF, pG);
}

void IncompleteNTT::NTTMul(int64* pFG, int64* pF, int64* pG)
{
	int64* pW = W.data();

	int64 ww;
	int64 r0, r1;
	int64 P = 2 * p;
	int32* list = brvlist[k].data();
	if (h == 1)
		if (k >= 2)
			for (int32 i = 0; i < m; i += 4) //m个h长小多项式
			{
				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;
			}
		else
			for (int32 i = 0; i < m; i++) //m个h长小多项式
			{
				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;
			}
	else if (h == 2)
		if (k >= 2)
			for (int32 i = 0; i < m; i += 4) //m个h长小多项式
			{
				int64 tmp;

				//使用冗余表示,[0,2p)
				ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 1]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 2]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 3]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;
			}
		else
			for (int32 i = 0; i < m; i++) //m个h长小多项式
			{
				//使用冗余表示,[0,2p)
				ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}

				int64 tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;
			}
	else
		for (int32 i = 0; i < m; i++) //m个h长小多项式
		{
			ww = pW[list[i]]; //第k层第i个多项式使用的单位根,w_{2^k}^{brv_k(i)}
			convolution(pFG, pF, pG, ww);

			pFG += h;
			pF += h;
			pG += h;
		}
}

void IncompleteNTT::Param()
{
	cout << "p = " << p; pn;
	cout << "w = " << w; pn;
	cout << "k = " << k; pn;
	cout << "h = " << h; pn;
	cout << "n = " << n; pn;
	cout << "factor = " << factor; pn;
}


//############################# 分割线 ###############################

void NegacyclicNTT::init(int64 p, int64 w, int32 k, int32 h, bool tag)
{
	if (tag == 0)
		IncompleteNTT::init(p, w, k, 2 * h); //构建父类IncompleteNTT,并初始化
	else
		IncompleteNTT::init(p, w, k + 1, h); //构建父类IncompleteNTT,并初始化

	hh = h;
	hm = m >> 1;
	hn = n >> 1;

	int64 hminv, pinv;
	int64 gcd = exgcd(&hminv, &pinv, hm, p);
	if (gcd != 1)
		ErrorInfo("%s\n", "gcd(2^{k-1}, p) != 1");
	this->factor = hminv < 0 ? hminv + p : hminv;
	this->factor_pre = PreCompute(this->factor, p);
}


void NegacyclicNTT::FwdNTT(NTTRep& F, Poly& f)
{
	int32 d = deg(f);
	if (d >= this->hn)
		ErrorInfo("%s\n", "deg(f) > hn - 1");

	if (F.size() < this->hn)
		F.resize(this->hn);

	int64* pF = F.data();
	int64* pf = f.data();

	FwdNTT(pF, pf, f.size());
}

void NegacyclicNTT::FwdNTT(int64* pF, int64* pf, int32 flen)
{
	if (pF != pf)
	{
		int32 nn = min(flen, hn);
		for (int32 i = 0; i < nn; i++)
			pF[i] = BarrettMod(pf[i], one_pre, p);
	}

	int64* pW = W.data();
	int64* pW_pre = W_pre.data();

	int64 P = 2 * p;
	for (int32 j = 1; j < k; j++) //从j=1层开始迭代
	{
		int32 blocknum = 1 << j; //对j层分块
		int32 blocksize = this->n >> j; //第j层块大小
		int32 offset = blocksize >> 1; //计算第j+1层时蝴蝶的偏移量(块大小的一半)
		int32* list = brvlist[j].data();

		if (offset >= 4 * hh)
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 X, WY;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < offset; k += 4)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
		else if (offset == 2 * hh)
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 X, WY;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < hh; k++)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;

					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
		else
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 X, WY;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[list[i] << (k - j - 1)]; //第j层第i个分块使用的单位根,w_{2^{j+1}}^{brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[list[i] << (k - j - 1)];

				int64* pX = pF + (s);
				int64* pY = pF + (s + offset);

				for (int32 k = 0; k < hh; k++)
				{
					//CT蝴蝶(Harvey,输入输出范围[0,4p))
					X = *pX;
					X -= ((P - X) >> 63)&P;
					WY = BarrettMulMod(*pY, ww, ww_pre, p); //Barrett算法
					*pX = X + WY;
					*pY = X + (P - WY);
					pX++;
					pY++;
				}
			}
	}

	//迭代结束,结果是hm=2^{k-1}个h长多项式
	//约束范围到[0,2p)
	if (k >= 2)
		for (int32 i = 0; i < hn; i += 4)
		{
			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;

			*pF -= ((P - *pF) >> 63)&P;
			pF++;
		}
	else
		for (int32 i = 0; i < hn; i++)
		{
			*pF -= ((P - *pF) >> 63)&P;
			pF++;
		}
}



void NegacyclicNTT::InvNTT(Poly& f, NTTRep& F)
{
	int32 d = deg(F);
	if (d >= this->hn)
		ErrorInfo("%s\n", "deg(F) > hn - 1");

	if (f.size() < this->hn)
		f.resize(this->hn);

	int64* pF = F.data();
	int64* pf = f.data();

	InvNTT(pf, pF);
}

void NegacyclicNTT::InvNTT(int64* pf, int64* pF)
{
	if (pF != pf)
	{
		for (int32 i = 0; i < hn; i++)
			pf[i] = pF[i];
	}

	int64* pW = W.data();
	int64* pW_pre = W_pre.data();

	int64 P = 2 * p;
	for (int32 j = k - 1; j >= 1; j--) //回到第j=1层
	{
		int32 blocknum = 1 << j; //对j+1层分块
		int32 blocksize = this->n >> j; //第j+1层块大小
		int32 offset = blocksize >> 1; //计算第j层时蝴蝶的偏移量(块大小的一半)
		int32* list = brvlist[j].data();

		if (offset >= 4 * hh)
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 XY, T;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);

				for (int32 k = 0; k < offset; k += 4)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
		else if (offset == 2 * hh)
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 XY, T;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);

				for (int32 k = 0; k < hh; k++)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;

					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
		else
			for (int32 i = blocknum >> 1; i < blocknum; i++) //只处理下半部分
			{
				int64 XY, T;
				int32 s = (i - (blocknum >> 1)) * blocksize; //块的起始位置
				int64 ww = pW[m - (list[i] << (k - j - 1))]; //第j+1层第i个分块使用的单位根,w_{2^{j+1}}^{-brv_{j+1}(2i)}
				int64 ww_pre = pW_pre[m - (list[i] << (k - j - 1))];

				int64* pX = pf + (s);
				int64* pY = pf + (s + offset);
				for (int32 k = 0; k < hh; k++)
				{
					//GS蝴蝶(Harvey,输入输出范围[0,2p))
					XY = *pX + *pY;
					XY -= ((P - XY) >> 63)&P;
					T = *pX + (P - *pY);
					*pX = XY;
					*pY = BarrettMulMod(T, ww, ww_pre, p); //Barrett算法
					pX++;
					pY++;
				}
			}
	}

	//迭代结束,再整理一下
	int64 tmp;
	if (k >= 2)
		for (int32 i = 0; i < hn; i += 4)
		{
			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;

			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;
		}
	else
		for (int32 i = 0; i < hn; i++)
		{
			tmp = BarrettMulMod(*pf, factor, factor_pre, p);
			*pf = tmp - (tmp >= p)*p;
			pf++;
		}
}



void NegacyclicNTT::NTTMul(NTTRep& FG, NTTRep& F, NTTRep& G)
{
	if (F.size() < hn || G.size() < hn)
		ErrorInfo("%s\n", "F.size < n or G.size < n");

	if (FG.size() < hn)
		FG.resize(hn);

	int64* pFG = FG.data();
	int64* pF = F.data();
	int64* pG = G.data();

	NTTMul(pFG, pF, pG);
}

void NegacyclicNTT::NTTMul(int64* pFG, int64* pF, int64* pG)
{
	int64* pW = W.data();

	int64 ww;
	int64 r0, r1;
	int64 P = 2 * p;
	int32* list = brvlist[k].data() + hm;
	if (h == 1)
		if (k >= 2)
			for (int32 i = 0; i < hm; i += 4) //m个h长小多项式
			{
				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;

				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;
			}
		else
			for (int32 i = 0; i < hm; i++) //m个h长小多项式
			{
				//使用冗余表示,[0,2p)
				*pFG = BarrettMod(*pF * *pG, one_pre, p);

				pFG++;
				pF++;
				pG++;
			}
	else if (h == 2)
		if (k >= 3)
			for (int32 i = 0; i < hm; i += 4) //hm个h长小多项式
			{
				int64 tmp;

				//使用冗余表示,[0,2p)
				ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 1]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 2]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;

				//使用冗余表示,[0,2p)
				ww = pW[list[i + 3]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}

				tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;
			}
		else
			for (int32 i = 0; i < hm; i++) //hm个h长小多项式
			{
				//使用冗余表示,[0,2p)
				ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}

				int64 tmp = ww * BarrettMod(pF[1] * pG[1], one_pre, p);
				r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(tmp, one_pre, p);
				//r0 = BarrettMod(pF[0] * pG[0], one_pre, p) + BarrettMod(ww * BarrettMod(pF[1] * pG[1], one_pre, p), one_pre, p);
				r1 = BarrettMod(pF[0] * pG[1], one_pre, p) + BarrettMod(pF[1] * pG[0], one_pre, p);
				pFG[0] = r0 - (((P - r0) >> 63)&P);
				pFG[1] = r1 - (((P - r1) >> 63)&P);

				pFG += 2;
				pF += 2;
				pG += 2;
			}
	else
		for (int32 i = 0; i < hm; i++) //hm个h长小多项式
		{
			ww = pW[list[i]]; //第k层第2^{k-1}+i个多项式使用的单位根,w_{2^k}^{brv_k(2^{k-1}+i)}
			convolution(pFG, pF, pG, ww);

			pFG += h;
			pF += h;
			pG += h;
		}
}
PolyBarrett
#include "PolyBarrett.h"


// h = f / g mod p
void Div(Poly& h, Poly f, Poly g, int64 p)
{
	int df = deg(f);
	int dg = deg(g);

	int64 a = g[dg];
	int64 ainv = inv(a, p);

	int64* pf = f.data();
	int64* pg = g.data();

	for (int i = df; i >= dg; i--)
	{
		int64 b = pf[i];
		int64 c = (b * ainv) % p;
		h[i - dg] = c;
		pf[i] = 0;

		if (c == 0)
			continue;

		int64* ptr = pf + (i - dg);
		for (int j = 0; j < dg; j++)
		{
			ptr[j] -= c * pg[j];
			ptr[j] %= p;
			ptr[j] = ptr[j] < 0 ? ptr[j] + p : ptr[j];
		}
	}
}


void PolyBarrett::init(uint64 p, Poly& g, int32 acc, int32 h)
{
	this->p = p;
	this->dg = deg(g);
	this->acc = acc;
	this->k = dg + acc;
	this->g = g;

	this->N = 2 * dg + acc;	//因为需要计算f*m
	this->H = h;
	this->K = ceil(log2(N / h));
	this->n = pow(2, K)*H;

	uint64 x = (this->n * p*p) >> K;	// P > n*p^2
	do {
		x += 1;
		P = (x << K) + 1;
	} while (!Miller_Rabin(P, 10));	// 2^k | P-1

	int HN = pow(2, K) / 2;
	for (W = 2; W < P; W++)
		if (pow_mod(W, HN, P) == P - 1)	// order(w)=2^K
			break;

	Poly xk(k + 1);
	xk[k] = 1;
	this->m.resize(this->acc + 1);
	Div(this->m, xk, g, p);

	printf("Barrett of Polynomials: acc=%d, dg=%d, p=%lld -> ", acc, dg, p);
	printf("P=%lld, W=%lld, K=%d, H=%d.\n", P, W, K, H);

	this->NTT.init(P, W, K, H);
	this->NTT.FwdNTT(this->M, this->m);
	this->NTT.FwdNTT(this->G, this->g);
}


void PolyBarrett::Mod(int64* h, int64* f, int flen)
{
	Poly t;
	NTTRep T, F(this->n);

	NTT.FwdNTT(F.data(), f, flen);
	NTT.NTTMul(T, F, M);
	NTT.InvNTT(t, T);

	int64* pt = t.data();
	for (int i = 0; i < this->n - this->k; i++)
	{
		pt[i] = pt[i + k] % p;
	}
	for (int i = this->n - this->k; i < this->n; i++)
	{
		pt[i] = 0;
	}

	NTT.FwdNTT(T, t);
	NTT.NTTMul(T, T, G);
	NTT.InvNTT(t, T);

	for (int i = 0; i < this->dg; i++)
	{
		h[i] = (f[i] - pt[i]) % p;
		h[i] += h[i] < 0 ? p : 0;
	}
}
GFX
#include "GFX.h"


//g = f mod (x^n - 1)
void ModXnSubOne(GFX& g, GFX& f, int n)
{
	int fn = f.n;
	int fd = f.d;
	int fp = f.p;

	if (&g != &f)
		for (int i = 0; i < fn; i++)
		{
			int64* p1 = g.rep[i%n].data();
			int64* p2 = f.rep[i].data();
			for (int j = 0; j < fd; j++)
			{
				p1[j] += p2[j];
				p1[j] %= fp;
			}
		}
	else
		for (int i = n; i < fn; i++)
		{
			int64* p1 = g.rep[i%n].data();
			int64* p2 = f.rep[i].data();
			for (int j = 0; j < fd; j++)
			{
				p1[j] += p2[j];
				p1[j] %= fp;
				p2[j] = 0;
			}
		}
}

//g = f mod m(x),p
void Modm(int64* g, int64* f, int flen, Poly& m, uint64 p)
{
	int df;
	for (df = flen - 1; df >= 0; df--)
		if (f[df] != 0)
			break;
	int dm = deg(m);

	if (df < dm)
	{
		for (int i = 0; i <= df; i++)
			g[i] = f[i] % p;
		return;
	}

	int64* pf = new int64[df + 1];
	for (int i = 0; i < df + 1; i++)
		pf[i] = f[i];

	int64 a = m[dm];
	int64 ainv = inv(a, p);
	int64* pm = m.data();

	for (int i = df; i >= dm; i--)
	{
		int64 b = pf[i];
		int64 c = (b * ainv) % p;
		g[i - dm] = c;
		pf[i] = 0;

		if (c == 0)
			continue;

		int64* ptr = pf + (i - dm);
		for (int j = 0; j < dm; j++)
		{
			ptr[j] -= c * pm[j];
			ptr[j] %= p;
			ptr[j] = ptr[j] < 0 ? ptr[j] + p : ptr[j];
		}
	}

	for (int i = 0; i < dm; i++)
		g[i] = pf[i];
	delete pf;
}

ostream& operator<<(ostream& cout, GFX& f)
{
	int deg1 = f.deg();
	printf("[");
	for (int i = 0; i <= deg1; i++)
	{
		int deg2 = deg(f.rep[i]);
		int64* p = f.rep[i].data();

		printf(" [");
		for (int j = 0; j <= deg2; j++)
		{
			printf(" %lld ", p[j]);
		}
		printf("] ");
	}
	printf("]");
	return cout;
}

//c = a+b
void GFX_Calculator::Add(GFX&c, GFX& a, GFX&b)
{
	int d = a.d;
	int n = a.n;
	uint64 p = a.p;

	for (int i = 0; i < n; i++)
	{
		int64* pa = a.rep[i].data();
		int64* pb = b.rep[i].data();
		int64* pc = c.rep[i].data();

		for (int j = 0; j < d; j++)
		{
			pc[j] = pa[j] + pb[j];
			pc[j] -= ((p - pc[j]) >> 63)&p; //if c>p then c = c-p
		}
	}
}

//c = a-b
void GFX_Calculator::Sub(GFX&c, GFX& a, GFX&b)
{
	int d = a.d;
	int n = a.n;
	uint64 p = a.p;

	for (int i = 0; i < n; i++)
	{
		int64* pa = a.rep[i].data();
		int64* pb = b.rep[i].data();
		int64* pc = c.rep[i].data();

		for (int j = 0; j < d; j++)
		{
			pc[j] = pa[j] - pb[j];
			pc[j] += (pc[j] >> 63)&p; //if c<0 then c = c+p
		}
	}
}


//寻找合适的Z_P,以及2^k次单位根w
void GFX_Calculator::findP(int d, int n, uint64 p, uint64& P, uint64& w, int& k, int h)
{
	int N = 2 * ((2 * d)*n);
	k = ceil(log2(N / h));
	int N2 = pow(2, k);	// N2 * h = h*2^k >= N

	uint64 x = (N2*h * p*p) >> k;	// P > n*p^2
	do {
		x += 1;
		P = (x << k) + 1;
	} while (!Miller_Rabin(P, 10));	// 2^k | P-1

	int HN2 = N2 >> 1;
	for (w = 2; w < P; w++)
		if (pow_mod(w, HN2, P) == P - 1)	// order(w)=N2
			break;
}


//c = a*b,GF(p^d)=Z_p[x]/(m(x))
void GFX_Calculator::Mul(GFX&c, GFX& a, GFX&b)
{
	int d = a.d;
	int n = a.n;
	uint64 p = a.p;
	int BlockSize = 2 * d;
	int BlockNum = 2 * n;
	int Len = BlockSize * BlockNum;

	Poly A(Len);
	Poly B(Len);
	Poly C(Len);

	for (int i = 0; i < n; i++)
	{
		int64* pa = a.rep[i].data();
		int64* pb = b.rep[i].data();
		int64* pA = A.data() + (2 * d)*i;
		int64* pB = B.data() + (2 * d)*i;

		for (int j = 0; j < d; j++)
		{
			pA[j] = pa[j];
			pB[j] = pb[j];
		}
	}

	NTTRep A2, B2, C2;

	intt.FwdNTT(A2, A);
	intt.FwdNTT(B2, B);
	intt.NTTMul(C2, A2, B2);
	intt.InvNTT(C, C2);	//C=A*B

	int64* pC = C.data();
	for (int i = 0; i < Len; i++)
		pC[i] %= p;

	/*cout << "A = " << A; pn;
	cout << "B = " << B; pn;
	cout << "C = " << C; pn;*/

	c.zero();
	for (int i = 0; i < BlockNum; i++)
	{
		int64* pC = C.data() + BlockSize * i;
		PB.Mod(c.rep[i].data(), pC, BlockSize);
	}
}


//c = a*b,GF(p^d)=Z_p[x]/(m(x))
void GFX_Calculator::Mul2(GFX&c, GFX& a, GFX&b)
{
	int d = a.d;
	int n = a.n;
	uint64 p = a.p;
	int BlockSize = 2 * d;
	int BlockNum = 2 * n;
	int Len = BlockSize * BlockNum;

	Poly A(Len);
	Poly B(Len);
	Poly C(Len);

	for (int i = 0; i < n; i++)
	{
		int64* pa = a.rep[i].data();
		int64* pb = b.rep[i].data();
		int64* pA = A.data() + (2 * d)*i;
		int64* pB = B.data() + (2 * d)*i;

		for (int j = 0; j < d; j++)
		{
			pA[j] = pa[j];
			pB[j] = pb[j];
		}
	}

	NTTRep A2, B2, C2;

	intt.FwdNTT(A2, A);
	intt.FwdNTT(B2, B);
	intt.NTTMul(C2, A2, B2);
	intt.InvNTT(C, C2);	//C=A*B

	int64* pC = C.data();
	for (int i = 0; i < Len; i++)
		pC[i] %= p;

	/*cout << "A = " << A; pn;
	cout << "B = " << B; pn;
	cout << "C = " << C; pn;*/

	c.zero();
	for (int i = 0; i < BlockNum; i++)
	{
		int64* pC = C.data() + BlockSize * i;
		Modm(c.rep[i].data(), pC, BlockSize, m, p);
	}
}
结果
GFX Mul
m = [ 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 ]

GFX Mul: d=149,n=53,p=2 -> P=147457, W=22, K=14, H=2.
Barrett of Polynomials: acc=149, dg=149, p=2 -> P=3329, W=17, K=8, H=2.
a = [ [ 1 ]  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  [ 1 ] ]

b = [ [ 1  0  1 ]  [ 1 ] ]

10 rounds

使用Barrett:a*b = [ [ 1  0  1 ]  [ 1 ]  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  [ 1  0  1 ]  [ 1 ] ]
0.155162 s

使用长除法:a*b = [ [ 1  0  1 ]  [ 1 ]  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  [ 1  0  1 ]  [ 1 ] ]
0.291891 s

用长除法计算:a*b mod (x^n - 1) = [ [ 0  0  1 ]  [ 1 ]  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  []  [ 1  0  1 ] ]
0.0015902 s
  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值