快速乘 + Montgomery modular multiplication

最新推荐文章于 2023-03-09 16:58:01 发布

Scar_Halo

最新推荐文章于 2023-03-09 16:58:01 发布

阅读量866

点赞数 1

分类专栏： ACM - 黑科技函数文章标签：快速乘 Montgomery modular multiplication O(1) 快速模

本文链接：https://blog.csdn.net/Scar_Halo/article/details/82946977

版权

ACM - 黑科技函数专栏收录该内容

15 篇文章 0 订阅

订阅专栏

$O （ l o g n ） :$

ll mult(ll a, ll b, ll p) {
    a %= p; b %= p;
    ll ret = 0;
    while(b) {
        if(b & 1) ret = (ret + a) % p;
        a = (a + a) % p;
        b >>= 1;
    }
    return ret;
}

$O （ 1 ） :$

出自：
在这里插入图片描述

ll qmul(ll a, ll b) {
    return (a*b - (ll)((long double)a/mod*b)*mod+mod)%mod;
}

$P S ：$
     ①、double可能会挂，最好long double
     ②、 $u, v > = p$ 可能会挂，必要时先% $p$
     ③、用浮点数算出 $u * v / p$ 的值时事实上允许了 $\pm 1$ 的误差，因此可能出现负数，所以必须 $+ p$ 再% $p$ 。因此理论上不需要+ $e p s$

另一种模板：

ll qmul(ll a, ll b) {
	ll d = (ll)floor(a*(long double)b/mod+0.5);
	ll ret = a * b - d * mod;
	if (ret < 0) ret += mod;
	return ret;
}

也称 $O (1)$ 快速乘

但还有一种黑科技，根据牛客的一道题：电音之王

用 $O (1)$ 快速乘可以勉强卡过这道题
但根据出题人DLS的一句话题解，正确做法为：Montgomery modular multiplication

暂时没人写详细的博客
这里附上DLS用Montgomery modular multiplication的解题代码：

#include <bits/stdc++.h>
using namespace std;
#define rep(i,a,n) for (int i=a;i<n;i++)
#define per(i,a,n) for (int i=n-1;i>=a;i--)
#define pb push_back
#define mp make_pair
#define all(x) (x).begin(),(x).end()
#define fi first
#define se second
#define SZ(x) ((int)(x).size())
typedef vector<int> VI;
typedef long long ll;
typedef pair<int,int> PII;
const ll mod=1000000007;
ll powmod(ll a,ll b) {ll res=1;a%=mod; assert(b>=0); for(;b;b>>=1){if(b&1)res=res*a%mod;a=a*a%mod;}return res;}
ll gcd(ll a,ll b) { return b?gcd(b,a%b):a;}
// head
 
typedef unsigned long long u64;
typedef __int128_t i128;
typedef __uint128_t u128;
int _,k;
u64 A0,A1,M0,M1,C,M;
 
struct Mod64 {
	Mod64():n_(0) {}
	Mod64(u64 n):n_(init(n)) {}
	static u64 init(u64 w) { return reduce(u128(w) * r2); }
	static void set_mod(u64 m) {
		mod=m; assert(mod&1);
		inv=m; rep(i,0,5) inv*=2-inv*m;
		r2=-u128(m)%m;
	}
	static u64 reduce(u128 x) {
		u64 y=u64(x>>64)-u64((u128(u64(x)*inv)*mod)>>64);
		return ll(y)<0?y+mod:y;
	}
	Mod64& operator += (Mod64 rhs) { n_+=rhs.n_-mod; if (ll(n_)<0) n_+=mod; return *this; }
	Mod64 operator + (Mod64 rhs) const { return Mod64(*this)+=rhs; }
	Mod64& operator -= (Mod64 rhs) { n_-=rhs.n_; if (ll(n_)<0) n_+=mod; return *this; }
	Mod64 operator - (Mod64 rhs) const { return Mod64(*this)-=rhs; }
	Mod64& operator *= (Mod64 rhs) { n_=reduce(u128(n_)*rhs.n_); return *this; }
	Mod64 operator * (Mod64 rhs) const { return Mod64(*this)*=rhs; }
	u64 get() const { return reduce(n_); }
	static u64 mod,inv,r2;
	u64 n_;
};
u64 Mod64::mod,Mod64::inv,Mod64::r2;
 
u64 pmod(u64 a,u64 b,u64 p) {
	u64 d=(u64)floor(a*(long double)b/p+0.5);
	ll ret=a*b-d*p;
	if (ret<0) ret+=p;
	return ret;
}
 
 
void bruteforce() {
	u64 ans=1;
	for (int i=0;i<=k;i++) {
		ans=pmod(ans,A0,M);
		u64 A2=pmod(M0,A1,M)+pmod(M1,A0,M)+C;
		while (A2>=M) A2-=M;
		A0=A1; A1=A2;
	}
	printf("%llu\n",ans);
}
 
int main() {
	for (scanf("%d",&_);_;_--) {
		scanf("%llu%llu%llu%llu%llu%llu%d",&A0,&A1,&M0,&M1,&C,&M,&k);
		Mod64::set_mod(M);
		Mod64 a0(A0),a1(A1),m0(M0),m1(M1),c(C),ans(1),a2(0);
		for (int i=0;i<=k;i++) {
			ans=ans*a0;
			a2=m0*a1+m1*a0+c;
			a0=a1; a1=a2;
		}
		printf("%llu\n",ans.get());
	}
}

（比 $O (1)$ 快速乘快了 $1$ 秒多）

简化为：

const int mod = 1e9 + 7;
typedef unsigned long long ull;
typedef __uint128_t L;
struct FastMod {
	ull b, m;
	FastMod(ull b) : b(b), m(ull((L(1) << 64) / b)) {}
	inline ull reduce(ull a) { 
		return a - (ull)((L(m) * a) >> 64) * b;
	}
} F(mod);