∑i=1n∑1<=j<=m,j≠i(n mod i)∗(m mod j)=∑i=1n∑1<=j<=m,j≠i(n−i∗⌊ni⌋)∗(m−j∗⌊mj⌋)=∑i=1n(n−i∗⌊ni⌋)∗∑1<=j<=m,j≠i(m−j∗⌊mj⌋)=∑i=1n(n−i∗⌊ni⌋)∗∑j=1m(m−j∗⌊mj⌋)−∑i=1min(n,m)(n−i∗⌊ni⌋)∗(m−j∗⌊mj⌋)=(n2−∑i=1ni∗⌊ni⌋)∗(m2−∑i=1mi∗⌊mi⌋)−min(n,m)∗n∗m+(m∗∑i=1min(n,m)i∗⌊ni⌋+n∗∑i=1min(n,m)i∗⌊mi⌋)−∑i=1min(n,m)i2∗⌊ni⌋∗⌊mi⌋
然后分块加速搞就可以了,总的复杂度: o(n√) ,注意乘积越界的问题,还有取模的问题就搞定了。
#include <bits/stdc++.h>
#define LL long long
#define FOR(i,x,y) for(int i = x;i < y;++ i)
#define IFOR(i,x,y) for(int i = x;i > y;-- i)
using namespace std;
const int Mod = 19940417;
LL inv;
LL n,m;
void Gcd(LL a,LL b,LL& d,LL& x,LL& y){
if(!b) {d = a;x = 1;y = 0;}
else{Gcd(b,a%b,d,y,x);y -= x*(a/b);}
}
LL Inv(LL a,LL n){
LL d,x,y;
Gcd(a,n,d,x,y);
return d == 1 ? (x+n)%n : -1;
}
void work(){
LL t1 = 0;
for(LL i = 1;i <= m;){
LL d = m/i;
LL j = m/d;
LL tem = ((i+j)*(j-i+1)/2)%Mod;
t1 = (t1+d*tem%Mod)%Mod;
i = j+1;
}
t1 = ((m*m)%Mod+Mod-t1)%Mod;
LL t2 = 0;
for(LL i = 1;i <= n;){
LL d = n/i;
LL j = n/d;
LL tem = ((i+j)*(j-i+1)/2)%Mod;
t2 = (t2+d*tem%Mod)%Mod;
i = j+1;
}
t2 = ((n*n)%Mod+Mod-t2)%Mod;
LL ans = t1*t2%Mod;
int c = min(n,m);
t1 = t2 = 0;
LL t3 = 0;
for(LL i = 1;i <= c;){
LL d1 = n/i,d2 = m/i;
LL j = min(n/d1,m/d2);
LL tem = ((i+j)*(j-i+1)/2)%Mod;
t1 = (t1+d1*tem%Mod)%Mod;
t2 = (t2+d2*tem%Mod)%Mod;
LL t_1 = (((j*(j+1)%Mod)*((j*2+1)%Mod)%Mod)*inv)%Mod;
LL t_2 = (((i*(i-1)%Mod)*((i*2-1)%Mod)%Mod)*inv)%Mod;
LL t_ = (t_1-t_2+Mod)%Mod;
t_ = ((d1*d2)%Mod)*t_%Mod;
t3 = (t3+t_)%Mod;
i = j+1;
}
LL res = ((((m*n)%Mod)*c%Mod-(m*t1)%Mod-(n*t2)%Mod+t3)%Mod+Mod)%Mod;
ans = (ans-res+Mod)%Mod;
printf("%lld\n",ans);
}
int main()
{
//freopen("test.in","r",stdin);
inv = Inv(6,Mod);
while(~scanf("%lld%lld",&n,&m)){
work();
}
return 0;
}