Solution1
分块+块内树状数组（实际实现中，每块维护的是排序后的数组及其权值前缀和，树状数组只用于统计初始答案）。可以踩掉标算，甚至碾压。
#include <cstdio>
#include <cstring>
#include <cmath>
#define rint register int
#define N 50010
#include <algorithm>
typedef long long ll;
using namespace std;
const ll mo = 1e9 + 7;
// Returns the next byte of stdin, refilling a 64 KiB static buffer with fread
// whenever it runs dry. Yields EOF (narrowed to char) once no more input exists.
inline char gc() {
    static char buffer[1 << 16];
    static char *cursor, *limit;
    if (cursor == limit) {
        cursor = buffer;
        limit = buffer + fread(buffer, 1, sizeof(buffer), stdin);
        if (cursor == limit) return EOF;
    }
    return *cursor++;
}
// Reads the next decimal integer from stdin through gc().
// Bytes that are not digits are skipped; a '-' among the skipped bytes negates
// the result. Returns 0 if the input ends before any digit is found (the
// previous version looped forever in that situation).
inline int read() {
    int value = 0, sign = 1;
    char c = gc();
    while (c < '0' || c > '9') {
        // gc() reports exhaustion as EOF narrowed to char; stop instead of spinning.
        // NOTE: a raw 0xFF byte in the input is indistinguishable from this marker.
        if (c == (char)EOF) return 0;
        if (c == '-') sign = -1;
        c = gc();
    }
    while (c >= '0' && c <= '9') {
        value = value * 10 + (c - '0');
        c = gc();
    }
    return value * sign;
}
// Writes x in decimal to stdout followed by a newline.
// The magnitude is computed in long long so that negating INT_MIN cannot
// overflow (the previous `x = 0 - x` was undefined behaviour for that value).
inline void print(int x) {
    if (x == 0) { puts("0"); return; }
    long long magnitude = x;
    if (magnitude < 0) {
        putchar('-');
        magnitude = -magnitude;
    }
    int digits[30], top = -1;
    // Collect digits least-significant first, then emit them in reverse.
    while (magnitude) {
        digits[++top] = (int)(magnitude % 10);
        magnitude /= 10;
    }
    for (int i = top; i >= 0; --i) putchar('0' + digits[i]);
    puts("");
}
// n elements, m swap queries.
int n, m;
// block: length of one block; num: number of blocks created in main().
int block, num;
// belong[i]: index of the block that contains position i.
int belong[N];
// q[]: for each block, the keys a[i].x of its positions sorted ascending (see build()).
int q[N];
// mp[key]: the weight that was read together with that key.
int mp[N];
// s[blk][k]: sum (mod mo) of the weights of the k smallest keys of block blk.
ll s[500][200];
// a[i]: x = key, y = weight at position i.  b[blk]: x = first, y = last position of the block.
struct node {
    int x, y;
};
node a[N], b[500];
inline void build(int x) {
for(rint i = b[x].x; i <= b[x].y; ++i) q[i] = a[i].x;
sort(q+b[x].x, q+b[x].y+1); for(rint i = 1; i <= b[x].y - b[x].x + 1; ++i) s[x][i] = 0;
for(rint i = b[x].x; i <= b[x].y; ++i) s[x][i - b[x].x + 1] = (s[x][i - b[x].x] + mp[q[i]]) % mo;
}
// bit1: Fenwick tree over keys holding weight sums (mod mo), used by add1/query1.
ll bit1[N];
// ans: the running answer that main() prints after every query.
ll ans = 0;
// bit2: Fenwick tree over keys used by add2/query2 (init() feeds it 1 per element).
ll bit2[N];
// Fenwick point update on bit1: adds v (mod mo) at index x and at every covering node up to n.
inline void add1(int x, ll v) {
    int pos = x;
    while (pos <= n) {
        bit1[pos] = (bit1[pos] + v) % mo;
        pos += pos & -pos;
    }
}
// Fenwick prefix query on bit1: returns the sum (mod mo) stored for indices 1..x.
inline ll query1(int x) {
    ll total = 0;
    int pos = x;
    while (pos) {
        total = (total + bit1[pos]) % mo;
        pos -= pos & -pos;
    }
    return total;
}
// Fenwick point update on bit2: adds v (mod mo) at index x and at every covering node up to n.
inline void add2(int x, ll v) {
    int pos = x;
    while (pos <= n) {
        bit2[pos] = (bit2[pos] + v) % mo;
        pos += pos & -pos;
    }
}
// Fenwick prefix query on bit2: returns the sum (mod mo) stored for indices 1..x.
inline ll query2(int x) {
    ll total = 0;
    int pos = x;
    while (pos) {
        total = (total + bit2[pos]) % mo;
        pos -= pos & -pos;
    }
    return total;
}
inline void init() {
for(rint i = 1; i <= n; ++i) bit1[i] = bit2[i] = 0;
for(rint i = n; i >= 1; --i) {
ans+= query1(a[i].x - 1) + query2(a[i].x) * a[i].y; ans%= mo; if(ans < 0) ans+= mo;
add1(a[i].x, a[i].y); add2(a[i].x, 1);
}
}
int main() {
n = read(); m = read();
for(rint i = 1; i <= n; ++i) {a[i].x = read(); a[i].y = read(); mp[a[i].x] = a[i].y;}
block = (int)sqrt(n)/2; num = 0;
for(rint i = 1; i <= n; i+= block) {
b[++num].x = i; b[num].y = min(n, i + block - 1);
for(rint j = i; j <= b[num].y; ++j) belong[j] = num;
}
for(rint i = 1; i <= num; ++i) build(i); init();
for(rint i = 1; i <= m; ++i) {
int x = read(), y = read(); if(x > y) swap(x, y); if(x == y) {print(ans); continue;}
int b1 = belong[x], b2 = belong[y];
for(rint j = b1 + 1; j <= b2 - 1; ++j) {
int sz = b[j].y - b[j].x + 1;
int p1 = lower_bound(q+b[j].x, q+b[j].y+1, a[x].x) - q;
p1-= b[j].x; ll temp1 = s[j][p1]; if(temp1 < 0) temp1+= mo;
ll ctb1 = (p1 * a[x].y + temp1) % mo;
int p2 = lower_bound(q+b[j].x, q+b[j].y+1, a[y].x) - q;
p2-= b[j].x; ll temp2 = s[j][p2]; if(temp2 < 0) temp2+= mo;
ll ctb2 = (p2 * a[y].y + temp2) % mo;
ans+= ctb2 - ctb1; if(ans < 0) ans+= mo;
ll temp = s[j][sz]; if(temp < 0) temp+= mo;
ctb1 = ((sz - p1) * a[x].y + temp - temp1) % mo;
ctb2 = ((sz - p2) * a[y].y + temp - temp2) % mo;
ans+= ctb1 - ctb2; if(ans < 0) ans+= mo;
}
ll ad = 0;
for(rint j = x + 1; j <= min(y - 1, b[b1].y); ++j) {
if(a[x].x > a[j].x) ad-= a[x].y + a[j].y, ad%= mo; else ad+= a[x].y + a[j].y, ad%= mo; if(ad < 0) ad+= mo;
if(a[y].x > a[j].x) ad+= a[y].y + a[j].y, ad%= mo; else ad-= a[y].y + a[j].y, ad%= mo; if(ad < 0) ad+= mo;
}
ll ad1 = ad;
for(rint j = max(x + 1, b[b2].x); j <= y - 1; ++j) {
if(a[j].x > a[y].x) ad-= a[y].y + a[j].y, ad%= mo; else ad+= a[y].y + a[j].y, ad%= mo; if(ad < 0) ad+= mo;
if(a[j].x > a[x].x) ad+= a[x].y + a[j].y, ad%= mo; else ad-= a[x].y + a[j].y, ad%= mo; if(ad < 0) ad+= mo;
}
if(b1 == b2) ans+= ad1; else ans+= ad; ans%= mo;
swap(a[x], a[y]); build(b1); build(b2);
if(a[x].x < a[y].x) ans-= a[y].y + a[x].y; else ans+= a[y].y + a[x].y; if(ans < 0) ans+= mo; ans%= mo;
print(ans);
}
return 0;
}
洛谷(O2)评测结果:用时: 7676ms / 内存: 4085KB
Solution2
树套树做法。标算。大概我天生常数大。
#include <cstdio>
#include <cstring>
#include <vector>
using namespace std;
// Buffered single-byte reader over stdin: hands out bytes from a 64 KiB static
// buffer and refills it with fread when empty. Returns EOF (as char) when the
// refill delivers nothing.
inline char gc() {
    static char chunk[1 << 16];
    static char *head, *tail;
    if (head != tail) return *head++;
    head = chunk;
    tail = chunk + fread(chunk, 1, sizeof(chunk), stdin);
    if (head == tail) return EOF;
    return *head++;
}
// Reads the next unsigned decimal integer from stdin through gc(), skipping any
// bytes that are not digits. Returns 0 if the input ends before a digit is
// found (the previous version looped forever in that situation).
inline int read() {
    int value = 0;
    char c = gc();
    while (c < '0' || c > '9') {
        // gc() reports exhaustion as EOF narrowed to char; stop instead of spinning.
        // NOTE: a raw 0xFF byte in the input is indistinguishable from this marker.
        if (c == (char)EOF) return 0;
        c = gc();
    }
    while (c >= '0' && c <= '9') {
        value = value * 10 + (c - '0');
        c = gc();
    }
    return value;
}
// Writes the decimal digits of x to stdout, most significant first, without a
// trailing newline. Digits are produced as '0' + x % 10, so only non-negative
// values give a sensible result.
void print(int x) {
    char digits[16];
    int count = 0;
    do {
        digits[count++] = '0' + x % 10;
        x /= 10;
    } while (x);
    while (count > 0) putchar(digits[--count]);
}
// Capacity of the per-position arrays.
const int N = 50010;
// All sums are kept modulo this value.
const int mod = 1000000007;
typedef long long ll;
// n elements, m swap queries.
int n, m;
// a[i]: key at position i; val[i]: weight at position i.
int a[N], val[N];
// Running answer printed after every query.
ll ans = 0;
// Fenwick tree over keys used by init() through addbit/askbit.
ll bit[N];
// Fenwick point update on bit: adds V (mod mod) at index x and every covering node up to n.
inline void addbit(int x, int V) {
    for (int pos = x; pos <= n; pos += pos & -pos)
        bit[pos] = (bit[pos] + V) % mod;
}
// Fenwick prefix query on bit: returns the sum (mod mod) stored for indices 1..x.
inline ll askbit(int x) {
    ll total = 0;
    for (int pos = x; pos; pos -= pos & -pos)
        total = (total + bit[pos]) % mod;
    return total;
}
inline void init() {
for(int i = n; i; --i) {
ans = (ans + askbit(a[i] - 1)) % mod;
addbit(a[i], val[i]);
}
memset(bit, 0, sizeof(bit));
for(int i = 1; i <= n; ++i) {
ans = (ans + ((askbit(n) - askbit(a[i])) % mod + mod) % mod) % mod;
addbit(a[i], val[i]);
}
}
// rt[j]: root of the segment tree attached to Fenwick node j (0 = empty tree).
int rt[N];
// len: highest node id handed out so far by newnode().
int len = 0;
// L / R: child ids of each segment-tree node (0 = no child).
int L[N * 256], R[N * 256];
// cnt: number of elements stored below each node.
int cnt[N * 256];
// v: sum (mod mod) of the weights stored below each node.
ll v[N * 256];
// st: ids of released nodes available for reuse.
vector<int> st;
// Returns a node id: the most recently released one if any is waiting in st,
// otherwise a brand-new id obtained by incrementing len.
inline int newnode() {
    if (st.empty()) return ++len;
    const int id = st.back();
    st.pop_back();
    return id;
}
// Applies weight delta x at key pos inside the segment tree rooted at p, whose
// node covers keys [l, r]. A positive x counts as an insertion (cnt + 1), any
// other x as a removal (cnt - 1). Missing nodes are created on the way down;
// a node whose count returns to zero is cleared, pushed onto st for reuse, and
// unlinked from its parent by setting the reference p to 0.
void add(int &p, int l, int r, int pos, int x) {
    if (p == 0) p = newnode();
    v[p] = (v[p] + x) % mod;
    if (v[p] < 0) v[p] += mod;
    if (x > 0) ++cnt[p];
    else --cnt[p];
    if (l < r) {
        const int mid = (l + r) >> 1;
        if (pos > mid) add(R[p], mid + 1, r, pos, x);
        else add(L[p], l, mid, pos, x);
    }
    if (cnt[p] == 0) {
        st.push_back(p);
        R[p] = 0;
        L[p] = 0;
        v[p] = 0;
        cnt[p] = 0;
        p = 0;
    }
}
// Root lists filled by getBIT(): b1 for the prefix ending at y, b2 for the prefix ending at x - 1.
vector<int> b1;
vector<int> b2;
// Collects the segment-tree roots that make up the position range [x, y]:
// b1 receives the roots of the Fenwick decomposition of prefix y, b2 those of
// prefix x - 1 (to be subtracted by the caller).
inline void getBIT(int x, int y) {
    b1.clear();
    b2.clear();
    for (int node = y; node != 0; node -= node & -node) b1.push_back(rt[node]);
    for (int node = x - 1; node != 0; node -= node & -node) b2.push_back(rt[node]);
}
// Returns the weight sum (mod mod) of the keys in [x, y] stored in the segment
// tree node p, which covers keys [l, r]. An absent node (p == 0) contributes 0.
ll getV(int p, int l, int r, int x, int y) {
    if (p == 0) return 0;
    if (l >= x && y >= r) return v[p];
    const int mid = (l + r) >> 1;
    ll total = 0;
    if (mid >= x) total = getV(L[p], l, mid, x, y) % mod;
    if (y > mid) total = (total + getV(R[p], mid + 1, r, x, y)) % mod;
    return total;
}
// Returns how many stored keys fall in [x, y] inside the segment tree node p,
// which covers keys [l, r]. An absent node (p == 0) contributes 0.
ll getC(int p, int l, int r, int x, int y) {
    if (p == 0) return 0;
    if (l >= x && y >= r) return cnt[p];
    const int mid = (l + r) >> 1;
    int total = 0;
    if (mid >= x) total += getC(L[p], l, mid, x, y);
    if (y > mid) total += getC(R[p], mid + 1, r, x, y);
    return total;
}
int main() {
n = read(); m = read();
for(int i = 1; i <= n; ++i) {
a[i] = read(); val[i] = read();
for(int j = i; j <= n; j+= j & -j) {
add(rt[j], 1, n, a[i], val[i]);
}
}
init();
for(int i = 1; i <= m; ++i) {
int x = read(), y = read(); if(x > y) swap(x, y); if(x == y) {print(ans); puts(""); continue;}
if(a[x] < a[y]) ans = (ans + val[x] + val[y]) % mod; else ans = ((ans - val[x] - val[y]) % mod + mod) % mod;
getBIT(x + 1, y - 1);
for(vector<int>::iterator it = b1.begin(); it != b1.end(); ++it) {
ans = ((ans - (getV(*it, 1, n, 1, a[x] - 1) + getC(*it, 1, n, 1, a[x] - 1) * val[x] % mod) % mod) % mod + mod) % mod;
ans = (ans + (getV(*it, 1, n, a[x] + 1, n) + getC(*it, 1, n, a[x] + 1, n) * val[x] % mod) % mod) % mod;
ans = ((ans - (getV(*it, 1, n, a[y] + 1, n) + getC(*it, 1, n, a[y] + 1, n) * val[y] % mod) % mod) % mod + mod) % mod;
ans = (ans + (getV(*it, 1, n, 1, a[y] - 1) + getC(*it, 1, n, 1, a[y] - 1) * val[y] % mod) % mod) % mod;
}
for(vector<int>::iterator it = b2.begin(); it != b2.end(); ++it) {
ans = ((ans - (getV(*it, 1, n, a[x] + 1, n) + getC(*it, 1, n, a[x] + 1, n) * val[x] % mod) % mod) % mod + mod) % mod;
ans = (ans + (getV(*it, 1, n, 1, a[x] - 1) + getC(*it, 1, n, 1, a[x] - 1) * val[x] % mod) % mod) % mod;
ans = ((ans - (getV(*it, 1, n, 1, a[y] - 1) + getC(*it, 1, n, 1, a[y] - 1) * val[y] % mod) % mod) % mod + mod) % mod;
ans = (ans + (getV(*it, 1, n, a[y] + 1, n) + getC(*it, 1, n, a[y] + 1, n) * val[y] % mod) % mod) % mod;
}
print(ans); puts("");
for(int j = x; j <= n; j+= j & -j) add(rt[j], 1, n, a[x], -val[x]);
for(int j = y; j <= n; j+= j & -j) add(rt[j], 1, n, a[y], -val[y]);
swap(a[x], a[y]); swap(val[x], val[y]);
for(int j = x; j <= n; j+= j & -j) add(rt[j], 1, n, a[x], val[x]);
for(int j = y; j <= n; j+= j & -j) add(rt[j], 1, n, a[y], val[y]);
}
return 0;
}
洛谷(O2)评测结果:用时: 14720ms / 内存: 75117KB
然而bzoj就不那么友好了。