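// Source: http://www.elijahqi.win/archives/3822
// Treat it as a directed cycle.
// Whichever point we start from, following the optimal strategy must give the same result, so we may as well look at it as follows.
// Scan while carrying a prefix sum along; whenever we meet one that is below the average, give it the earliest surplus we are holding first.
// Note that a single scan is not enough, but at most two passes suffice, because the optimal strategy can never go around the cycle more than once.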
#include<bits/stdc++.h>
#define ll long long
using namespace std;
// Returns the next byte of stdin, served from a 64 KiB static buffer that is
// refilled with fread() whenever it runs dry. Once fread() delivers no more
// bytes, EOF (converted to char by the return type) is returned.
inline char gc(){
    static char buffer[1<<16];
    static char *cursor=nullptr,*limit=nullptr;
    if (cursor==limit){
        // Buffer exhausted (or never filled): pull in the next chunk.
        cursor=buffer;
        limit=buffer+fread(buffer,1,1<<16,stdin);
        if (cursor==limit) return EOF;
    }
    return *cursor++;
}
// Parses the next decimal integer from the gc() byte stream.
//
// Leading non-digit bytes are skipped; if any skipped byte is '-', the result
// is negated. Digits are then accumulated until the first non-digit byte.
// Returns 0 (possibly negated) when the input ends before any digit is seen,
// instead of spinning forever on an exhausted stream.
inline int read(){
    // gc() returns EOF narrowed to char, so compare against the same narrowing.
    // A literal 0xFF byte in the input is therefore also treated as end of input.
    const char eof_ch=static_cast<char>(EOF);
    int x=0,f=1;
    char ch=gc();
    // isdigit() is undefined for negative values other than EOF, hence the
    // unsigned char casts.
    while(ch!=eof_ch&&!isdigit(static_cast<unsigned char>(ch))){
        if (ch=='-') f=-1;
        ch=gc();
    }
    while(ch!=eof_ch&&isdigit(static_cast<unsigned char>(ch))){
        x=x*10+(ch-'0');
        ch=gc();
    }
    return x*f;
}
// Capacity of every fixed-size table below.
const int N=200010;
// Large sentinel used as the starting value of a minimum search.
const int inf=0x3f3f3f3f;
// n: number of values read; m: modulus / number of remainder buckets.
int n,m;
// s[r]: running sum over remainders 0..r of (bucket size - n/m).
int s[N];
// q[r]: 1-based indices i whose value a[i] has remainder r modulo m.
std::vector<int> q[N];
// pd: remainders whose bucket currently holds more than n/m entries.
std::queue<int> pd;
// a[i]: the values themselves (1-based), updated in place by add().
long long a[N];
// ans: sum of all increments applied through add().
long long ans;
// Raises a[p] by the number of +1 steps needed to move remainder `sta` to
// remainder `ed` on the cycle 0..m-1, and adds the same amount to `ans`.
// When sta < ed the distance is ed-sta; otherwise the path wraps past m-1,
// giving ed+m-sta.
inline void add(int p,int sta,int ed){
    const int steps=(sta<ed)?(ed-sta):(ed+m-sta);
    a[p]+=steps;
    ans+=steps;
}
int main(){
// freopen("d.in","r",stdin);
n=read();m=read();int d=n/m,st;
for (int i=1;i<=n;++i) a[i]=read(),q[a[i]%m].push_back(i);
for (int i=0;i<m;++i){
if (i==0) {s[i]=q[i].size()-d;continue;}
s[i]=s[i-1]+q[i].size()-d;
}int now=inf;
for (int i=0;i<m;++i){
if (s[i]<now) now=s[i],st=i+1;
}st%=m;
for (int i=0;i<2*n;++i){
if (q[st].size()>d) {pd.push(st);++st;st%=m;continue;}
if (q[st].size()<d){
int x;
while(1){
x=pd.front();
while(q[x].size()>d&&q[st].size()<d){
add(q[x][q[x].size()-1],x,st);
q[x].pop_back();q[st].push_back(1);
}if (q[x].size()==d) pd.pop();
if (q[st].size()==d) break;
}continue;
}++st;st%=m;
}printf("%lld\n",ans);
for (int i=1;i<=n;++i) printf("%lld ",a[i]);
return 0;
}