矩阵乘法可以分块进行,而且矩阵幂的前缀和满足线性递推关系。
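记 $S_i = A + A^2 + \cdots + A^i$,不难得到 $S_i = S_{i-1} + A \cdot A^{i-1}$,$A^i = A \cdot A^{i-1}$。把它写成分块矩阵 $\begin{pmatrix} I & A \\ 0 & A \end{pmatrix}^k = \begin{pmatrix} I & S_k \\ 0 & A^k \end{pmatrix}$,然后用矩阵快速幂就可以了。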
/*********************************************************
 *            ------------------                         *
 *   author AbyssalFish                                  *
 *********************************************************/
//
// Reads nn, k, M and an nn x nn integer matrix A, then prints
// (A + A^2 + ... + A^k) mod M.
//
// Method: build the 2nn x 2nn block matrix
//
//        | I  A |                      | I  A + A^2 + ... + A^k |
//    B = |      |     so that    B^k = |                        |
//        | 0  A |                      | 0          A^k         |
//
// and raise it to the k-th power by binary exponentiation. The answer is the
// top-right nn x nn block of B^k.

#include<cstdio>
#include<iostream>
#include<string>
#include<cstring>
#include<queue>
#include<vector>
#include<stack>
#include<map>
#include<set>
#include<algorithm>
#include<cmath>
#include<ctime>
using namespace std;

typedef long long ll;
typedef vector<int> row;
typedef vector<row> mat;

// n: side length of the block matrix (2 * nn); k: exponent; M: modulus.
// They are globals because the matrix operators below read n and M.
int n, k, M;

// Product of two n x n matrices with every entry reduced modulo M.
// The result is a fresh matrix, so it never aliases either operand.
mat operator *(const mat &A, const mat &B)
{
    mat R(n, row(n));
    for(int i = 0; i < n; i++){
        for(int j = 0; j < n; j++){
            for(int t = 0; t < n; t++){
                // Widen before multiplying: the product of two ints can
                // overflow int (undefined behaviour) when entries are large.
                R[i][j] = static_cast<int>((R[i][j] + static_cast<ll>(A[i][t])*B[t][j])%M);
            }
        }
    }
    return R;
}

//#define LOCAL
#ifdef LOCAL
// Debug helper: dumps a matrix to stdout, one row per line.
void censor(mat &B)
{
    for(const auto &r: B){
        for(int c: r) cout<<c<<' ';
        cout<<'\n';
    }
}
#endif

// A raised to the non-negative power q (mod M) by binary exponentiation.
// q == 0 yields the n x n identity matrix.
mat operator ^(mat A, int q)
{
    mat Re(n, row(n));
    for(int i = 0; i < n; i++) Re[i][i] = 1;
    while(q){
        if(q&1) Re = Re*A;
        q >>= 1;
        // Skip the final squaring: its result would never be used.
        if(q) A = A*A;
    }
    return Re;
}

int main()
{
#ifdef LOCAL
    freopen("in.txt","r",stdin);
#endif
    int nn;
    // Reject malformed or degenerate input up front: M <= 0 would make the
    // modulo undefined, k < 0 would never terminate in operator^, and
    // nn <= 0 leaves nothing to compute.
    if(scanf("%d%d%d",&nn,&k,&M) != 3 || nn <= 0 || k < 0 || M <= 0) return 0;
    n = 2*nn;

    mat A(nn, row(nn));
    for(int i = 0; i < nn; i++){
        for(int j = 0; j < nn; j++){
            if(scanf("%d",&A[i][j]) != 1) return 0;
        }
    }

    // Assemble B = [[I, A], [0, A]].
    mat B(n, row(n));
    for(int i = 0; i < nn; i++){
        B[i][i] = 1;
        copy(A[i].begin(), A[i].end(), B[i].begin()+nn);
        copy(A[i].begin(), A[i].end(), B[i+nn].begin()+nn);
    }

    B = B^k;

    // The top-right block of B^k holds A + A^2 + ... + A^k (mod M).
    for(int i = 0; i < nn; i++){
        for(int j = 0; j < nn; j++){
            printf("%d%c", B[i][j+nn], j==nn-1?'\n':' ');
        }
    }
#ifdef LOCAL
    // clock() counts ticks; convert to milliseconds before printing.
    cout<<"run time:"<<clock()*1000.0/CLOCKS_PER_SEC<<"ms"<<'\n';
#endif // LOCAL
    return 0;
}