首先,可以先思考一下 $n$ 较小的情况。
设 $dp_i$ 表示凑成总空间恰好为 $i$ 的方案数。
显然可得:$dp_i = dp_{i-1} + dp_{i-m}$(边界为 $dp_0 = 1$;当 $i < m$ 时没有 $dp_{i-m}$ 这一项)。
答案:$dp_n$。
但是你会发现,上述做法的时间复杂度是 $O(n)$,这么做会 TLE。可以考虑使用矩阵快速幂优化。详细思路见下图:
具体实现可以参考我的代码:
#include <bits/stdc++.h>
using namespace std;
// 64-bit signed integer used for every count and modular product.
using ll = long long;

// Capacity of each array dimension. main indexes from 1 up to m, so m must
// stay strictly below this value.
const int M = 110;

// t: square matrix that mul1 multiplies f by and that mul2 squares in place.
ll t[M][M];
// f: state vector; only row 1 is ever read or written with meaningful data.
ll f[2][M];
// dp: first terms of the recurrence, filled directly in main.
ll dp[M];

// Index of the requested term (read from input in main).
ll n;

// Prime modulus applied to every stored result.
const ll mod = 1000000007LL;

// Offset of the second recurrence term, also the matrix dimension
// (read from input in main).
int m;
void mul1() {
ll f2[M][M];
memcpy(f2, f, sizeof(f2));
memset(f, 0, sizeof(f));
for (int i = 1; i <= 1; i++) {
for (int j = 1; j <= m; j++) {
for (int k = 1; k <= m; k++) {
f[i][j] += f2[i][k] * t[k][j];
f[i][j] %= mod;
}
}
}
}
void mul2() {
ll t2[M][M];
memcpy(t2, t, sizeof(t2));
memset(t, 0, sizeof(t));
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= m; j++) {
for (int k = 1; k <= m; k++) {
t[i][j] += t2[i][k] * t2[k][j];
t[i][j] %= mod;
}
}
}
}
int main() {
ios::sync_with_stdio(false), cin.tie(0);
cin >> n >> m;
for (int i = 2; i <= m; i++) {
t[i - 1][i] = 1;
}
t[1][1] = 1;
t[m][1] = 1;
dp[0] = 1;
for (int i = 1; i <= m; i++) {
dp[i] = dp[i - 1];
if (i >= m) {
dp[i] += dp[i - m];
}
dp[i] %= mod;
}
if (n < m) {
cout << dp[n] << "\n";
return 0;
}
for (int i = 1; i <= m; i++) {
f[1][i] = dp[m - i + 1];
}
ll tot = n - m;
while (tot) {
if (tot & 1) {
mul1();
}
mul2();
tot /= 2;
}
cout << f[1][1] << "\n";
return 0;
}