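// https://www.cometoj.com/contest/59/problem/E?problem_id=2714
// Approach: use Gaussian elimination to solve for dp[1]..dp[k-1], then apply the recurrence with fast matrix exponentiation to obtain dp[d].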
#include <iostream>
#include <cstring>
#include <queue>
#include <vector>
#include <algorithm>
#include <cstdio>
//#include<bits/stdc++.h>
using namespace std;
#define sfi(i) scanf("%d",&i)
#define sfs(i) scanf("%s",(i))
#define pri(i) printf("%d\n",i)
#define sff(i) scanf("%lf",&i)
#define ll long long
#define ull unsigned long long
#define mem(x,y) memset(x,y,sizeof(x))
#define INF 0x3f3f3f3f
#define eps 1e-10
#define PI acos(-1.0)
#define lowbit(x) ((x)&(-x))
#define zero(x) (((x)>0?(x):-(x))<eps)
#define fl() printf("flag\n")
#define MOD(x) ((x%mod)+mod)%mod
#define endl '\n'
#define pb push_back
#define FAST_IO ios::sync_with_stdio(false);cin.tie(0);cout.tie(0)
//---------------------------------------------------------
#include<ext/pb_ds/assoc_container.hpp>
#include<ext/pb_ds/hash_policy.hpp>
#include <ext/pb_ds/priority_queue.hpp>
using namespace __gnu_pbds;
//gp_hash_table<string,int>mp2;
//__gnu_pbds::priority_queue<int>q;//因为放置和std重复,故需要带上命名空间
//__gnu_pbds::priority_queue<int,greater<int>,pairing_heap_tag> pq;//最快
//----------------------------------------------------------
/*
//----------------------------------------------------------
const int BufferSize = 1 << 16;
char buffer[BufferSize], *head, *tail;
inline char Getchar() {
if (head == tail) {
int l = fread(buffer, 1, BufferSize, stdin);
tail = (head = buffer) + l;
}
return *head++;
}
inline ll read() {
ll x = 0, f = 1;char c = Getchar();
for (;!isdigit(c);c = Getchar()) if (c == '-') f = -1;
for (;isdigit(c);c = Getchar()) x = x * 10 + c - '0';
return x * f;
}
//----------------------------------------------------------
*/
const int maxn = 2e6 + 9;
// Prime modulus used for all arithmetic in this file.
const long long mod = 1e9 + 7;

/**
 * Modular exponentiation by repeated squaring.
 *
 * @param x base; any 64-bit value (it is first reduced into [0, mod), so
 *          large or negative bases neither overflow nor yield negative results)
 * @param n exponent; values <= 0 yield 1
 * @return  x^n modulo `mod`, always in [0, mod)
 */
long long power(long long x, long long n)
{
    // Reduce the base up front: without this, x*x overflows signed 64-bit
    // for |x| >= ~3e9 and a negative base produces a negative residue.
    x %= mod;
    if (x < 0) x += mod;

    long long ans = 1;
    // n > 0 (not just n != 0): right-shifting a negative value sticks at -1,
    // which would otherwise loop forever.
    while (n > 0)
    {
        if (n & 1) ans = ans * x % mod;
        x = x * x % mod;
        n >>= 1;
    }
    return ans;
}
// Shared problem state: d and k are read from stdin in main; n is set there to k-1
// (the number of unknowns handed to Guass). Mmul sizes its product as (k+1)x(k+1)
// from this global k.
ll n,d,k;
/**
 * Fixed-capacity square matrix of 64-bit integers.
 *
 * Storage is DIM x DIM (indices 0..DIM-1). The rest of the file addresses it
 * 1-based, using rows/columns 1..k+1, so k+1 must stay below DIM.
 */
struct M
{
    enum { DIM = 23 };
    long long a[DIM][DIM];

    /** Sets every entry to zero. */
    void init()
    {
        memset(a, 0, sizeof(a));
    }

    /** Turns the matrix into the identity (ones on the diagonal, zeros elsewhere). */
    void Base()
    {
        init();
        // Strictly less than DIM: index DIM would write past the end of `a`.
        for (int i = 0; i < DIM; i++) a[i][i] = 1;
    }
};
/*
 * Modular matrix product x * y.
 *
 * Only the 1-based (k+1) x (k+1) sub-block is multiplied, where k is the
 * global read in main; every other entry of the result is left at zero.
 * Each product term is reduced modulo `mod` before being accumulated.
 */
M Mmul(M x,M y)
{
    const int dim=k+1; // taken from the global k
    M prod;
    prod.init();
    for(int row=1;row<=dim;++row)
    {
        for(int col=1;col<=dim;++col)
        {
            ll acc=0;
            for(int t=1;t<=dim;++t)
            {
                acc=(acc+(x.a[row][t]*y.a[t][col])%mod)%mod;
            }
            prod.a[row][col]=acc;
        }
    }
    return prod;
}
/*
 * Matrix exponentiation by repeated squaring: returns x^n, with products
 * taken by Mmul (so modulo `mod`, on the 1-based (k+1) x (k+1) sub-block).
 *
 * n <= 0 yields the identity matrix as produced by M::Base().
 */
M Mpower(M x,ll n)
{
    M res;
    res.Base();
    // n>0 rather than n!=0: a negative exponent shifted right sticks at -1,
    // so the original condition never terminated for n<0.
    while(n>0)
    {
        if(n&1) res=Mmul(res,x);
        x=Mmul(x,x);
        n>>=1;
    }
    return res;
}
// A: augmented coefficient matrix filled in main (1-based; column n+1 holds the right-hand side).
// x: solution vector written by Guass for indices 1..n.
// Both are zero-initialised globals; main reads x[1..k] although only x[1..k-1] are
// solved, so x[k] is used with its initial value 0.
ll A[110][110],x[110];
/*
 * Gaussian elimination modulo `mod`, followed by back-substitution.
 *
 * n : number of unknowns
 * m : number of equations
 * A : 1-based augmented matrix; row r holds coefficients in columns 1..n and
 *     the right-hand side in column n+1. It is modified in place.
 *
 * The solution is written to the global x[1..n].
 *
 * Entries are expected to be residues in [0, mod): the pivot search and the
 * "non-zero" tests compare with > 0. Division uses power(a, mod-2) as the
 * modular inverse. Back-substitution divides by A[i][i], i.e. it assumes the
 * pivot of row i ended up in column i (no column was skipped) and that rows
 * 1..n exist (m >= n).
 */
void Guass(ll n,ll m,ll A[][110])
{
    ll eq=1;   // equation (row) that receives the next pivot
    ll var=1;  // unknown (column) currently being eliminated
    while(eq<=m && var<=n)
    {
        // Choose the row at or below eq with the largest residue in this column;
        // a result of 0 means no remaining row has a usable pivot here.
        ll best=eq;
        for(ll r=eq+1;r<=m;r++)
            if(A[r][var]>A[best][var]) best=r;

        if(A[best][var]>0)
        {
            // Bring the pivot row into position.
            for(ll c=1;c<=n+1;c++) swap(A[eq][c],A[best][c]);

            // The pivot does not change while the rows below are processed, so
            // its inverse is computed once instead of once per eliminated row.
            const ll pivotInv=power(A[eq][var],mod-2);
            for(ll r=eq+1;r<=m;r++)
            {
                if(A[r][var]<=0) continue; // already zero in this column
                const ll f=A[r][var]*pivotInv%mod;
                // Columns before var are already zero in both rows.
                for(ll c=var;c<=n+1;c++)
                    A[r][c]=(A[r][c]-f*A[eq][c]%mod+mod)%mod;
            }
            eq++; // move on to the next equation
        }
        // If the column had no pivot, the equation index stays put and this
        // unknown is skipped (it then has no uniquely determined value).
        var++;
    }

    // Back-substitution from the last unknown upwards.
    for(ll i=n;i>=1;i--)
    {
        for(ll j=i+1;j<=n;j++)
            A[i][n+1]=(A[i][n+1]-A[i][j]*x[j]%mod+mod)%mod;
        x[i]=A[i][n+1]*power(A[i][i],mod-2)%mod;
    }
}
int main()
{
//FAST_IO;
//freopen("input.txt","r",stdin);
cin>>d>>k;
ll kk=power(k,mod-2);
n=k-1;
for(int i=1;i<=n;i++)
{
for(int j=1;j<=n;j++)
{
if(i+j<=k)
{
if(j<i)
{
A[i][j]=MOD(2*kk);
}
else if(j>i)
{
A[i][j]=kk;
}
else A[i][j]=MOD(kk-1);
}
else
{
if(j<i)
{
A[i][j]=kk;
}
else if(j>i)
{
A[i][j]=0;
}
else A[i][j]=MOD(-1);
}
}
A[i][n+1]=MOD(-1);
}
/*for(int i=1;i<=n;i++)
{
for(int j=1;j<=n;j++)
{
cout<<A[i][j]<<" ";
}
cout<<endl;
}*/
Guass(n,n,A);
M ans;
ans.Base();
M tmp;
tmp.init();
for(int i=1;i<=k;i++) tmp.a[1][i]=kk;
tmp.a[1][k+1]=1;
for(int i=2;i<=k;i++)
{
tmp.a[i][i-1]=1;
}
tmp.a[k+1][k+1]=1;
ans=Mpower(tmp,d-k+1);
//fl();
M xx;
xx.init();
for(int i=1;i<=k;i++)
{
xx.a[i][1]=x[i];
}
xx.a[k+1][1]=1;
ans=Mmul(ans,xx);
cout<<ans.a[1][1]<<endl;
return 0;
}