矩阵快速幂 + 矩阵构造
AC如下
#include <bits/stdc++.h>
#include<iostream>
#include<algorithm>
#include<cstdlib>
#include<cstring>
#include<cstdio>
#include<string>
#include<vector>
#include<bitset>
#include<queue>
#include<deque>
#include<stack>
#include<cmath>
#include<list>
#include<map>
#include<set>
// Uncomment to compile the `#ifdef DEBUG` sections of main().
//#define DEBUG
// Shorthand for `register int`. Note that the `register` storage class was
// removed in C++17, so expanding RI there is ill-formed.
#define RI register int
// From here on the identifier `endl` expands to a newline string literal.
#define endl "\n"
using namespace std;
typedef long long ll;
//typedef __int128 lll;

// Capacities and numeric constants.
const int N = 110;                // side length of Matrix::a and size of a[]
const int M = 100010;
const int MOD = 998244353;        // modulus applied by Matrix::operator*
const double PI = acos(-1.0);
const double EXP = 1e-8;
const int INF = 0x3f3f3f3f;

// Zero-initialised scratch variables; main() reads n and k from stdin.
ll t, n, m, k;
ll p, l, r;
ll u, v;
int cnt, flag;
int temp, sum;
int a[N];
// Square matrix of long long entries backed by a fixed N x N array, with
// multiplication taken modulo MOD.
//
// `n` is the largest index that print() and operator* visit: both walk rows
// and columns 0..n inclusive, i.e. an (n+1) x (n+1) window. Cells that are
// never written keep the zero stored by the constructor, so a window larger
// than the populated region only adds zero rows/columns. n must stay below N.
struct Matrix
{
    int n;             // inclusive upper index bound used by print() and operator*
    long long a[N][N]; // entries; zero-filled by the constructor

    // Creates an all-zero matrix whose index bound is nn (default 1).
    Matrix(int nn = 1) : n(nn)
    {
        memset(a, 0, sizeof(a));
    }

    // Writes rows 0..n to stdout: entries separated by a space, each row
    // terminated by a newline.
    void print() const
    {
        for (int i = 0; i <= n; ++i)
            for (int j = 0; j <= n; ++j)
                printf("%lld%c", a[i][j], " \n"[j == n]);
    }

    // Returns (*this * b) mod MOD over indices 0..n, using this matrix's n
    // (b.n is not consulted).
    //
    // Each operand is reduced modulo MOD before it is multiplied, so the
    // product of two factors stays below MOD^2 and cannot overflow long long
    // even when a caller stored unreduced values. Every result entry is
    // normalised into [0, MOD), including when operands are negative.
    Matrix operator*(const Matrix &b) const
    {
        Matrix c(n);
        for (int i = 0; i <= n; ++i)
        {
            for (int j = 0; j <= n; ++j)
            {
                long long acc = 0;
                for (int k = 0; k <= n; ++k)
                {
                    const long long lhs = a[i][k] % MOD;
                    const long long rhs = b.a[k][j] % MOD;
                    acc = (acc + lhs * rhs) % MOD;
                }
                // C++ '%' keeps the sign of the dividend; lift negative
                // residues back into the canonical range.
                if (acc < 0)
                    acc += MOD;
                c.a[i][j] = acc;
            }
        }
        return c;
    }
};
// Global matrices shared by init(), MatrixPOW() and main():
// ans accumulates the product, fac is the matrix being raised to a power.
Matrix ans;
Matrix fac;
// Computes a^b mod c by binary (square-and-multiply) exponentiation.
//
// Parameters:
//   a - base; any sign, it is first reduced into [0, c).
//   b - exponent, expected to be >= 0. A negative exponent is NOT inverted:
//       the loop simply does not run and the result is 1 % c (this replaces
//       a loop that never terminated for negative b).
//   c - modulus; must be positive and at most 3037000500 so that
//       (c-1)*(c-1) still fits in long long.
// Returns the residue in [0, c); in particular 0 when c == 1.
//
// The signature is spelled with long long, which is the type the file's
// `ll` alias names, so callers are unaffected.
long long POW(long long a, long long b, long long c)
{
    long long base = a % c;
    if (base < 0)
        base += c;          // '%' keeps the dividend's sign; make it canonical

    long long res = 1 % c;  // yields 0 for c == 1 even when b == 0
    while (b > 0)
    {
        if (b & 1)
            res = res * base % c;
        base = base * base % c;
        b >>= 1;
    }
    return res;
}
// Right-multiplies the global `ans` by fac^k using binary exponentiation.
// The global `fac` is overwritten with its successive squares along the way.
// The loop stops only once the shifted exponent reaches 0, so k is expected
// to be non-negative.
void MatrixPOW(ll k)
{
    for (ll remaining = k; remaining != 0; remaining >>= 1)
    {
        // The lowest remaining bit decides whether the current power joins the product.
        if (remaining & 1)
        {
            ans = ans * fac;
        }
        fac = fac * fac;
    }
}
void init()
{
ans.n = fac.n = 2;
ans.a[0][0] = 1;
ans.a[1][1]=1;
ans.a[0][1] = 0;
fac.a[0][0] = 0;
fac.a[0][1] = n-1;
fac.a[1][0] = 1;
fac.a[1][1] = n-2;
}
// Reads n and k from stdin, builds the matrices with init(), runs
// MatrixPOW(k) and prints ans.a[0][0] followed by a newline.
//
// Returns 1 without printing anything when the two integers cannot be read
// or when k is negative: MatrixPOW only stops once its right-shifted
// exponent reaches 0, which never happens for a negative value.
int main()
{
#ifdef DEBUG
    // Debug builds take their input from a local file instead of the console.
    freopen("input.in", "r", stdin);
#endif

    if (scanf("%lld%lld", &n, &k) != 2 || k < 0)
    {
        return 1;
    }

    init();
    MatrixPOW(k);
    printf("%lld\n", ans.a[0][0]);

#ifdef DEBUG
    printf("Time cost : %lf s\n", static_cast<double>(clock()) / CLOCKS_PER_SEC);
#endif
    return 0;
}