- shared memory 空间的申请
__shared__ int smem[1024];
- load_matrix_sync：从 shared memory load 一个 matrix 到 fragment 中
wmma::load_matrix_sync(fragment<matrix_a/matrix_b, M, N, K, DType, row_major/col_major>& frag, const DType* mptr, unsigned ldm);
__shared__ int smem[1024];
wmma::load_matrix_sync(fragment<matrix_a/matrix_b, M, N, K, DType, row_major/col_major>& frag, const DType* mptr, unsigned ldm);