【CANN训练营0基础赢满分秘籍】高阶班 TIK C++算子动态shape实现与调试技术

1 动态算子

1.1 tiling结构体

主要的操作流程如下:
1.1.png
tiling结构体中的信息:

  • BLOCK_DIM:并行计算使用的核数
  • TOTAL_LENGTH:总共需要计算的数据个数
  • TILE_NUM:每个核上计算数据分块的个数

1.2 tiling解析函数

核函数新增tiling指针参数,其角色与x、y、z相同;在核函数中调用宏函数GET_TILING_DATA来获取tiling结构体
1.2.png

1.22.png

2 动态与固定shape对比

2固态对比动态.png

2.1 Init()函数

  • 固定shape
// Fixed-shape initialisation: binds the global buffers and the queues using
// the compile-time constants BLOCK_LENGTH, TILE_LENGTH and BUFFER_NUM.
//   x, y : global-memory addresses of the two inputs, viewed as half data.
//   z    : global-memory address of the output, viewed as half data.
__aicore__ inline void Init(__gm__ uint8_t* x, __gm__ uint8_t* y, __gm__ uint8_t* z)
{
    // get start index for current core, core parallel:
    // each core starts block_idx * BLOCK_LENGTH half elements into the buffer
    xGm.SetGlobalBuffer((__gm__ half*)x + block_idx * BLOCK_LENGTH);
    yGm.SetGlobalBuffer((__gm__ half*)y + block_idx * BLOCK_LENGTH);
    zGm.SetGlobalBuffer((__gm__ half*)z + block_idx * BLOCK_LENGTH);
    // pipe alloc memory to queue, the unit is Bytes
    pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half));
    pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half));
    pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half));
}
  • 动态shape
// Dynamic-shape initialisation: the per-core block length and the tile length
// are derived at run time from the tiling parameters instead of constants.
//   x, y        : global-memory addresses of the two inputs, viewed as half data.
//   z           : global-memory address of the output, viewed as half data.
//   blockDim    : divisor used to split totalLength across cores.
//   totalLength : total number of elements to process.
//   tileNum     : number of tiles each core's block is split into.
__aicore__ inline void Init(__gm__ uint8_t* x, __gm__ uint8_t* y, __gm__ uint8_t* z,
                            uint32_t blockDim, uint32_t totalLength, uint32_t tileNum)
{
    // NOTE(review): these are truncating integer divisions; this presumably
    // assumes totalLength is a multiple of blockDim * tileNum * BUFFER_NUM
    // and that blockDim and tileNum are non-zero - confirm with the caller.
    this->blockLength = totalLength / blockDim;
    this->tileNum = tileNum;
    this->tileLength = this->blockLength / tileNum / BUFFER_NUM;
    // get start index for current core, core parallel
    xGm.SetGlobalBuffer((__gm__ half*)x + block_idx * this->blockLength);
    yGm.SetGlobalBuffer((__gm__ half*)y + block_idx * this->blockLength);
    zGm.SetGlobalBuffer((__gm__ half*)z + block_idx * this->blockLength);
    // pipe alloc memory to queue, the unit is Bytes
    pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(half));
    pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(half));
    pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(half));
}

2.2 add_tik2.py

  • 固定shape输入的真值生成脚本
def gen_golden_data_simple():
    """Generate fixed-shape inputs and the golden result for the add operator.

    Draws two float16 arrays of shape [8, 2048] uniformly from [-100, 100),
    adds them, and writes the inputs to ./input/ and the expected sum to
    ./output/ as raw binary files.
    """
    input_x = np.random.uniform(-100, 100, [8, 2048]).astype(np.float16)
    input_y = np.random.uniform(-100, 100, [8, 2048]).astype(np.float16)
    golden = (input_x + input_y).astype(np.float16)
    input_x.tofile("./input/input_x.bin")
    input_y.tofile("./input/input_y.bin")
    golden.tofile("./output/golden.bin")
  • 动态shape输入的真值生成脚本
def gen_golden_data_simple():
    """Generate the tiling file, inputs and golden result for the dynamic-shape add operator.

    The total element count is block_dim * one_repeat_calcount * tile_num *
    double_buffer. The tiling file holds three uint32 values in this order:
    block_dim, total_length, tile_num. Inputs are float16 arrays of that
    length drawn uniformly from [-100, 100); the golden output is their sum.
    """
    one_repeat_calcount = 128  # fixed
    block_dim_imm = 8
    tile_num_imm = 8
    double_buffer_imm = 2  # fixed
    total_length_imm = block_dim_imm * one_repeat_calcount * tile_num_imm * double_buffer_imm

    block_dim = np.array(block_dim_imm, dtype=np.uint32)
    total_length = np.array(total_length_imm, dtype=np.uint32)
    tile_num = np.array(tile_num_imm, dtype=np.uint32)
    tiling = (block_dim, total_length, tile_num)
    # Concatenate the raw bytes of each field; main.cpp reads the first
    # uint32 of this file back as blockDim.
    tiling_data = b''.join(x.tobytes() for x in tiling)
    with open('./input/tiling.bin', 'wb') as f:
        f.write(tiling_data)
    input_x = np.random.uniform(-100, 100, [total_length_imm, ]).astype(np.float16)
    input_y = np.random.uniform(-100, 100, [total_length_imm, ]).astype(np.float16)
    golden = (input_x + input_y).astype(np.float16)
    input_x.tofile("./input/input_x.bin")
    input_y.tofile("./input/input_y.bin")
    golden.tofile("./output/golden.bin")

2.3 main.cpp

  • 固定shape
// Fixed shape: 8 * 2048 elements of sizeof(uint16_t) bytes each, with the
// core count hard-coded to 8.
size_t inputByteSize = 8 * 2048 * sizeof(uint16_t);
size_t outputByteSize = 8 * 2048 * sizeof(uint16_t);
uint32_t blockDim = 8;
  • 动态shape
uint8_t* tiling = (uint8_t*)tik2: : GmAlloc(tilingSize);
ReadFile("./input/tiling.bin" , tilingSize, tiling, tilingSize);uint32_t blockDim =(*(const uint32_t*) (tiling));

size_t inputByteSize = blockDim *2048 * sizeof(uint16_t);size_t outputByteSize = blockDim * 2048 * sizeof(uint16_t);
//========================================

// Dynamic shape (aclrtMallocHost variant - presumably the NPU path; confirm):
// request tilingSize bytes for tilingHost, then load ./input/tiling.bin into it.
// NOTE(review): the return value of aclrtMallocHost is not checked here.
aclrtMallocHost((void**) (&tilingHost), tilingSize);
ReadFile("./input/tiling.bin", tilingSize, tilingHost,tilingSize);

// blockDim is taken from the first uint32_t of the tiling buffer.
uint32_t blockDim =(*(const uint32_t*) (tilingHost));
// Buffer sizes scale with blockDim: blockDim * 2048 elements of
// sizeof(uint16_t) bytes each.
size_t inputByteSize = blockDim * 2048 * sizeof(uint16_t);
size_t outputByteSize = blockDim * 2048 * sizeof(uint16_t);

2.4 代码文件对比总结

2固态对比动态.png

2.5 TIK C++算子在不同模式下实操演示

CPU下运行结果
2.51.png
NPU下运行结果
2.52.png

3 CPU 模式下对算子功能的调试

  • 使用GDB调试
    • 也可以使用printf(或者std::cout)进行调试
  • 3.png
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值