- 正文前感谢昇腾各位工作人员,没有你们辛勤付出就没有我们的进步
- 本文旨在解释“核间均分”“核内均分”“核间不可均分”“核内不可均分”这四个概念的定义
- 重点分析下述代码,代码来源为gitee sample/operator中的Addcdiv算子
- 这里面涉及了很多新的定义,如果不熟悉tiling的本质很容易混淆
// Tiling split for the operator: first decide how totalLengthAligned is divided
// across block_dim cores, then how each core's share is cut into tiles of at
// most ub_block_num * ALIGN_NUM. The result is written into `tiling`, serialized
// into the context's raw tiling buffer, and GRAPH_SUCCESS is returned.
// blockLength, tile_num, tileLength, lasttileLength, formertileLength and
// formerlasttileLength are declared outside this fragment.
// NOTE(review): lengths are presumably element counts and ALIGN_NUM the number of
// elements per 32-byte unit (per the original comments) — confirm against the
// declarations. block_dim and ub_block_num are used as divisors and are not
// checked for zero here.
if ((totalLengthAligned / ALIGN_NUM) % block_dim == 0) { // the ALIGN_NUM-sized groups divide evenly across cores
blockLength = totalLengthAligned / block_dim;// equal share for every core
tile_num = blockLength / ALIGN_NUM / ub_block_num;// integer division: 0 when blockLength/ALIGN_NUM < ub_block_num
if ((totalLengthAligned / block_dim / ALIGN_NUM) % ub_block_num == 0 ||
tile_num == 0) {
// 32-byte alignment holds and the per-core share can be split into equal tiles
// (either an exact multiple of ub_block_num groups, or less than one full tile).
if (tile_num == 0) {
tile_num = 1;
}
if (blockLength < ub_block_num* ALIGN_NUM) {
// Share is smaller than one full tile: use a single tile whose group count is
// rounded up to an even number (integer (x + 1) / 2 * 2).
tileLength = ((blockLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
lasttileLength = tileLength;
} else {
// Share is an exact multiple of the full tile size: every tile is full.
tileLength = ub_block_num * ALIGN_NUM;
lasttileLength = tileLength;
}
} else { // 32-byte alignment holds, but the share cannot be split into equal tiles
// One extra tile holds the remainder left after the full-size tiles.
tile_num = tile_num + 1;
tileLength = ub_block_num * ALIGN_NUM;
lasttileLength = blockLength - (tile_num - 1) * tileLength;
}
// Tiling key 1 marks the "evenly divided across cores" layout.
context->SetTilingKey(1);
tiling.set_blockLength(blockLength);
tiling.set_tileNum(tile_num);
tiling.set_tileLength(tileLength);
tiling.set_lasttileLength(lasttileLength);
// Serialize the tiling struct into the context's raw tiling buffer and record its size.
tiling.SaveToBuffer(context->GetRawTilingData()->GetData(),
context->GetRawTilingData()->GetCapacity());
context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
// Writes 0 to the first workspace-size entry.
// NOTE(review): the returned pointer is dereferenced without a null check.
size_t* currentWorkspace = context->GetWorkspaceSizes(1);
currentWorkspace[0] = 0;
return ge::GRAPH_SUCCESS;
} else {// the groups do NOT divide evenly across cores
// formerNum cores take the larger ("former") share; the remaining tailNum cores
// take the smaller ("tail") share.
uint32_t formerNum = (totalLengthAligned / ALIGN_NUM) % block_dim;
uint32_t tailNum = block_dim - formerNum;
// Compute the data amount of the large block and of the small block.
// (The two commented-out lines below are an earlier variant kept from the original.)
// uint32_t formerLength = ((totalLengthAligned / BLOCK_DIM + ALIGN_NUM - 1) / ALIGN_NUM) * ALIGN_NUM;
//uint32_t tailLength = (totalLengthAligned / BLOCK_DIM / ALIGN_NUM) * ALIGN_NUM;
// formerLength: ceil(totalLengthAligned / block_dim), rounded up to a multiple of ALIGN_NUM.
uint32_t formerLength =
(((totalLengthAligned + block_dim - 1) / block_dim + ALIGN_NUM - 1) /
ALIGN_NUM) *ALIGN_NUM;
// tailLength: floor(totalLengthAligned / block_dim), rounded down to a multiple of ALIGN_NUM.
uint32_t tailLength =
(totalLengthAligned / block_dim / ALIGN_NUM) * ALIGN_NUM;
// NOTE(review): isformershare is assigned but never read in this fragment.
bool isformershare = true;
uint32_t former_tile_num = formerLength / ALIGN_NUM / ub_block_num;
if ((formerLength / ALIGN_NUM) % ub_block_num == 0 ||
former_tile_num == 0) { // large block splits into equal tiles inside the core
if (former_tile_num == 0) {
former_tile_num = 1;
}
if (formerLength < ub_block_num * ALIGN_NUM) {
// Smaller than one full tile: single tile, group count rounded up to even.
formertileLength = ((formerLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
formerlasttileLength = formertileLength;
} else {
formertileLength = ub_block_num * ALIGN_NUM;
formerlasttileLength = formertileLength;
}
} else {
// Large block does not split evenly: full-size tiles plus one remainder tile.
isformershare = false;
former_tile_num = former_tile_num + 1;
formertileLength = ub_block_num * ALIGN_NUM;
formerlasttileLength =
(formerLength - (former_tile_num - 1) * formertileLength);
}
// NOTE(review): istailshare is assigned but never read in this fragment.
bool istailshare = true;
uint32_t tail_tile_num = tailLength / ALIGN_NUM / ub_block_num;
uint32_t tailtileLength;
uint32_t taillasttileLength;
if ((tailLength / ALIGN_NUM) % ub_block_num == 0 ||
tail_tile_num == 0) { // small block splits into equal tiles inside the core
if (tail_tile_num == 0) {
tail_tile_num = 1;
}
if (tailLength < (ub_block_num * ALIGN_NUM)) {
// Smaller than one full tile: single tile, group count rounded up to even.
tailtileLength = ((tailLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
taillasttileLength = tailtileLength;
} else {
tailtileLength = ub_block_num * ALIGN_NUM;
taillasttileLength = tailtileLength;
}
} else { // small block does not split evenly inside the core
// Full-size tiles plus one remainder tile.
istailshare = false;
tail_tile_num = tail_tile_num + 1;
tailtileLength = ub_block_num * ALIGN_NUM;
taillasttileLength = (tailLength - (tail_tile_num - 1) * tailtileLength);
}
tiling.set_formerNum(formerNum);// tiling field: number of cores that receive the larger amount of data (large blocks)
tiling.set_formerLength(formerLength);// tiling field: length of a large block
tiling.set_formertileNum(former_tile_num);
tiling.set_formertileLength(formertileLength);
tiling.set_formerlasttileLength(formerlasttileLength);
tiling.set_tailNum(tailNum);// tiling field: number of cores that receive the smaller amount of data (small blocks)
tiling.set_tailLength(tailLength);// tiling field: length of a small block
tiling.set_tailtileNum(tail_tile_num);
tiling.set_tailtileLength(tailtileLength);
tiling.set_taillasttileLength(taillasttileLength);
// Tiling key 2 marks the "former/tail" (uneven across cores) layout.
context->SetTilingKey(2);
// Serialize the tiling struct into the context's raw tiling buffer and record its size.
tiling.SaveToBuffer(context->GetRawTilingData()->GetData(),
context->GetRawTilingData()->GetCapacity());
context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
// Writes 0 to the first workspace-size entry.
// NOTE(review): the returned pointer is dereferenced without a null check.
size_t* currentWorkspace = context->GetWorkspaceSizes(1);
currentWorkspace[0] = 0;
return ge::GRAPH_SUCCESS;
}
复制
- 如下图是核间可均分和核间不可均分区别
- 数据的shape为{4,3},共12个数据;如果分给4个核(AI Core),12能被4整除,核间可均分,每个核分到{1,3}共3个数据,如下图左下角所示
- 如果分给5个核,12不能被5整除,核间不可均分:12 % 5 = 2,因此有2个大核,其余5 - 2 = 3个为小核
- 每个大核处理3个数据,每个小核处理2个数据,合计2×3 + 3×2 = 12个数据,如右图所示
- 核内可均分如左下图所示
- 核内不可均分如右下图所示
- 以上示例中的所有计算都省略了按ALIGN_NUM进行对齐的处理