关于核间均分、核内均分、核间不可均分、核内不可均分的一点小心得

  • 正文前感谢昇腾各位工作人员,没有你们辛勤付出就没有我们的进步
  • 本文的立意在于解释核间均分、核内均分、核间不可均分、核内不可均分这四个概念的定义
  • 重点分析下述代码,代码来源为gitee sample/operator中的Addcdiv算子
  • 这里面涉及了很多新的定义,如果不熟悉tiling的本质很容易混淆

 // Tiling decision (excerpt from inside a host-side tiling function).
 // Step 1 (inter-core): split totalLengthAligned across block_dim cores.
 // Step 2 (intra-core): split each core's share into tiles of at most
 //                      ub_block_num * ALIGN_NUM.
 // Tiling key 1 is used when the ALIGN_NUM-sized units divide evenly across
 // the cores, tiling key 2 otherwise. Both branches serialize `tiling`, set the
 // first workspace size to 0 and return ge::GRAPH_SUCCESS.
 // NOTE(review): ALIGN_NUM is presumably the number of elements in one 32-byte
 // block (the original comments mention 32-byte alignment) - confirm against
 // its definition, which is outside this excerpt.
 if ((totalLengthAligned / ALIGN_NUM) % block_dim == 0) {  // inter-core: evenly divisible
    blockLength = totalLengthAligned / block_dim;// equal share per core
    tile_num = blockLength / ALIGN_NUM / ub_block_num;// presumably 0 (integer division) when blockLength/ALIGN_NUM < ub_block_num
    if ((totalLengthAligned / block_dim / ALIGN_NUM) % ub_block_num == 0 ||
        tile_num == 0) {  
          // 32-byte alignment is satisfied (per the original note) and the
          // share can be split evenly inside the core.
      // A share smaller than one full tile is still processed as one tile.
      if (tile_num == 0) {
        tile_num = 1;
      } 
      if (blockLength < ub_block_num* ALIGN_NUM) {
        // Share is smaller than a full tile: round its count of
        // ALIGN_NUM-sized units up to an even number and use that as the
        // single tile length.
        tileLength = ((blockLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
        lasttileLength = tileLength;
      } else {
        // Share is an exact multiple of the full tile size: every tile,
        // including the last one, is a full tile.
        tileLength = ub_block_num * ALIGN_NUM;
        lasttileLength = tileLength;
      }
    } else {  // 32-byte aligned (per the original note), but not evenly divisible inside the core
      // One extra tile holds the remainder after the full tiles.
      tile_num = tile_num + 1;
      tileLength = ub_block_num * ALIGN_NUM;
      // The last tile gets whatever is left after (tile_num - 1) full tiles.
      lasttileLength = blockLength - (tile_num - 1) * tileLength;
    }
    // Tiling key 1 marks the "evenly divisible across cores" layout.
    context->SetTilingKey(1);
    tiling.set_blockLength(blockLength);
    tiling.set_tileNum(tile_num);
    tiling.set_tileLength(tileLength);
    tiling.set_lasttileLength(lasttileLength);

    // Serialize the tiling struct into the context's raw tiling buffer and
    // record how many bytes were written.
    tiling.SaveToBuffer(context->GetRawTilingData()->GetData(),
                        context->GetRawTilingData()->GetCapacity());
    context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
    // Set the first workspace size entry to 0.
    // NOTE(review): the returned pointer is dereferenced without a null check.
    size_t* currentWorkspace = context->GetWorkspaceSizes(1);
    currentWorkspace[0] = 0;
    return ge::GRAPH_SUCCESS;
  } else {// inter-core: not evenly divisible
    // The remainder of ALIGN_NUM-sized units over the cores is the number of
    // cores that receive the larger ("former") share; the remaining cores
    // receive the smaller ("tail") share.
    uint32_t formerNum = (totalLengthAligned / ALIGN_NUM) % block_dim;
    uint32_t tailNum = block_dim - formerNum;
    // Compute the data length of the large ("former") and small ("tail") shares.
    //   uint32_t formerLength = ((totalLengthAligned / BLOCK_DIM + ALIGN_NUM - 1) / ALIGN_NUM) * ALIGN_NUM;
    //uint32_t tailLength = (totalLengthAligned / BLOCK_DIM / ALIGN_NUM) * ALIGN_NUM;
    // Large share: ceil(totalLengthAligned / block_dim), rounded up to a
    // multiple of ALIGN_NUM.
    uint32_t formerLength =
        (((totalLengthAligned + block_dim - 1) / block_dim + ALIGN_NUM - 1) /
         ALIGN_NUM) *ALIGN_NUM;
    // Small share: floor(totalLengthAligned / block_dim), rounded down to a
    // multiple of ALIGN_NUM.
    uint32_t tailLength =
        (totalLengthAligned / block_dim / ALIGN_NUM) * ALIGN_NUM;

    // Intra-core split of the large share; same scheme as the evenly
    // divisible branch above, applied to formerLength.
    // NOTE(review): isformershare is written but never read in the visible code.
    bool isformershare = true;
    uint32_t former_tile_num = formerLength / ALIGN_NUM / ub_block_num;
    if ((formerLength / ALIGN_NUM) % ub_block_num == 0 ||
        former_tile_num == 0) {  // evenly divisible inside the core
      // A share smaller than one full tile is still processed as one tile.
      if (former_tile_num == 0) {
        former_tile_num = 1;
      }
      if (formerLength < ub_block_num * ALIGN_NUM) {
        // Round the count of ALIGN_NUM-sized units up to an even number.
        formertileLength = ((formerLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
        formerlasttileLength = formertileLength;
      } else {
        formertileLength = ub_block_num * ALIGN_NUM;
        formerlasttileLength = formertileLength;
      }
    } else {
      // Not evenly divisible inside the core: add one tile for the remainder.
      isformershare = false;
      former_tile_num = former_tile_num + 1;

      formertileLength = ub_block_num * ALIGN_NUM;
      // The last tile gets what is left after the full tiles.
      formerlasttileLength =
          (formerLength - (former_tile_num - 1) * formertileLength);
    }

    // Intra-core split of the small share; same scheme applied to tailLength.
    // NOTE(review): istailshare is written but never read in the visible code.
    bool istailshare = true;
    uint32_t tail_tile_num = tailLength / ALIGN_NUM / ub_block_num;
    uint32_t tailtileLength;
    uint32_t taillasttileLength;
    if ((tailLength / ALIGN_NUM) % ub_block_num == 0 ||
        tail_tile_num == 0) {  // evenly divisible inside the core
      // A share smaller than one full tile is still processed as one tile.
      if (tail_tile_num == 0) {
        tail_tile_num = 1;
      }
      if (tailLength < (ub_block_num * ALIGN_NUM)) {
        // Round the count of ALIGN_NUM-sized units up to an even number.
        tailtileLength = ((tailLength / ALIGN_NUM) + 1) / 2 * 2 * ALIGN_NUM;
        taillasttileLength = tailtileLength;
      } else {
        tailtileLength = ub_block_num * ALIGN_NUM;
        taillasttileLength = tailtileLength;
      }
    } else {  // not evenly divisible inside the core
      istailshare = false;
      tail_tile_num = tail_tile_num + 1;
      tailtileLength = ub_block_num * ALIGN_NUM;
      // The last tile gets what is left after the full tiles.
      taillasttileLength = (tailLength - (tail_tile_num - 1) * tailtileLength);
    }
    tiling.set_formerNum(formerNum);// tiling field: number of cores that get the larger share ("former" cores)
    tiling.set_formerLength(formerLength);// tiling field: length of the larger share
    tiling.set_formertileNum(former_tile_num);
    tiling.set_formertileLength(formertileLength);
    tiling.set_formerlasttileLength(formerlasttileLength);
    tiling.set_tailNum(tailNum);// tiling field: number of cores that get the smaller share ("tail" cores)
    tiling.set_tailLength(tailLength);// tiling field: length of the smaller share
    tiling.set_tailtileNum(tail_tile_num);
    tiling.set_tailtileLength(tailtileLength);
    tiling.set_taillasttileLength(taillasttileLength);
    // Tiling key 2 marks the "not evenly divisible across cores" layout.
    context->SetTilingKey(2);
    // Serialize the tiling struct into the context's raw tiling buffer and
    // record how many bytes were written.
    tiling.SaveToBuffer(context->GetRawTilingData()->GetData(),
                        context->GetRawTilingData()->GetCapacity());
    context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
    // Set the first workspace size entry to 0.
    // NOTE(review): the returned pointer is dereferenced without a null check.
    size_t* currentWorkspace = context->GetWorkspaceSizes(1);
    currentWorkspace[0] = 0;
    return ge::GRAPH_SUCCESS;
  }复制

  • 如下图是核间可均分和核间不可均分区别
  • 总共是{4,3}的数据(共12个),如果分给4个AI Core,12 % 4 = 0,核间可均分,每个核分到{1,3}即3个数据,如下图左下角
  • 如果分5个核,12 % 5 = 2,核间不可均分,于是有两个大核、三个小核
  • 每个大核占3个数据,每个小核占2个数据,合计 2×3 + 3×2 = 12 个数据,如右图所示

cke_77169.png

  • 核内可均分如左下图所示
  • 核内不可均分如右下图所示
  • 以上图示中的所有计算都省略了 ALIGN_NUM 对齐的处理

cke_4673.png

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值