将 cuda kernel 编译成 ptx 和 rocm的hip asm

1,cuda 源码编译

cuda_a_one.cu 

__global__ void NNNNNVVVVV_one(int *A)
{
	A[333] = 777;
}

编译命令:

%.ptx: %.cu
    nvcc -arch=sm_70 -ptx $< -o $@

生成的结果:

2, hip 源码编译

hip_a_one.hip

__global__ void AAAAAMMMMM_one(int *A)
{
	A[0x333] = 0x777;
}

编译命令:

%.hip.s: %.hip
    $(HIPCC) $< -o $@ -S --offload-device-only

生成的结果:

存储为文本:

	.text
	.amdgcn_target "amdgcn-amd-amdhsa--gfx906"
	.protected	_Z14AAAAAMMMMM_onePi    ; -- Begin function _Z14AAAAAMMMMM_onePi
	.globl	_Z14AAAAAMMMMM_onePi
	.p2align	8
	.type	_Z14AAAAAMMMMM_onePi,@function
_Z14AAAAAMMMMM_onePi:                   ; @_Z14AAAAAMMMMM_onePi
; %bb.0:
	s_load_dwordx2 s[0:1], s[4:5], 0x0
	v_mov_b32_e32 v0, 0
	v_mov_b32_e32 v1, 0x777
	s_waitcnt lgkmcnt(0)
	global_store_dword v0, v1, s[0:1] offset:3276
	s_endpgm
	.section	.rodata,#alloc
	.p2align	6, 0x0
	.amdhsa_kernel _Z14AAAAAMMMMM_onePi
		.amdhsa_group_segment_fixed_size 0
		.amdhsa_private_segment_fixed_size 0
		.amdhsa_kernarg_size 8
		.amdhsa_user_sgpr_count 6
		.amdhsa_user_sgpr_private_segment_buffer 1
		.amdhsa_user_sgpr_dispatch_ptr 0
		.amdhsa_user_sgpr_queue_ptr 0
		.amdhsa_user_sgpr_kernarg_segment_ptr 1
		.amdhsa_user_sgpr_dispatch_id 0
		.amdhsa_user_sgpr_flat_scratch_init 0
		.amdhsa_user_sgpr_private_segment_size 0
		.amdhsa_uses_dynamic_stack 0
		.amdhsa_system_sgpr_private_segment_wavefront_offset 0
		.amdhsa_system_sgpr_workgroup_id_x 1
		.amdhsa_system_sgpr_workgroup_id_y 0
		.amdhsa_system_sgpr_workgroup_id_z 0
		.amdhsa_system_sgpr_workgroup_info 0
		.amdhsa_system_vgpr_workitem_id 0
		.amdhsa_next_free_vgpr 2
		.amdhsa_next_free_sgpr 6
		.amdhsa_reserve_vcc 0
		.amdhsa_reserve_flat_scratch 0
		.amdhsa_reserve_xnack_mask 1
		.amdhsa_float_round_mode_32 0
		.amdhsa_float_round_mode_16_64 0
		.amdhsa_float_denorm_mode_32 3
		.amdhsa_float_denorm_mode_16_64 3
		.amdhsa_dx10_clamp 1
		.amdhsa_ieee_mode 1
		.amdhsa_fp16_overflow 0
		.amdhsa_exception_fp_ieee_invalid_op 0
		.amdhsa_exception_fp_denorm_src 0
		.amdhsa_exception_fp_ieee_div_zero 0
		.amdhsa_exception_fp_ieee_overflow 0
		.amdhsa_exception_fp_ieee_underflow 0
		.amdhsa_exception_fp_ieee_inexact 0
		.amdhsa_exception_int_div_zero 0
	.end_amdhsa_kernel
	.text
.Lfunc_end0:
	.size	_Z14AAAAAMMMMM_onePi, .Lfunc_end0-_Z14AAAAAMMMMM_onePi
                                        ; -- End function
	.section	.AMDGPU.csdata
; Kernel info:
; codeLenInByte = 36
; NumSgprs: 10
; NumVgprs: 2
; ScratchSize: 0
; MemoryBound: 0
; FloatMode: 240
; IeeeMode: 1
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 10
; NumVGPRsForWavesPerEU: 2
; Occupancy: 8
; WaveLimiterHint : 1
; COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
; COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
	.ident	"AMD clang version 17.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-6.0.2 24012 af27734ed982b52a9f1be0f035ac91726fc697e4)"
	.section	".note.GNU-stack"
	.addrsig
	.amdgpu_metadata
---
amdhsa.kernels:
  - .args:
      - .address_space:  global
        .offset:         0
        .size:           8
        .value_kind:     global_buffer
    .group_segment_fixed_size: 0
    .kernarg_segment_align: 8
    .kernarg_segment_size: 8
    .language:       OpenCL C
    .language_version:
      - 2
      - 0
    .max_flat_workgroup_size: 1024
    .name:           _Z14AAAAAMMMMM_onePi
    .private_segment_fixed_size: 0
    .sgpr_count:     10
    .sgpr_spill_count: 0
    .symbol:         _Z14AAAAAMMMMM_onePi.kd
    .uniform_work_group_size: 1
    .uses_dynamic_stack: false
    .vgpr_count:     2
    .vgpr_spill_count: 0
    .wavefront_size: 64
amdhsa.target:   amdgcn-amd-amdhsa--gfx906
amdhsa.version:
  - 1
  - 2
...

	.end_amdgpu_metadata

3,hipcc 的概述编译流程

首先,hipcc是一个perl脚本:

#!/usr/bin/env perl
# Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Need perl > 5.10 to use logic-defined or
use 5.006; use v5.10.1;

use warnings;

use File::Basename;
use File::Spec::Functions 'catfile';

# TODO: By default select perl script until change incorporated in HIP build script.
my $USE_PERL_SCRIPT = $ENV{'HIP_USE_PERL_SCRIPTS'};
$USE_PERL_SCRIPT //= 1;  # use defined-or assignment operator.  Use env var, but if not defined default to 1.

my $isWindows =  ($^O eq 'MSWin32' or $^O eq 'msys');
# escapes args with quotes SWDEV-341955
foreach $arg (@ARGV) {
  if ($isWindows) {
    $arg =~ s/[^-a-zA-Z0-9_=+,.:\/\\ ]/\\$&/g;
  }
}

my $SCRIPT_DIR=dirname(__FILE__);
if ($USE_PERL_SCRIPT) {
    #Invoke hipcc.pl
    my $HIPCC_PERL=catfile($SCRIPT_DIR, '/hipcc.pl');
    system($^X, $HIPCC_PERL, @ARGV);
} else {
    $BIN_NAME="/hipcc.bin";
    if ($isWindows) {
        $BIN_NAME="/hipcc.bin.exe";
    }
    my $HIPCC_BIN=catfile($SCRIPT_DIR, $BIN_NAME);
    if ( -e $HIPCC_BIN ) {
        #Invoke hipcc.bin
        system($HIPCC_BIN, @ARGV);
    } else {
        print "hipcc.bin not present; install HIPCC binaries before proceeding\n";
        exit(-1);
    }
}

# Because of this wrapper we need to check
# the output of the system command for perl and bin
# else the failures are ignored and build fails silently
if ($? == -1) {
    exit($?);
}
elsif ($? & 127) {
    exit($?);
}
else {
     $CMD_EXIT_CODE = $? >> 8;
}
exit($CMD_EXIT_CODE);

具体工作流程:

未完待续。。。

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值