当 ucx --with-cuda 时做了什么

1,找一只活麻雀,下载编译 ucx

git clone https://github.com/openucx/ucx.git
cd ucx/
git checkout v1.16.0
./autogen.sh
mkdir build
cd build
../contrib/configure-devel --with-cuda=/usr/local/cuda --without-rocm --without-java --prefix=${PWD}/../../local_d_nv


make -j
make install

2,运行 ucx 普通示例

ls build/test/apps/


示例解析:

test_tcmalloc

未完待续 。。。。

3,运行 ucx cuda 相关示例

 $ ./test_cuda_hook_static

示例解析:

test_cuda_hook_static

未完待续 。。。。

4,挖掘更深入的 ucx cuda 功能

5,剖析 ucx cuda 功能的达成

6,ucx 使用了哪些 cuda API

cuda 开头的:

cudaEventQuery

cudaEventCreateWithFlags

cudaEventDestroy

cudaStreamCreateWithFlags

cudaStreamDestroy

cudaMemcpyDefault

cudaMemcpyAsync

cudaFreeHost

cudaMallocFromPoolAsync

cudaGetErrorString

cudaSetDevice

cudaDeviceSynchronize

cudaGetDeviceCount

cudaFree

cudaMallocManaged

cudaMalloc

cudaMemcpy

cudaMemset

cudaFreeAsync

cudaMallocAsync

cudaMallocPitch

cudaEventDestroy

cudaHostUnregister

cudaHostRegister

cudaHostRegisterPortable

cudaStreamSynchronize

cudaEventRecord

cudaErrorUnsupportedPtxVersion(注:这是 cudaError_t 中的错误码,不是 API 函数)

cu开头的driver api:

cuDeviceGetName

cuPointerGetAttribute

cuMemGetAddress

cuPointerGetAttributes

cuMemRangeGetAttribute

cuMemGetHandleForAddressRange

cuPointerSetAttribute

cuDeviceGetCount

cuEventQuery

cuLaunchHostFunc

cuStreamAddCallback

cuEventCreate

cuStreamCreate

cuStreamDestroy

cuMemcpyDtoDAsync(dst, src, iov[0].length,

cuEventRecord(cuda_ipc_event->event,

cuIpcOpenMemHandle(mapped_addr, key->ph,

cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr));

cuIpcGetMemHandle(&key->ph, (CUdeviceptr)addr)


cuDeviceGetAttribute(&attrib, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,

cuGetErrorString(result, &error_str)

cuCtxGetDevice(&cuda_device)

cuMemGetAddressRange(&pbase, &length, ptr)

cuMemAlloc

cuMemAlloc_v2

cuMemAllocManaged

cuMemAllocPitch

cuMemAllocPitch_v2

cuMemAllocAsync

cuMemAllocFromPoolAsync

cuMemFree_v2

cuMemFreeHost_v2

cuDeviceTotalMem

cuDeviceTotalMem_v2

cuCtxDestroy(m_context)

cuMemAllocHost(&ptr, 64)

cuMemFreeHost(ptr)

cuMemAllocManaged(&dptr, 64, CU_MEM_ATTACH_GLOBAL)

cuMemFree(dptr)

cuMemAllocPitch(&dptr, &pitch, width, height, element_size)

cuMemAllocAsync(&dptr, 64, 0)

cuMemFreeAsync(dptr, 0);

cuDeviceGet(&device, 0)

cuCtxCreate(&context, 0, device)

cuMemAlloc(&dptr, 4096)

cuMemFree(dptr)

cuCtxDetach(context)

cuGetErrorString(_cu_result, &_error_string)

cuInit(0)

cuCtxGetCurrent(&cu_context)

cuDeviceGetUuid ();

7,ucx cuda VS ucx rocm

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值