cuda 学习(三) Page-Locked Host Memory

一、cudaHostAlloc的使用

#include <iostream>
#include <numeric>
#include <stdlib.h>

// Kernel: each thread adds its own threadIdx.x to the element of `input`
// at that same index.
//
// Launch layout: only threadIdx.x is used (no blockIdx), so this is meant
// for a single 1-D block. `input` must be device-accessible and hold at
// least blockDim.x floats -- there is no bounds check.
__global__ void add1(float* input){
    const int tid = threadIdx.x;
    float* slot = input + tid;
    *slot = *slot + tid;
}
// Demonstrates cudaHostAlloc: allocates page-locked host memory, fills it
// with 0..11, lets the add1 kernel add each thread index to its element,
// and prints the result from the host.
//
// Returns 0 on success, EXIT_FAILURE if any CUDA call fails.
int main(void)
{
    // Element count; also the number of threads in the single launched block.
    const int kCount = 12;

    float* temp = NULL;
    cudaError_t err = cudaHostAlloc(&temp, sizeof(float)*kCount, cudaHostAllocDefault);
    if (err != cudaSuccess) {
        std::cerr << "cudaHostAlloc failed: " << cudaGetErrorString(err) << std::endl;
        return EXIT_FAILURE;
    }

    for(int i = 0; i < kCount; ++i){
        temp[i] = i;
    }

    // The host pointer is handed straight to the kernel. Per the CUDA runtime
    // documentation this is valid when unified virtual addressing is in
    // effect; otherwise the buffer would need cudaHostAllocMapped plus
    // cudaHostGetDevicePointer.
    add1<<<1,kCount>>>(temp);

    // Launches do not return an error directly: pick up launch-configuration
    // errors first, then wait for the kernel. The launch is asynchronous, so
    // without this synchronization the host loop below could read `temp`
    // before the device has written it.
    err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        std::cerr << "add1 kernel failed: " << cudaGetErrorString(err) << std::endl;
        cudaFreeHost(temp);
        return EXIT_FAILURE;
    }

    for(int i = 0; i < kCount; ++i){
        std::cout<< temp[i] << std::endl;
    }

    cudaFreeHost(temp);
    return 0;
}

二、cudaHostRegister与cudaHostGetDevicePointer使用

#include <iostream>
#include <numeric>
#include <stdlib.h>

// Kernel: each thread adds its own threadIdx.x to the element of `input`
// at that same index.
//
// Launch layout: only threadIdx.x is used (no blockIdx), so this is meant
// for a single 1-D block. `input` must be device-accessible and hold at
// least blockDim.x floats -- there is no bounds check.
__global__ void add1(float* input){
    const int tid = threadIdx.x;
    float* slot = input + tid;
    *slot = *slot + tid;
}
// Demonstrates cudaHostRegister + cudaHostGetDevicePointer: page-locks an
// ordinary malloc'd buffer as mapped memory, obtains the device-side pointer
// for it, runs the add1 kernel through that pointer, and prints the result
// from the host.
//
// Returns 0 on success, EXIT_FAILURE if allocation or any CUDA call fails.
int main(void)
{
    // Element count; also the number of threads in the single launched block.
    const int kCount = 12;
    const size_t bytes = sizeof(float)*kCount;

    float* temp = (float*)malloc(bytes);
    if (temp == NULL) {
        std::cerr << "malloc failed" << std::endl;
        return EXIT_FAILURE;
    }

    cudaError_t err = cudaHostRegister(temp, bytes, cudaHostRegisterMapped);
    if (err != cudaSuccess) {
        std::cerr << "cudaHostRegister failed: " << cudaGetErrorString(err) << std::endl;
        free(temp);
        return EXIT_FAILURE;
    }

    for(int i = 0; i < kCount; ++i){
        temp[i] = i;
    }

    float* device = NULL;
    err = cudaHostGetDevicePointer(&device, temp, 0);
    if (err == cudaSuccess) {
        add1<<<1,kCount>>>(device);
        // Launches do not return an error directly; query it explicitly.
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        // The launch is asynchronous: wait for the kernel so the host loop
        // below does not read `temp` before the device has written it.
        err = cudaDeviceSynchronize();
    }

    int status = 0;
    if (err != cudaSuccess) {
        std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
        status = EXIT_FAILURE;
    } else {
        for(int i = 0; i < kCount; ++i){
            std::cout<< temp[i] << std::endl;
        }
    }

    // Unregister before releasing the underlying malloc'd storage.
    cudaHostUnregister(temp);
    free(temp);
    return status;
}

运行结果是:0 2 4 6 8 10 12 14 16 18 20 22

这两个函数有一个参数是flag,对应的含义为:

Portable Memory: a block of page-locked memory can be used in conjunction with any device in the
system
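Write-Combining Memory: by default, page-locked host memory is allocated as cacheable. Allocating it as write-combining instead (cudaHostAllocWriteCombined) frees the host's cache resources and avoids snooping during PCIe transfers, which can improve host-to-device transfer performance; however, reading write-combining memory from the host is very slow, so it should only be used for memory the host only writes to.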
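Mapped Memory: a block of page-locked host memory can be mapped into the address space of the device, so that kernels can access it directly through the device pointer obtained with cudaHostGetDevicePointer, without an explicit copy.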
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值