CUDA 入门:数组相加、矩阵相加

CUDA 入门:数组相加、矩阵相加

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>

//Element-wise vector addition kernel: C[i] = A[i] + B[i] for 0 <= i < N.
//
//Launch layout: 1D grid of 1D blocks, one thread per element. The launch must
//provide at least N threads in total; threads whose index is >= N do nothing.
//
//  A, B: input vectors, each holding at least N floats (dereferenced on the device)
//  C:    output vector, at least N floats
//  N:    number of elements to add; N <= 0 makes the kernel a no-op
__global__ void VecAdd(float* A, float* B, float* C, int N){
    if (N <= 0)
        return;

    //Build the global index in size_t: the built-in index variables are
    //unsigned, and narrowing their product to int could yield a negative
    //index that slips past the bounds check on very large launches.
    const size_t idx = (size_t)blockDim.x * blockIdx.x + threadIdx.x;

    //Tail guard: the grid is rounded up to whole blocks, so the last block
    //may contain threads with no element to process.
    if (idx < (size_t)N)
        C[idx] = A[idx] + B[idx];
}

//Report a failed CUDA runtime call on stderr and terminate the program.
//  status: return code of the call being checked
//  what:   short description of the call, included in the message
static void vecAddCheck(cudaError_t status, const char* what){
    if (status != cudaSuccess){
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

//Vector-addition example: fills two N-element host vectors with random
//values in [0, 9], adds them on the GPU with VecAdd, and prints the two
//inputs followed by the result, one vector per line.
//Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
//CUDA call fails.
int main(){

    const int N = 5;
    size_t size = N * sizeof(float);

    //Allocate input and output vectors in host memory
    float* h_A = (float*)malloc(size);
    float* h_B = (float*)malloc(size);
    float* h_C = (float*)malloc(size);
    if (h_A == NULL || h_B == NULL || h_C == NULL){
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)size);
        //free(NULL) is a no-op, so this is safe whichever allocation failed
        free(h_A);
        free(h_B);
        free(h_C);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N; i++){
        h_A[i] = rand()%10;
        h_B[i] = rand()%10;
    }


    //Allocate vectors in device memory
    float* d_A = NULL;
    vecAddCheck(cudaMalloc(&d_A, size), "cudaMalloc d_A");
    float* d_B = NULL;
    vecAddCheck(cudaMalloc(&d_B, size), "cudaMalloc d_B");
    float* d_C = NULL;
    vecAddCheck(cudaMalloc(&d_C, size), "cudaMalloc d_C");

    //Copy vectors from host memory to device memory
    vecAddCheck(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice), "copy h_A to d_A");
    vecAddCheck(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice), "copy h_B to d_B");

    //Invoke kernel: round the block count up so every element gets a thread
    int threadsPerBlock = 16*16;
    int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
    VecAdd <<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
    //A launch returns no status itself: configuration errors are reported by
    //cudaGetLastError, faults during execution by the following synchronize.
    vecAddCheck(cudaGetLastError(), "VecAdd launch");
    vecAddCheck(cudaDeviceSynchronize(), "VecAdd execution");


    //Copy result from device memory to host memory
    vecAddCheck(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost), "copy d_C to h_C");

    for (int i = 0; i < N; i++){
        printf("%f%c", h_A[i], i == N - 1 ? '\n' : ' ');
    }

    for (int i = 0; i < N; i++){
        printf("%f%c", h_B[i], i == N - 1 ? '\n' : ' ');
    }

    for (int i = 0; i < N; i++){
        printf("%f%c", h_C[i], i == N - 1 ? '\n' : ' ');
    }

    //Free device memory
    vecAddCheck(cudaFree(d_A), "cudaFree d_A");
    vecAddCheck(cudaFree(d_B), "cudaFree d_B");
    vecAddCheck(cudaFree(d_C), "cudaFree d_C");

    //Free host memory
    free(h_A);
    free(h_B);
    free(h_C);

#ifdef _WIN32
    //"pause" is a Windows shell built-in; on other systems the command does
    //not exist, so the call is compiled only for Windows.
    system("pause");
#endif
    return 0;
}

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>

//Element-wise addition of two row-major N x M matrices stored as flat
//arrays: C[i*M + j] = A[i*M + j] + B[i*M + j].
//
//Launch layout: 2D grid of 2D blocks, one thread per element. The x
//dimension indexes rows (must cover N), the y dimension indexes columns
//(must cover M); threads outside the N x M range do nothing.
//
//  A, B: input matrices, each at least N*M floats (dereferenced on the device)
//  C:    output matrix, at least N*M floats
//  N:    number of rows
//  M:    number of columns (row length used for the flat index)
//If N <= 0 or M <= 0 the kernel is a no-op.
//
//NOTE(review): because x maps to the row index, threads adjacent in x touch
//addresses M floats apart. Mapping x to the column index would make those
//accesses contiguous, but it requires swapping the grid dimensions at every
//launch site, so the original mapping is kept here.
__global__ void MatAdd(float* A, float* B, float* C, int N , int M) {

    if (N <= 0 || M <= 0)
        return;

    //Keep the thread coordinates in size_t: narrowing the unsigned built-in
    //index arithmetic to int could produce negative coordinates that pass
    //the bounds check on very large launches.
    const size_t row = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t col = (size_t)blockIdx.y * blockDim.y + threadIdx.y;

    if (row < (size_t)N && col < (size_t)M) {
        //64-bit flat index: row * M + col no longer overflows int once the
        //matrix holds more than INT_MAX elements.
        const size_t idx = row * (size_t)M + col;
        C[idx] = A[idx] + B[idx];
    }
}

//Report a failed CUDA runtime call on stderr and terminate the program.
//  status: return code of the call being checked
//  what:   short description of the call, included in the message
static void matAddCheck(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

//Matrix-addition example: fills two N x M row-major host matrices with
//random values in [0, 9], adds them on the GPU with MatAdd, and prints both
//inputs and the result, each followed by a separator line of asterisks.
//Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
//CUDA call fails.
int main() {

    const int N = 5;
    const int M = 3;
    size_t size = (size_t)N * M * sizeof(float);

    //Allocate input and output matrices in host memory
    float* h_A = (float*)malloc(size);
    float* h_B = (float*)malloc(size);
    float* h_C = (float*)malloc(size);
    if (h_A == NULL || h_B == NULL || h_C == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)size);
        //free(NULL) is a no-op, so this is safe whichever allocation failed
        free(h_A);
        free(h_B);
        free(h_C);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N*M; i++) {
        h_A[i] = rand() % 10;
        h_B[i] = rand() % 10;
    }


    //Allocate matrices in device memory
    float* d_A = NULL;
    matAddCheck(cudaMalloc(&d_A, size), "cudaMalloc d_A");
    float* d_B = NULL;
    matAddCheck(cudaMalloc(&d_B, size), "cudaMalloc d_B");
    float* d_C = NULL;
    matAddCheck(cudaMalloc(&d_C, size), "cudaMalloc d_C");

    //Copy matrices from host memory to device memory
    matAddCheck(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice), "copy h_A to d_A");
    matAddCheck(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice), "copy h_B to d_B");

    //Invoke kernel: the grid's x dimension is rounded up to cover the N rows
    //and its y dimension to cover the M columns, matching MatAdd's indexing.
    dim3 threadPerBlock(16, 16);
    dim3 numBlocks((N + threadPerBlock.x - 1) / (threadPerBlock.x), (M + threadPerBlock.y - 1) / (threadPerBlock.y));
    MatAdd <<< numBlocks, threadPerBlock >>> (d_A, d_B, d_C , N , M);
    //A launch returns no status itself: configuration errors are reported by
    //cudaGetLastError, faults during execution by the following synchronize.
    matAddCheck(cudaGetLastError(), "MatAdd launch");
    matAddCheck(cudaDeviceSynchronize(), "MatAdd execution");


    //Copy result from device memory to host memory
    matAddCheck(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost), "copy d_C to h_C");

    //Print each matrix row by row; the last element of a row ends the line
    for (int i = 0; i < N; i++)
        for (int j = 0; j < M; j++) {
            printf("%f%c", h_A[i * M + j], j == M - 1 ? '\n' : ' ');
        }
    printf("**************\n");

    for (int i = 0; i < N; i++)
        for (int j = 0; j < M; j++) {
            printf("%f%c", h_B[i * M + j], j == M - 1 ? '\n' : ' ');
        }
    printf("**************\n");

    for (int i = 0; i < N; i++)
        for (int j = 0; j < M; j++) {
            printf("%f%c", h_C[i * M + j], j == M - 1 ? '\n' : ' ');
        }
    printf("**************\n");

    //Free device memory
    matAddCheck(cudaFree(d_A), "cudaFree d_A");
    matAddCheck(cudaFree(d_B), "cudaFree d_B");
    matAddCheck(cudaFree(d_C), "cudaFree d_C");

    //Free host memory
    free(h_A);
    free(h_B);
    free(h_C);

#ifdef _WIN32
    //"pause" is a Windows shell built-in; on other systems the command does
    //not exist, so the call is compiled only for Windows.
    system("pause");
#endif
    return 0;
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值