网上的资料杂乱无章,各种编程思路混杂(历经辛酸,有以下体会)
废话不多说,先贴上最简洁的代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include<stdlib.h>
#define threadsperblock 128
#define Matrix_size 256
/**
 * Naive dense matrix multiply: c = a * b for square n x n row-major matrices.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per output element. The
 * flat thread index is mapped to (row, col) as (idx / n, idx % n), so the
 * launch must supply at least n * n threads in total. The grid may be rounded
 * up to a whole number of blocks: threads whose index falls past the last
 * element return without touching memory. No shared memory is used.
 *
 * Indices are computed in int, so n * n must fit in an int.
 *
 * @param a  left operand, n * n floats, only read
 * @param b  right operand, n * n floats, only read
 * @param c  output, n * n floats, every element is overwritten
 * @param n  matrix dimension; the kernel does nothing when n <= 0
 */
__global__ static void Mulitkernel(const float* a, const float* b, float* c,int n)
{
    // Avoid dividing by zero below when handed an empty matrix.
    if (n <= 0)
        return;

    // Use the real block size rather than a compile-time constant so the
    // mapping stays correct for whatever block size the launch picked.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int row = idx / n;
    int col = idx % n;

    // Tail guard: col is always < n, so row >= n is exactly idx >= n * n.
    if (row >= n)
        return;

    // Dot product of row `row` of a with column `col` of b.
    float Cvalue = 0.0f;
    for (int i = 0; i < n; i++)
    {
        Cvalue += a[row * n + i] * b[i * n + col];
    }
    c[row * n + col] = Cvalue;
}
/**
 * Fill an n x n row-major matrix with pseudo-random values from rand().
 *
 * Each entry is rand()/RAND_MAX plus a second, much smaller term
 * rand()/RAND_MAX^2, so every entry lies in [0, 1] up to float rounding.
 * The sequence depends on the current rand() state; call srand() first for
 * reproducible contents.
 *
 * @param a  host buffer with room for n * n floats; every entry is overwritten
 * @param n  matrix dimension; nothing is written when n <= 0
 */
void matgen(float* a, int n)
{
    // Form RAND_MAX^2 in floating point: as an int product it overflows
    // wherever RAND_MAX is 2^31 - 1, which wrecks the second term.
    const float rand_max = (float)RAND_MAX;
    const float rand_max_sq = rand_max * rand_max;

    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            // Two separate statements keep the order of the rand() calls fixed.
            float coarse = (float)rand() / rand_max;
            float fine = (float)rand() / rand_max_sq;
            a[i * n + j] = coarse + fine;
        }
    }
}
int main()
{
float *a, *b, *c, *d;
int n = Matrix_size;
a = (float*)malloc(
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include<stdlib.h>
#define threadsperblock 128
#define Matrix_size 256
/**
 * Naive dense matrix multiply: c = a * b for square n x n row-major matrices.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per output element. The
 * flat thread index is mapped to (row, col) as (idx / n, idx % n), so the
 * launch must supply at least n * n threads in total. The grid may be rounded
 * up to a whole number of blocks: threads whose index falls past the last
 * element return without touching memory. No shared memory is used.
 *
 * Indices are computed in int, so n * n must fit in an int.
 *
 * @param a  left operand, n * n floats, only read
 * @param b  right operand, n * n floats, only read
 * @param c  output, n * n floats, every element is overwritten
 * @param n  matrix dimension; the kernel does nothing when n <= 0
 */
__global__ static void Mulitkernel(const float* a, const float* b, float* c,int n)
{
    // Avoid dividing by zero below when handed an empty matrix.
    if (n <= 0)
        return;

    // Use the real block size rather than a compile-time constant so the
    // mapping stays correct for whatever block size the launch picked.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int row = idx / n;
    int col = idx % n;

    // Tail guard: col is always < n, so row >= n is exactly idx >= n * n.
    if (row >= n)
        return;

    // Dot product of row `row` of a with column `col` of b.
    float Cvalue = 0.0f;
    for (int i = 0; i < n; i++)
    {
        Cvalue += a[row * n + i] * b[i * n + col];
    }
    c[row * n + col] = Cvalue;
}
/**
 * Fill an n x n row-major matrix with pseudo-random values from rand().
 *
 * Each entry is rand()/RAND_MAX plus a second, much smaller term
 * rand()/RAND_MAX^2, so every entry lies in [0, 1] up to float rounding.
 * The sequence depends on the current rand() state; call srand() first for
 * reproducible contents.
 *
 * @param a  host buffer with room for n * n floats; every entry is overwritten
 * @param n  matrix dimension; nothing is written when n <= 0
 */
void matgen(float* a, int n)
{
    // Form RAND_MAX^2 in floating point: as an int product it overflows
    // wherever RAND_MAX is 2^31 - 1, which wrecks the second term.
    const float rand_max = (float)RAND_MAX;
    const float rand_max_sq = rand_max * rand_max;

    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            // Two separate statements keep the order of the rand() calls fixed.
            float coarse = (float)rand() / rand_max;
            float fine = (float)rand() / rand_max_sq;
            a[i * n + j] = coarse + fine;
        }
    }
}
int main()
{
float *a, *b, *c, *d;
int n = Matrix_size;
a = (float*)malloc(