cuda编程与gpu并行计算（六）：图稀疏矩阵转为CSR结构并传入gpu

最新推荐文章于 2023-10-04 10:48:53 发布

好想成为wqg啊

最新推荐文章于 2023-10-04 10:48:53 发布

阅读量1.5k

点赞数

文章标签： gpu cuda 图计算

本文链接：https://blog.csdn.net/qq_42812089/article/details/112055362

版权

虽然sepgraph有这部分代码，还是自己先试着实现一下，这样读起来也方便

行压缩格式Compressed Sparse Row (CSR)
CSR需要三种数据来表达：数值（values）、列号（column indices）、行偏移（row offsets）。CSR不是三元组，而是整体的编码方式。

CSR

编码：
行优先遍历矩阵Matrix
values数组中保存矩阵中非零元素。
column indices数组保存values数组中对应位置非零元素的列索引。
row offsets数组的下标为行号，元素值为该行第一个非零元素在values数组中的下标；数组长度为行数加一，最后一个元素值为非零元素的总个数。

解码:
遍历values数组，对其中的元素值x（下标记为index_X）去column indices数组中对应位置取出x在原矩阵中的列索引（记为col_index）；行索引row_index是满足 row offsets[row_index] ≤ index_X < row offsets[row_index+1] 的那个下标。由于编码时是逐行编码，顺序遍历时可以沿用上一个row_index，只有当index_X达到row offsets[row_index+1]时才向后移动row_index（遇到空行时row offsets中会出现相同的值，需要连续跳过）。

此为前言，清楚逻辑，代码就好办了

#include<stdlib.h>
#include<time.h>
#include<iostream>
#include<stdio.h>
#include<string.h>
using namespace std;


/*
 * Kernel that prints the edges of a CSR graph, one row per thread.
 *
 * d_in_1: row-offset array; thread i reads entries i and i + 1.
 * d_in_2: column-index array, indexed by the offsets read from d_in_1.
 * d_in_3: edge values; accepted but not read by this kernel.
 *
 * The thread index is not bounds-checked, so the launch must not create
 * more threads than there are rows (d_in_1 holds row_count + 1 entries).
 */
__global__ void csr(int* d_in_1,int* d_in_2,float* d_in_3){
	const int row = blockIdx.x * blockDim.x + threadIdx.x;

	// Half-open range [pos, row_end) of this row's entries in d_in_2.
	int pos = d_in_1[row];
	const int row_end = d_in_1[row + 1];

	// Emit one "(row,col)" pair per stored entry of the row.
	while (pos < row_end) {
		printf("(%d,%d) ", row, d_in_2[pos]);
		pos++;
	}
}


// Randomly generate the adjacency matrix of a graph.
// Writes m*n floats into data: each entry is drawn from [0, 100) and every
// draw below 80 is replaced by 0, leaving only values 80..99 as edges.
void fill_random(float*data, int m, int n) {
	// Seed from the current time so every run produces different numbers.
	srand((unsigned)(time(NULL)));

	const int total = m * n;
	int idx = 0;
	while (idx < total) {
		const int draw = rand() % 100;  // random number below 100
		data[idx] = (draw >= 80) ? (float)draw : 0.0f;
		++idx;
	}
}

// Print an m-by-n float matrix to stdout, one matrix row per output line.
// Element (row, col) is read from data[row + col * m]; every value is
// followed by a single space, and a blank line is written after the matrix.
void print_matrix(float*data, int m, int n) {
	for (int row = 0; row < m; ++row) {
		for (int col = 0; col < n; ++col) {
			std::cout << data[col * m + row] << " ";
		}
		std::cout << std::endl;
	}
	std::cout << std::endl;
}

// Integer overload: print an m-by-n int matrix to stdout.
// Element (r, c) is read from data[r + c * m]; each value is followed by
// one space, each matrix row ends its line, and a blank line closes the output.
void print_matrix(int*data, int m, int n) {
	int r = 0;
	while (r < m) {
		int c = 0;
		while (c < n) {
			const int flat = r + c * m;
			std::cout << data[flat] << " ";
			++c;
		}
		std::cout << std::endl;
		++r;
	}

	std::cout << std::endl;
}

/*
 * Convert a dense m-by-n matrix into CSR (compressed sparse row) form.
 *
 * data   : dense input; element (i, j) is read from data[i + j * m].
 * rowPtr : out, malloc'd array of m + 1 ints. rowPtr[i] is the index in
 *          colInd/val of the first non-zero of row i; rowPtr[m] is the
 *          total number of non-zeros.
 * colInd : out, malloc'd array with the column index of each non-zero.
 * val    : out, malloc'd array with the value of each non-zero.
 *
 * The caller owns the three output arrays and releases them with free().
 * When an allocation fails, all three outputs are set to NULL. When the
 * matrix has no non-zeros, colInd and val hold whatever malloc(0) returned
 * (possibly NULL) and must not be dereferenced.
 */
void dense2csr(float*data, int*&rowPtr, int*&colInd, float*&val, int m, int n) {
	rowPtr = NULL;
	colInd = NULL;
	val = NULL;

	int* offsets = (int*)malloc(sizeof(int) * (m + 1));
	if (offsets == NULL) {
		return;
	}

	// Pass 1: record each row's starting offset and count the non-zeros,
	// so the output arrays can be allocated at their exact final size.
	int nnz = 0;
	for (int i = 0; i < m; i++) {
		offsets[i] = nnz;  // number of non-zeros stored before row i
		for (int j = 0; j < n; j++) {
			if (data[i + j * m] != 0) {
				nnz++;
			}
		}
	}
	offsets[m] = nnz;

	int* cols = (int*)malloc(sizeof(int) * nnz);
	float* vals = (float*)malloc(sizeof(float) * nnz);
	// malloc(0) may legitimately return NULL, so only a failure with
	// nnz > 0 counts as an error.
	if (nnz > 0 && (cols == NULL || vals == NULL)) {
		free(offsets);
		free(cols);
		free(vals);
		return;
	}

	// Pass 2: walk the matrix in the same row-by-row order and store the
	// column index and value of every non-zero.
	int next = 0;
	for (int i = 0; i < m; i++) {
		for (int j = 0; j < n; j++) {
			const float entry = data[i + j * m];
			if (entry != 0) {
				cols[next] = j;      // column index
				vals[next] = entry;  // edge weight
				next++;
			}
		}
	}

	rowPtr = offsets;
	colInd = cols;
	val = vals;
}



// Print a message and terminate when a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char* what) {
	if (status != cudaSuccess) {
		fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
		exit(EXIT_FAILURE);
	}
}

/*
 * Demo driver: builds a random 5x5 matrix, converts it to CSR on the host,
 * prints the three CSR arrays, copies them to the GPU and launches the csr
 * kernel with one thread per matrix row to print every (row,col) edge.
 */
int main() {
	const int m = 5;
	const int n = 5;
	float* A = (float*)malloc(sizeof(float) * m * n);
	if (A == NULL) {
		fprintf(stderr, "host allocation failed\n");
		return 1;
	}

	fill_random(A, m, n);
	print_matrix(A, m, n);

	// Build the CSR structure on the CPU.
	int* csrRowPtr = NULL;
	int* csrColInd = NULL;
	float* csrVal = NULL;
	dense2csr(A, csrRowPtr, csrColInd, csrVal, m, n);
	if (csrRowPtr == NULL) {
		fprintf(stderr, "CSR conversion failed\n");
		free(A);
		return 1;
	}

	const int nnz = csrRowPtr[m];  // last row offset == number of non-zeros

	print_matrix(csrRowPtr, 1, m + 1);
	print_matrix(csrColInd, 1, nnz);
	print_matrix(csrVal, 1, nnz);

	const size_t rowBytes = sizeof(int) * (m + 1);
	const size_t colBytes = sizeof(int) * nnz;
	const size_t valBytes = sizeof(float) * nnz;

	// CSR structure on the GPU.
	int* d_in_1 = NULL;
	int* d_in_2 = NULL;
	float* d_in_3 = NULL;

	// Allocate device memory and copy straight from the heap arrays;
	// no intermediate stack copies are needed.
	checkCuda(cudaMalloc((void**)&d_in_1, rowBytes), "cudaMalloc row offsets");
	checkCuda(cudaMemcpy(d_in_1, csrRowPtr, rowBytes, cudaMemcpyHostToDevice),
	          "cudaMemcpy row offsets");

	// With no non-zeros there is nothing to allocate or copy; the kernel
	// never dereferences these two pointers because every row is empty.
	if (nnz > 0) {
		checkCuda(cudaMalloc((void**)&d_in_2, colBytes), "cudaMalloc column indices");
		checkCuda(cudaMalloc((void**)&d_in_3, valBytes), "cudaMalloc values");
		checkCuda(cudaMemcpy(d_in_2, csrColInd, colBytes, cudaMemcpyHostToDevice),
		          "cudaMemcpy column indices");
		checkCuda(cudaMemcpy(d_in_3, csrVal, valBytes, cudaMemcpyHostToDevice),
		          "cudaMemcpy values");
	}

	// One thread per row. The kernel reads d_in_1[i] and d_in_1[i + 1], so
	// launching m + 1 threads would read one element past the end of the
	// (m + 1)-entry row-offset array.
	csr<<<1, m>>>(d_in_1, d_in_2, d_in_3);
	checkCuda(cudaGetLastError(), "csr kernel launch");
	// Wait for the kernel: this surfaces execution errors and flushes the
	// device-side printf buffer before the program exits.
	checkCuda(cudaDeviceSynchronize(), "csr kernel execution");

	// Release device memory.
	checkCuda(cudaFree(d_in_1), "cudaFree row offsets");
	checkCuda(cudaFree(d_in_2), "cudaFree column indices");
	checkCuda(cudaFree(d_in_3), "cudaFree values");

	// Release host memory.
	free(csrRowPtr);
	free(csrColInd);
	free(csrVal);
	free(A);

	return 0;
}