pthread多线程加速示例(大型矩阵乘法)

#include <cmnheader.h>
#include <time.h>
#include "MatrixLib.h"
#pragma comment(lib,"MatrixLib.lib")
#pragma warning(disable:4996)

/*
 * Abort the program when a call that reports errors by return code failed.
 *
 * str   - description of the call that was checked; it is written verbatim
 *         after the code, so it should carry its own trailing newline.
 * value - return code to test; 0 means success and the function returns.
 * pflog - stream that receives the diagnostic; stderr is used when NULL.
 *
 * On a non-zero value the message is written and the process exits with
 * status 1.  The description is taken as const char* so that string
 * literals can be passed without a deprecated conversion.
 */
void checkResult(const char* str, int value, FILE* pflog)
{
    if (value == 0)
    {
        return;
    }
    if (pflog == NULL)
    {
        pflog = stderr;
    }
    fprintf(pflog, "Failed with %d at %s", value, str);
    exit(1);
}

/*
 * Work description handed to one worker thread (consumed by oneThread).
 * The worker computes R[i][j] = sum over k of A[i][k] * B[j][k] for the
 * rows start_row <= i < end_row.
 */
typedef struct
{
	FILE* pflog;          /* stream the worker writes its start-up line to */
	double **R;           /* result matrix, written by the worker          */
	double **A;           /* left operand, read row by row                 */
	double **B;           /* right operand, read as B[j][k]                */
	int start_row;        /* first row of R to compute (inclusive)         */
	int end_row;          /* row at which the worker stops (exclusive)     */
	int num_col;          /* column count, also the length of the k loop   */
} threadParm_t;

/*
 * Worker thread entry point.
 *
 * param points to a threadParm_t.  The worker logs one start-up line to
 * p->pflog and then fills rows [start_row, end_row) of R with
 *     R[i][j] = sum over k in [0, num_col) of A[i][k] * B[j][k].
 * B is indexed as B[j][k], so both operands are walked along a row.
 *
 * Always returns NULL.
 */
void *oneThread(void *param)
{
	threadParm_t *p = (threadParm_t *)param;

	/* The format contains two %X conversions, so two unsigned arguments
	   must be supplied: the process id and the address of this worker's
	   parameter block (used only as a per-thread tag; it may be truncated
	   on platforms where pointers are wider than unsigned int). */
	fprintf(p->pflog, "# Thread  \'%.8X %.8X\'  is now running.\n",
		(unsigned int)getpid(), (unsigned int)(size_t)param);

	double** R = p->R;
	double** A = p->A;
	double** B = p->B;
	const int start_row = p->start_row;
	const int end_row = p->end_row;
	const int num_col = p->num_col;

	for (int i = start_row; i < end_row; ++i)
	{
		for (int j = 0; j < num_col; ++j)
		{
			double tmp = 0;
			for (int k = 0; k < num_col; ++k)
			{
				tmp += A[i][k] * B[j][k];
			}
			R[i][j] = tmp;
		}
	}

	return NULL;
}

void OneTry(const int N, const int C,FILE* pflog)
{
	fprintf(pflog,"== %4d * %4d Matrix Multiply, %d Threads. ==\n", N, N, C);
	clock_t start = clock();
	double** X = NewSquareMatrix(N);
	double** Y = NewSquareMatrix(N);
	double** Z = NewSquareMatrix(N);
	TransformSquareMat(Z, N); // 转置一次

	int start_row = 0, end_row = 0;
	int  inc_row = N / C;
	end_row = start_row + inc_row;
	int i, rc;
	pthread_t* threads = new pthread_t[C];
	threadParm_t* tparams = new threadParm_t[C];
	for (i = 0; i < C; ++i)
	{
		tparams[i].pflog = pflog;
		tparams[i].R = X;
		tparams[i].A = Y;
		tparams[i].B = Z;
		tparams[i].num_col = N;
		tparams[i].start_row = start_row;
		tparams[i].end_row = end_row;
		start_row = end_row + 1;
		end_row += inc_row;
		rc = pthread_create(&threads[i], NULL, oneThread, & tparams[i]);
		checkResult("!! pthread_create()\n", rc,pflog);
		fprintf(pflog,"**********  %2d of %2d threads created  **********\n", i + 1,C);
	}
	fprintf(pflog,"@ Waiting for worker threads' end...\n");
	int* status = new int[C];
	for (i = 0; i < C; ++i)
	{
		rc = pthread_join(threads[i], (void**)(&status[i]));
		checkResult("!! pthread_join()\n", rc,pflog);
	}
	fprintf(pflog,"@ Check all thread's results\n");
	for (i = 0; i < C; ++i)
	{
		if (status[i] != NULL)
		{
			fprintf(pflog,"!! Unexpected thread status\n");
		}
	}
	//TransformSquareMat(Z, N); // 恢复
	SafeDeleteSquareMat(X, N);
	SafeDeleteSquareMat(Y, N);
	SafeDeleteSquareMat(Z, N);
	clock_t finish = clock();
	fprintf(pflog,"@ All finished. Total time:%.8f(sec).\n\n",(finish-start)/(1.0*CLOCKS_PER_SEC));
}

/*
 * Program entry: appends one benchmark run (N x N matrices, C threads)
 * to trace_log.txt in the current directory.
 *
 * Returns 0 on success and 1 when the log file cannot be opened.
 */
int main(int argc, char **argv)
{
	(void)argc;
	(void)argv;

	const int N = 4096, C = 32;

	FILE* pflog = fopen("trace_log.txt", "a");
	if (pflog == NULL)
	{
		perror("trace_log.txt");
		return 1;
	}

	printf("Matrix N=%d,Thread C=%d, now running...", N, C);
	fflush(stdout); /* no newline above: flush so the message shows before the long run */

	time_t rawtime;
	time(&rawtime);
	struct tm* tminfo = localtime(&rawtime);
	if (tminfo != NULL)
	{
		fprintf(pflog, "NEW LOG @%s", asctime(tminfo));
	}
	else
	{
		fprintf(pflog, "NEW LOG @(unknown time)\n");
	}

	/* Use the constants announced above instead of repeating the literals. */
	OneTry(N, C, pflog);
	fflush(pflog);
	fclose(pflog);
	printf("finshed!\n");
	system("pause");
	return 0;
}


cmnheader.h同之前有关pthread的文章
MatrixLib.dll是自己写的,代码如下
MatrixLib.h文件内容

/*
 * Declarations of the square-matrix helper library.
 * Shared by the DLL implementation and by client code.
 */
#ifndef MATRIX_LIB_H
#define MATRIX_LIB_H

/*
 * MAPI selects the linkage of every public function.
 *   - Define DLLEXPORT before including this header when building the DLL.
 *   - Leave it undefined in client code so the symbols are imported.
 * The same macro name is tested for C and C++; C++ translation units
 * additionally get extern "C" so the exported names stay C compatible.
 */
#ifdef __cplusplus
#ifdef DLLEXPORT
#define MAPI extern "C" __declspec (dllexport)
#else  /* DLLEXPORT */
#define MAPI extern "C" __declspec (dllimport)
#endif /* DLLEXPORT  */
#else /* __cplusplus     */
#ifdef DLLEXPORT
#define MAPI  __declspec (dllexport)
#else /* DLLEXPORT */
#define MAPI  __declspec (dllimport)
#endif /* DLLEXPORT */
#endif /* __cplusplus */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Allocate an n x n matrix as n separately allocated rows. */
MAPI double** NewSquareMatrix(const int n);
/* Transpose mat in place. */
MAPI void TransformSquareMat(double** mat,const int n);
/* Free a matrix made by NewSquareMatrix; returns the elapsed seconds. */
MAPI double  SafeDeleteSquareMat(double** mat, const int n);
/* R <- A * B, straightforward triple loop; returns the elapsed seconds. */
MAPI double  SquareMatMultiply(double** R, double** A, double** B, const int n);
/* R <- A * B using a temporary in-place transpose of B; returns the elapsed seconds. */
MAPI double  SquareMatMultiplyTrans(double** R,double** A,double** B,const int n);
/* Fill mat with rand() values; returns the elapsed seconds. */
MAPI double  RndInitSquareMat(double **mat, const int n);

/* Default arguments exist only in C++; a C compiler needs the plain form. */
#ifdef __cplusplus
MAPI void TraceLogInfo(char* strInfo, FILE* pflog = stdout);
MAPI void TraceLogDuration(double durationTime, FILE* pflog = stdout);
#else /* __cplusplus */
MAPI void TraceLogInfo(char* strInfo, FILE* pflog);
MAPI void TraceLogDuration(double durationTime, FILE* pflog);
#endif /* __cplusplus */

#endif /* MATRIX_LIB_H */


MatrixLib.cpp文件内容

// 启用DLLEXPORT宏定义,声明将要导出DLL
#define DLLEXPORT
#include "MatrixLib.h"

// Allocate an n x n matrix and return the address of its row-pointer array.
// The matrix is an array of n pointers, each addressing its own array of
// n doubles.  Every element starts at 0.0 so callers never read
// indeterminate values.  Returns NULL when n is negative.
// Release the result with SafeDeleteSquareMat.
MAPI double **NewSquareMatrix(const int n)
{
	if (n < 0)
	{
		return NULL;
	}
	double** mat = new double*[n];
	for (int i = 0; i < n; ++i)
	{
		mat[i] = new double[n](); // trailing () value-initialises the row to zero
	}
	return mat;
}

// Transpose the n x n matrix mat in place by swapping each element above
// the main diagonal with its mirror image below it.
MAPI void TransformSquareMat(double** mat,const int n)
{
	for (int row = 0; row < n; ++row)
	{
		double* upper = mat[row];
		for (int col = row + 1; col < n; ++col)
		{
			const double saved = upper[col];
			upper[col] = mat[col][row];
			mat[col][row] = saved;
		}
	}
}

// Free a matrix: each of its n rows, then the row-pointer array itself.
// A NULL mat is accepted and nothing is freed.
// mat is received by value, so the caller's own pointer is NOT reset by
// this function; callers that keep the variable should set it to NULL
// themselves after the call.
// Returns the elapsed time in seconds, measured with clock().
MAPI double SafeDeleteSquareMat(double** mat, const int n)
{
	clock_t start = clock();
	if (mat != NULL)
	{
		for (int i = 0; i < n; ++i)
		{
			delete[] mat[i];
		}
		delete[] mat;
	}
	clock_t finish = clock();
	return (1.0*(finish - start) / CLOCKS_PER_SEC);
}

// Square matrix product R <- A * B (plain version).
// Each R[row][col] is the sum over k of A[row][k] * B[k][col].
// Returns the elapsed time in seconds, measured with clock().
MAPI double SquareMatMultiply(double** R, double** A, double** B,const int n)
{
	const clock_t began = clock();
	for (int row = 0; row < n; ++row)
	{
		for (int col = 0; col < n; ++col)
		{
			double dot = 0;
			int k = 0;
			while (k < n)
			{
				dot += A[row][k] * B[k][col];
				++k;
			}
			R[row][col] = dot;
		}
	}
	const clock_t ended = clock();
	return (1.0*(ended - began) / CLOCKS_PER_SEC);
}

// Square matrix product R <- A * B, transpose-accelerated version.
// B is transposed in place before the loops and transposed back afterwards,
// so the inner loop reads both operands along a row.  The measured time
// therefore includes two transposes; an O(n^2) scratch copy of B would
// reduce that to one.
// When A and B are the same matrix the in-place transpose would also
// transpose A, so that case is computed with the plain indexing instead.
// R must not be the same matrix as A or B.
// Returns the elapsed time in seconds, measured with clock().
MAPI double SquareMatMultiplyTrans(double** R, double** A, double** B,const int n)
{
	clock_t start = clock();
	if (A == B)
	{
		// Aliased operands: leave the matrix untouched and index B by column.
		for (int i = 0; i < n; ++i)
		{
			for (int j = 0; j < n; ++j)
			{
				double tmp = 0;
				for (int k = 0; k < n; ++k)
				{
					tmp += A[i][k] * B[k][j];
				}
				R[i][j] = tmp;
			}
		}
	}
	else
	{
		TransformSquareMat(B,n); // transpose once
		for (int i = 0; i < n; ++i)
		{
			for (int j = 0; j < n; ++j)
			{
				double tmp = 0;
				for (int k = 0; k < n; ++k)
				{
					tmp += A[i][k] * B[j][k];  // stays within one row of each operand
				}
				R[i][j] = tmp;
			}
		}
		TransformSquareMat(B,n); // transpose again to restore B
	}
	clock_t finish = clock();
	return (1.0*(finish - start) / CLOCKS_PER_SEC);
}

// Fill the n x n matrix mat with values from rand(), row by row.
// Returns the elapsed time in seconds, measured with clock().
MAPI double RndInitSquareMat(double **mat, const int n)
{
	const clock_t began = clock();
	for (int row = 0; row < n; ++row)
	{
		double* cells = mat[row];
		for (int col = 0; col < n; ++col)
		{
			cells[col] = rand();
		}
	}
	const clock_t ended = clock();
	return (1.0*(ended - began) / CLOCKS_PER_SEC);
}

// Write the text pszInfo followed by a newline to the stream pflog
// (typically a log file).
MAPI void TraceLogInfo(char* pszInfo, FILE* pflog)
{
	fprintf(pflog, "%s\n", pszInfo);
}

// Write a "DurationTime = ...(sec)." line to the stream pflog, printing
// durationTime with six decimal places in a field at least ten wide.
MAPI void TraceLogDuration(double durationTime, FILE* pflog)
{
	fprintf(pflog, "DurationTime = %10.6f(sec).\n", durationTime);
}


日志片段

NEW LOG @Fri Apr 18 16:42:44 2014
== 4096 * 4096 Matrix Multiply, 32 Threads. ==
**********   1 of 32 threads created  **********
**********   2 of 32 threads created  **********
**********   3 of 32 threads created  **********
**********   4 of 32 threads created  **********
# Thread  '00000C48 00B728A0'  is now running.
# Thread  '00000C48 00B72B80'  is now running.
**********   5 of 32 threads created  **********
# Thread  '00000C48 00B72E60'  is now running.
# Thread  '00000C48 00B73140'  is now running.
**********   6 of 32 threads created  **********
**********   7 of 32 threads created  **********
**********   8 of 32 threads created  **********
**********   9 of 32 threads created  **********
**********  10 of 32 threads created  **********
**********  11 of 32 threads created  **********
**********  12 of 32 threads created  **********
**********  13 of 32 threads created  **********
**********  14 of 32 threads created  **********
**********  15 of 32 threads created  **********
**********  16 of 32 threads created  **********
**********  17 of 32 threads created  **********
# Thread  '00000C48 00B73640'  is now running.
**********  18 of 32 threads created  **********
**********  19 of 32 threads created  **********
**********  20 of 32 threads created  **********
**********  21 of 32 threads created  **********
**********  22 of 32 threads created  **********
**********  23 of 32 threads created  **********
**********  24 of 32 threads created  **********
**********  25 of 32 threads created  **********
**********  26 of 32 threads created  **********
**********  27 of 32 threads created  **********
**********  28 of 32 threads created  **********
**********  29 of 32 threads created  **********
**********  30 of 32 threads created  **********
**********  31 of 32 threads created  **********
**********  32 of 32 threads created  **********
@ Waiting for worker threads' end...
# Thread  '00000C48 00B73920'  is now running.
# Thread  '00000C48 00B731E8'  is now running.
# Thread  '00000C48 00B73290'  is now running.
# Thread  '00000C48 00B73338'  is now running.
# Thread  '00000C48 00B74248'  is now running.
# Thread  '00000C48 00B747F0'  is now running.
# Thread  '00000C48 00B74AD0'  is now running.
# Thread  '00000C48 00B74DB0'  is now running.
# Thread  '00000C48 00B78AB0'  is now running.
# Thread  '00000C48 00B7B8A8'  is now running.
# Thread  '00000C48 00B7BAA0'  is now running.
# Thread  '00000C48 00B7B950'  is now running.
# Thread  '00000C48 00B704C0'  is now running.
# Thread  '00000C48 00B781E0'  is now running.
# Thread  '00000C48 00B7ACD8'  is now running.
# Thread  '00000C48 00B7AED0'  is now running.
# Thread  '00000C48 00B7AE28'  is now running.
# Thread  '00000C48 00B7B170'  is now running.
# Thread  '00000C48 00B7B800'  is now running.
# Thread  '00000C48 00B7AD80'  is now running.
# Thread  '00000C48 00B7B9F8'  is now running.
# Thread  '00000C48 00B7B218'  is now running.
# Thread  '00000C48 00B7AF78'  is now running.
# Thread  '00000C48 00B7B020'  is now running.
# Thread  '00000C48 00B7B0C8'  is now running.
# Thread  '00000C48 00B7B2C0'  is now running.
# Thread  '00000C48 00B74510'  is now running.
@ Check all thread's results
@ All finished. Total time:44.18600000(sec).


  • 3
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值