c语言运行程序慢,C语言 Pthread程序在线程增加时运行较慢

我是并行编程的初学者,我尝试用pthread库编写并行程序.我在8处理器计算机上运行程序.问题在于,当我增加NumProcs时,每个线程都会减慢,尽管它们的任务总是相同的.有人可以帮我弄清楚发生了什么吗?

`

/*
 * Headers required by the code in this file: assert(), the pthread API,
 * printf()/fprintf(), malloc()/exit()/atoi(), and clock()/clock_gettime().
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Largest thread count accepted on the command line. */
#define MAX_NUMP 16

using namespace std;

/* Number of worker threads; set by main() from argv[1]. */
int NumProcs;

/*
 * Barrier state used by Barrier():
 *   SyncLock  - mutex protecting SyncCount
 *   SyncCV    - condition variable the waiting threads block on
 *   SyncCount - number of threads that have reached the barrier so far
 */
pthread_mutex_t SyncLock;

pthread_cond_t SyncCV;

int SyncCount;

/* Additional mutex; initialised and destroyed by main(). */
pthread_mutex_t ThreadLock;

/* Time stamps main() takes with clock_gettime() around the whole run. */
struct timespec StartTime;

struct timespec EndTime;

/*
 * Block the calling thread until NumProcs threads have called Barrier().
 *
 * The wait is guarded by a generation counter and re-checked in a loop,
 * because pthread_cond_wait() is allowed to return without the condition
 * having been signalled (spurious wakeup).  The last thread to arrive
 * resets SyncCount and advances the generation, so the barrier can be
 * used again for a later synchronisation point.
 *
 * Relies on the file-scope SyncLock, SyncCV, SyncCount and NumProcs having
 * been initialised before the first call.
 */
void Barrier()
{
    /* Incremented once per completed barrier round; guarded by SyncLock. */
    static unsigned long generation = 0;

    int ret;

    ret = pthread_mutex_lock(&SyncLock);
    assert(ret == 0);

    /* Remember which round this thread belongs to before it may sleep. */
    unsigned long myGeneration = generation;

    SyncCount++;

    if (SyncCount == NumProcs) {
        /* Last arrival: open the barrier and prepare it for the next round. */
        SyncCount = 0;
        generation++;
        ret = pthread_cond_broadcast(&SyncCV);
        assert(ret == 0);
    } else {
        /* Sleep until the round this thread joined has been released. */
        while (myGeneration == generation) {
            ret = pthread_cond_wait(&SyncCV, &SyncLock);
            assert(ret == 0);
        }
    }

    ret = pthread_mutex_unlock(&SyncLock);
    assert(ret == 0);
    (void) ret; /* silences "set but unused" when assert() is compiled out */
}

/* The function which is called once the thread is allocated */

/*
 * Thread entry point passed to pthread_create().
 *
 * tmp carries the thread's index, stored directly in the pointer value.
 * The thread waits at the barrier, runs a fixed counting loop and prints
 * how much CPU time that loop consumed.  Always returns NULL.
 *
 * Timing uses CLOCK_THREAD_CPUTIME_ID rather than clock(): clock() reports
 * CPU time consumed by the whole process, so with N threads running at
 * once every thread would see roughly N times its own cost.  The value is
 * printed in microseconds, the unit clock() ticks have on POSIX systems
 * (CLOCKS_PER_SEC == 1000000).
 */
void* ThreadLoop(void* tmp)
{
    long threadId = (long) tmp;
    struct timespec begin;
    struct timespec end;
    int count = 0;
    int ret;

    /* Start all workers together so their measurements overlap. */
    Barrier();

    ret = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &begin);
    assert(ret == 0);

    /*
     * The benchmark workload.  count is never read afterwards, so an
     * optimising compiler is free to remove this loop entirely.
     */
    for (int i = 0; i < 65536; i++) {
        for (int j = 0; j < 1024; j++) {
            count++;
        }
    }

    ret = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
    assert(ret == 0);
    (void) ret; /* silences "set but unused" when assert() is compiled out */

    /* Combine seconds and nanoseconds before converting to microseconds. */
    long long elapsedNs = (long long)(end.tv_sec - begin.tv_sec) * 1000000000LL
                        + (long long)(end.tv_nsec - begin.tv_nsec);
    long elapsedUs = (long)(elapsedNs / 1000);

    printf("threadid:%ld, time:%ld\n", threadId, elapsedUs);

    return NULL;
}

/*
 * Program entry point.
 *
 * Usage: <program> <num_threads>, where num_threads is in 1..MAX_NUMP.
 * Initialises the synchronisation objects, starts num_threads threads
 * running ThreadLoop, joins them, and prints the elapsed monotonic time
 * of the create/join phase in nanoseconds.
 *
 * Returns 0 on success; exits with status -1 on a bad command line or
 * allocation failure.
 */
int main(int argc, char** argv)
{
    pthread_t* threads;
    pthread_attr_t attr;
    char* endp;
    long requested;
    long long elapsedNs;
    int ret;
    int dx;

    if (argc != 2) {
        fprintf(stderr, "USAGE: %s <num_threads>\n", argv[0]);
        exit(-1);
    }

    /*
     * Validate the argument explicitly: assert() vanishes under NDEBUG and
     * atoi() cannot distinguish "0" from garbage.  An out-of-range strtol()
     * result (LONG_MIN/LONG_MAX) is rejected by the range test as well.
     */
    requested = strtol(argv[1], &endp, 10);
    if (endp == argv[1] || *endp != '\0' || requested <= 0 || requested > MAX_NUMP) {
        fprintf(stderr, "%s: thread count must be an integer in 1..%d\n",
                argv[0], MAX_NUMP);
        exit(-1);
    }
    NumProcs = (int) requested;

    /* One pthread_t handle per worker thread. */
    threads = (pthread_t *) malloc(sizeof *threads * NumProcs);
    if (threads == NULL) {
        perror("malloc");
        exit(-1);
    }

    /* Thread attributes: request system contention scope. */
    ret = pthread_attr_init(&attr);
    assert(ret == 0);
    /* Result not checked, as in the original code. */
    (void) pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

    /* Mutexes and the barrier's condition variable. */
    ret = pthread_mutex_init(&SyncLock, NULL);
    assert(ret == 0);

    ret = pthread_mutex_init(&ThreadLock, NULL);
    assert(ret == 0);

    ret = pthread_cond_init(&SyncCV, NULL);
    assert(ret == 0);

    SyncCount = 0;

    /*
     * CLOCK_MONOTONIC counts from an arbitrary origin, so only the
     * difference between two readings is meaningful.
     */
    ret = clock_gettime(CLOCK_MONOTONIC, &StartTime);
    assert(ret == 0);

    for (dx = 0; dx < NumProcs; dx++) {
        /*
         * The thread index travels inside the pointer argument.  Widen it
         * to long first so the cast matches the (long) read in ThreadLoop
         * and does not convert between differently sized types.
         */
        ret = pthread_create(&threads[dx], &attr, ThreadLoop, (void *)(long) dx);
        assert(ret == 0);
    }

    /* Wait for every worker to finish. */
    for (dx = 0; dx < NumProcs; dx++) {
        ret = pthread_join(threads[dx], NULL);
        assert(ret == 0);
    }

    ret = clock_gettime(CLOCK_MONOTONIC, &EndTime);
    assert(ret == 0);
    (void) ret; /* silences "set but unused" when assert() is compiled out */

    /* Include tv_sec: tv_nsec alone wraps every second and can go negative. */
    elapsedNs = (long long)(EndTime.tv_sec - StartTime.tv_sec) * 1000000000LL
              + (long long)(EndTime.tv_nsec - StartTime.tv_nsec);
    printf("Time = %lld nanoseconds\n", elapsedNs);

    pthread_mutex_destroy(&ThreadLock);
    pthread_mutex_destroy(&SyncLock);
    pthread_cond_destroy(&SyncCV);
    pthread_attr_destroy(&attr);

    free(threads);

    return 0;
}

你观察到的现象是符合预期的。

在这种场景下(各工作线程只在本地数据上循环计算,彼此不共享数据),影响性能的主要因素通常是:

- 线程数与可用 CPU 核心数之比:nb_threads / nb_available_machine_cores

- 每个线程的 CPU 亲和性(affinity)

最理想的情况是该比率为 1,并且每个线程各自绑定到一个不同的核心上。

思路是最大化每个核心的吞吐量:让每个核心上运行且只运行一个线程即可做到这一点。如果继续增加线程数(比率 > 1),就会有多个线程共享同一个 CPU 核心,操作系统内核(通过任务调度器)不得不在这些线程之间来回切换执行。这就是你所观察到的现象。

内核每进行一次这样的切换,都要付出一次上下文切换的代价,累积起来可能成为明显的开销。

注意:

您可以使用 pthread_setaffinity_np 设置线程的 CPU 亲和性。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值