【无标题】

构造基于Pthreads的并行for循环分解、分配和执行机制

1) 基于pthreads多线程库提供的基本函数(如线程创建、线程join、线程同步等),构建parallel_for函数,实现对循环的分解、分配和执行机制。函数参数包括但不限于(int start, int end, int increment, void *(*functor)(void*), void *arg, int num_threads);其中start为循环开始索引;end为循环结束索引;increment为每次循环索引的增量;functor为函数指针,指向需要被并行执行的循环程序块;arg为functor的入口参数;num_threads为并行线程数。

2) 在Linux系统中将parallel_for函数编译为.so文件,由其他程序调用。

3) 将通用矩阵乘法的for循环,改造成基于parallel_for函数并行化的矩阵乘法,注意只改造可被并行执行的for循环(例如无race condition、无数据依赖、无循环依赖等)。

代码段:

parallel.c

#include<stdlib.h>
#include<stdio.h>
#include<time.h>
#include<pthread.h>
#include<parallel.h>

/* Matrices shared with the worker: functor reads A and B and writes C,
 * indexing each as a flat row-major M x M array. */
int *A;
int *B;
int *C;

/* Matrix dimension used by functor for every loop bound and row stride. */
int M;

/* Slots for timing stamps; no function in parallel.c touches them. */
int time1;
int time2;

/* Thread count configured by the program that uses this code. */
int thread_count;

/* Descriptor of the index range one worker has to process; functor walks it
 * as: for (i = start; i <= end; i += increment). */
struct for_index {
    int start;      /* first index of the range */
    int end;        /* last index of the range, inclusive */
    int increment;  /* amount added to the index after each iteration */
};

void* functor(void * argv){
    struct for_index* index = (struct for_index*) argv;
    int start = index->start;
    int end = index->end;
    int increment = index->increment;

    for(int i = start;i <= end;i+=increment){
        for(int j = 0;j < M;++j){
            int temp = 0;
            for(int k = 0;k < M;++k){
                temp += A[i*M+k]*B[k*M+j];
            }
            C[i*M+j] = temp;
        }
    }
}

void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads){
    pthread_t p_thread[thread_count];
    int counts = end-start;
    int threads = num_threads;
    if(num_threads >= counts) threads = counts;
    int average_loop = counts/num_threads;

    for(int i = 0;i < threads;++i){
        struct for_index* index = (struct for_index*)malloc(sizeof(struct for_index));
        index->start = average_loop*i;
        index->end = increment;
        if(i < threads-1){
            index->end = average_loop*(i+1)-1;
        }
        else{
            index->end = counts - 1;
        }
        pthread_create(&(p_thread[i]),NULL,functor,(void*)index);
    }

    for(int i = 0; i < threads;i++){
        pthread_join(p_thread[i],NULL);
    }
}

parallel.h

/* Public interface of the pthread-based parallel-for helper.
 * The guard avoids a leading underscore followed by an uppercase letter,
 * because such identifiers are reserved for the implementation. */
#ifndef PARALLEL_FOR_H
#define PARALLEL_FOR_H
#include<stdio.h>

/*
 * Splits the index range [start, end) among at most num_threads pthreads and
 * waits for all of them to finish.
 *
 *   start, end   loop bounds; end is exclusive
 *   increment    intended step between successive loop indices
 *   functor      thread entry point; it is handed a pointer to a
 *                struct for_index describing the sub-range it must process
 *   arg          user argument; the implementation does not forward it
 *   num_threads  requested number of worker threads
 */
void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads);

/*
 * Loop-body worker passed to parallel_for. argv points to a struct for_index;
 * the indices from its start through its end (inclusive) are processed.
 */
void* functor(void * argv);
#endif /* PARALLEL_FOR_H */

program.c

#include<stdlib.h>
#include<stdio.h>
#include<time.h>
#include<pthread.h>
#include<parallel.h>

/* Matrices of the product C = A * B. main allocates each with M*M ints and
 * functor indexes them as flat row-major M x M arrays. */
int *A;
int *B;
int *C;

/* Matrix dimension, taken from the first command-line argument. */
int M;

/* Slots for timing stamps of the benchmark. */
int time1;
int time2;

/* Number of worker threads, taken from the second command-line argument. */
int thread_count;

// void print_matrix(int* A){
//     for(int i = 0;i < M;++i){
//         for(int j = 0;j < M;++j){
//             printf("%d ",A[i*M+j]);
//         }
//         printf("\n");
//     }
// }

/* Descriptor of the index range one worker has to process; functor walks it
 * as: for (i = start; i <= end; i += increment). */
struct for_index {
    int start;      /* first index of the range */
    int end;        /* last index of the range, inclusive */
    int increment;  /* amount added to the index after each iteration */
};


void* functor(void * argv){
    struct for_index* index = (struct for_index*) argv;
    int start = index->start;
    int end = index->end;
    int increment = index->increment;

    for(int i = start;i <= end;i+=increment){
        for(int j = 0;j < M;++j){
            int temp = 0;
            for(int k = 0;k < M;++k){
                temp += A[i*M+k]*B[k*M+j];
            }
            C[i*M+j] = temp;
        }
    }
}

void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads){
    pthread_t p_thread[thread_count];
    int counts = end-start;
    int threads = num_threads;
    if(num_threads >= counts) threads = counts;
    int average_loop = counts/num_threads;

    for(int i = 0;i < threads;++i){
        struct for_index* index = (struct for_index*)malloc(sizeof(struct for_index));
        index->start = average_loop*i;
        index->end = increment;
        if(i < threads-1){
            index->end = average_loop*(i+1)-1;
        }
        else{
            index->end = counts - 1;
        }
        pthread_create(&(p_thread[i]),NULL,functor,(void*)index);
    }

    for(int i = 0; i < threads;i++){
        pthread_join(p_thread[i],NULL);
    }
}

/* Largest accepted command-line value. With a 32-bit int, 46340 * 46340 still
 * fits, so the int index arithmetic on M stays in range. */
enum { MAX_DIM = 46340 };

/* Parses text as a decimal integer in [1, upper]; stores it in *out and
 * returns 0 on success, returns -1 (leaving *out untouched) otherwise. */
static int parse_bounded(const char *text, long upper, int *out){
    char *stop = NULL;
    long value = strtol(text, &stop, 10);

    /* strtol saturates on overflow, so the range test also rejects that. */
    if(stop == text || *stop != '\0' || value < 1 || value > upper){
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Benchmark driver: program <matrix_size> <num_threads>
 *
 * Fills two M x M matrices with pseudo-random values in 0..4, multiplies them
 * through parallel_for/functor and prints the time measured with clock().
 * Returns 0 on success and EXIT_FAILURE on bad arguments or when the matrices
 * cannot be allocated.
 */
int main(int argc, char ** argv){
    if(argc < 3
       || parse_bounded(argv[1], MAX_DIM, &M) != 0
       || parse_bounded(argv[2], MAX_DIM, &thread_count) != 0){
        fprintf(stderr, "usage: %s <matrix_size 1..%d> <num_threads 1..%d>\n",
                argc > 0 ? argv[0] : "program", MAX_DIM, MAX_DIM);
        return EXIT_FAILURE;
    }

    /* Do the size arithmetic in size_t so M*M cannot overflow an int. */
    size_t cells = (size_t)M * (size_t)M;

    A = malloc(cells * sizeof *A);
    B = malloc(cells * sizeof *B);
    C = malloc(cells * sizeof *C);
    if(A == NULL || B == NULL || C == NULL){
        fprintf(stderr, "cannot allocate three %d x %d matrices\n", M, M);
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    for(size_t i = 0; i < cells; ++i){
        A[i] = rand()%5;
        B[i] = rand()%5;
        C[i] = 0;
    }

    /* Keep the stamps in clock_t: the int globals time1/time2 are too narrow
     * for long runs.
     * NOTE(review): on POSIX systems clock() reports processor time of the
     * whole process, i.e. summed over all threads; a wall-clock timer may be
     * the better tool for judging parallel speed-up - confirm the intent. */
    clock_t begin = clock();
    parallel_for(0,M,1,functor,NULL,thread_count);
    clock_t finish = clock();
    // printf("A is\n");
    // print_matrix(A);
    // printf("B is\n");
    // print_matrix(B);
    // printf("C is\n");
    // print_matrix(C);
    free(A);
    free(B);
    free(C);

    /* %.6f needs a double; CLOCKS_PER_SEC is the standard replacement for the
     * obsolete CLK_TCK. */
    printf("Time is:%.6f",(double)(finish - begin)/CLOCKS_PER_SEC);
    return 0;
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值