【无标题】

最新推荐文章于 2024-10-09 10:07:13 发布

沈离尘

最新推荐文章于 2024-10-09 10:07:13 发布

阅读量185

点赞数

文章标签： c++ 开发语言

本文链接：https://blog.csdn.net/m0_51469755/article/details/130026832

版权

构造基于Pthreads的并行for循环分解、分配和执行机制

1）基于pthreads多线程库提供的基本函数（如线程创建、线程join、线程同步等），构建parallel_for函数，实现对循环的分解、分配和执行机制。函数参数包括但不限于(int start, int end, int increment, void *(*functor)(void *), void *arg, int num_threads)；其中start为循环开始索引；end为循环结束索引；increment为每次循环增加的索引数；functor为函数指针，指向需要被并行执行的循环程序块；arg为functor的入口参数；num_threads为并行线程数。

2）在Linux系统中将parallel_for函数编译为.so文件，由其他程序调用。

3）将通用矩阵乘法的for循环，改造成基于parallel_for函数并行化的矩阵乘法，注意只改造可被并行执行的for循环（例如无race condition、无数据依赖、无循环依赖等）。

代码段：

parallel.c

#include<stdlib.h>
#include<stdio.h>
#include<time.h>
#include<pthread.h>
#include<parallel.h>

/* Matrices shared with the worker routine; functor indexes each of them
   as a row-major M x M array (element (r, c) at r*M + c). */
int *A;
int *B;
int *C;

/* M is the matrix dimension used by functor; time1 and time2 are spare
   timestamp slots. */
int M;
int time1;
int time2;

/* Thread count; parallel_for reads it to size its thread-handle array. */
int thread_count;

/* Describes the slice of a loop that one worker thread executes. */
struct for_index {
    int start;      /* first index the worker visits */
    int end;        /* last index of the slice; the worker loops while i <= end */
    int increment;  /* step added to the index after every iteration */
};

void* functor(void * argv){
    struct for_index* index = (struct for_index*) argv;
    int start = index->start;
    int end = index->end;
    int increment = index->increment;

    for(int i = start;i <= end;i+=increment){
        for(int j = 0;j < M;++j){
            int temp = 0;
            for(int k = 0;k < M;++k){
                temp += A[i*M+k]*B[k*M+j];
            }
            C[i*M+j] = temp;
        }
    }
}

void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads){
    pthread_t p_thread[thread_count];
    int counts = end-start;
    int threads = num_threads;
    if(num_threads >= counts) threads = counts;
    int average_loop = counts/num_threads;

    for(int i = 0;i < threads;++i){
        struct for_index* index = (struct for_index*)malloc(sizeof(struct for_index));
        index->start = average_loop*i;
        index->end = increment;
        if(i < threads-1){
            index->end = average_loop*(i+1)-1;
        }
        else{
            index->end = counts - 1;
        }
        pthread_create(&(p_thread[i]),NULL,functor,(void*)index);
    }

    for(int i = 0; i < threads;i++){
        pthread_join(p_thread[i],NULL);
    }
}

parallel.h

/* Guard name avoids the leading-underscore-plus-capital form, which the C
   standard reserves for the implementation. */
#ifndef PARALLEL_FOR_H
#define PARALLEL_FOR_H
#include<stdio.h>

/**
 * Distributes the iterations of a loop over pthreads and waits for all of
 * them to finish.
 *
 * @param start        Loop start index.
 * @param end          Loop end index.
 * @param increment    Amount the loop index grows per iteration.
 * @param functor      Thread start routine that executes one slice of the
 *                     loop; it is passed a pointer to a struct for_index
 *                     describing that slice.
 * @param arg          Entry argument intended for functor; the visible
 *                     implementation does not forward it.
 * @param num_threads  Requested number of worker threads.
 */
void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads);

/**
 * Loop body executed by each worker thread.
 *
 * @param argv  Pointer to the struct for_index slice descriptor.
 * @return      Thread exit value.
 */
void* functor(void * argv);
#endif /* PARALLEL_FOR_H */

program.c

#include<errno.h>
#include<limits.h>
#include<pthread.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<parallel.h>

/* Matrices shared with the worker routine; functor indexes each of them
   as a row-major M x M array (element (r, c) at r*M + c). */
int *A;
int *B;
int *C;

/* M is the matrix dimension used by functor; time1 and time2 are
   timestamp slots for the timing done in main. */
int M;
int time1;
int time2;

/* Thread count; parallel_for reads it to size its thread-handle array. */
int thread_count;

// void print_matrix(int* A){
//     for(int i = 0;i < M;++i){
//         for(int j = 0;j < M;++j){
//             printf("%d ",A[i*M+j]);
//         }
//         printf("\n");
//     }
// }

/* Describes the slice of a loop that one worker thread executes. */
struct for_index {
    int start;      /* first index the worker visits */
    int end;        /* last index of the slice; the worker loops while i <= end */
    int increment;  /* step added to the index after every iteration */
};


void* functor(void * argv){
    struct for_index* index = (struct for_index*) argv;
    int start = index->start;
    int end = index->end;
    int increment = index->increment;

    for(int i = start;i <= end;i+=increment){
        for(int j = 0;j < M;++j){
            int temp = 0;
            for(int k = 0;k < M;++k){
                temp += A[i*M+k]*B[k*M+j];
            }
            C[i*M+j] = temp;
        }
    }
}

void parallel_for(int start, int end, int increment, void*(*functor)(void*), void *arg, int num_threads){
    pthread_t p_thread[thread_count];
    int counts = end-start;
    int threads = num_threads;
    if(num_threads >= counts) threads = counts;
    int average_loop = counts/num_threads;

    for(int i = 0;i < threads;++i){
        struct for_index* index = (struct for_index*)malloc(sizeof(struct for_index));
        index->start = average_loop*i;
        index->end = increment;
        if(i < threads-1){
            index->end = average_loop*(i+1)-1;
        }
        else{
            index->end = counts - 1;
        }
        pthread_create(&(p_thread[i]),NULL,functor,(void*)index);
    }

    for(int i = 0; i < threads;i++){
        pthread_join(p_thread[i],NULL);
    }
}

/*
 * Parses text as a strictly positive decimal int.
 * Returns 1 and stores the value in *out on success; returns 0 and leaves
 * *out untouched when the text is empty, has trailing characters, or the
 * value does not fit in 1..INT_MAX.
 */
static int parse_positive_int(const char *text, int *out){
    char *stop = NULL;
    errno = 0;
    const long value = strtol(text, &stop, 10);
    if(stop == text || *stop != '\0' || errno == ERANGE || value < 1 || value > INT_MAX){
        return 0;
    }
    *out = (int)value;
    return 1;
}

/**
 * Benchmark driver: multiplies two random M x M matrices with parallel_for
 * and prints the measured time.
 *
 * Usage: program <matrix_size> <num_threads>
 *
 * @return 0 on success, EXIT_FAILURE on bad arguments or allocation failure.
 */
int main(int argc, char ** argv){
    if(argc < 3 || !parse_positive_int(argv[1], &M) || !parse_positive_int(argv[2], &thread_count)){
        fprintf(stderr, "usage: %s <matrix_size> <num_threads>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    /* functor computes element offsets such as i*M+k in int arithmetic, so
       M*M itself has to be representable as an int. */
    if(M > INT_MAX / M){
        fprintf(stderr, "matrix size %d is too large\n", M);
        return EXIT_FAILURE;
    }

    const size_t cells = (size_t)M * (size_t)M;

    /* calloc checks the cells*sizeof multiplication for overflow and hands
       back zeroed storage, which also gives C its all-zero initial state. */
    A = calloc(cells, sizeof *A);
    B = calloc(cells, sizeof *B);
    C = calloc(cells, sizeof *C);
    if(A == NULL || B == NULL || C == NULL){
        fprintf(stderr, "out of memory for %d x %d matrices\n", M, M);
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    for(size_t i = 0;i < cells;++i){
        A[i] = rand()%5;
        B[i] = rand()%5;
    }

    /* clock() reports processor time consumed by the whole process, so the
       figure is CPU time rather than elapsed wall-clock time. Keep the
       samples as clock_t: squeezing them into int can truncate. */
    const clock_t begin = clock();
    parallel_for(0,M,1,functor,NULL,thread_count);
    const clock_t finish = clock();

    free(A);
    free(B);
    free(C);

    /* CLOCKS_PER_SEC is the standard divisor for clock(); the cast makes the
       argument a double, as %f requires. */
    printf("Time is:%.6f",(double)(finish-begin)/CLOCKS_PER_SEC);
    return 0;
}