并行计算--线程池

最新推荐文章于 2021-07-29 21:15:43 发布

jaywangpku

最新推荐文章于 2021-07-29 21:15:43 发布

阅读量712

点赞数

分类专栏： Linux 文章标签： c++

本文链接：https://blog.csdn.net/u013095333/article/details/109131659

版权

Linux 专栏收录该内容

24 篇文章 0 订阅

订阅专栏

并行计算

并行计算根据不同的粒度，分为两类。各分类不一定绝对准确，实现上可能互有依赖。

分布式，集群，跨进程

1、基于MPI接口，需要自己实现分布式业务模式，学术界使用较多。
2、借助于现有分布式基础设施。
分布式计算框架类：hadoop，spark，powergraph等
分布式存储框架类：对象存储ceph（同时也有文件系统和块存储接口），分布式文件系统HDFS，GFS等，块存储
3、基于消息传递的分布式框架。
微服务，SOA，各个服务独立，消息传递，protobuf
远程过程调用，RPC

单机，可跨进程，线程，协程

在单机上，主要是利用线程池进行并行计算。在线程池的基础上，现在发展出更为轻量级的并行模式，协程。一般不使用跨进程的模式。
目前工业界系统设计时，在整个集群上，一般采用消息传递接口实现跨进程的分布式设计模式来设计系统。消息要么直接传递void*类型的结构体数据，要么经过序列化后进行传递。在单个节点上，一般采用线程池或协程进行并行化。
目前作为基础研发人员，以掌握线程池和协程为主。

线程池

c与c++标准库目前没有线程池的库，c++的boost库有线程池的实现。

多线程

从C++11开始，标准库里已经包含了对线程的支持，std::thread是C++11标准库中的多线程支持库；pthread.h是标准库没有添加多线程之前在Linux上使用的多线程库，而之前在Windows上的多线程支持要包含windows.h。
C++线程库，一般是下面四个头文件配合使用：

#include <thread>
#include <functional>
#include <mutex>
#include <condition_variable>

多线程pthread.h（linux C语言）

参考：https://www.cnblogs.com/mq0036/p/3710475.html

1、线程标识符 pthread_t，用于标识一个线程。 在头文件/usr/include/bits/pthreadtypes.h中定义。

/* Thread identifier type; in this header it is a plain unsigned long integer. */
typedef unsigned long int pthread_t;

该定义就是一个unsigned long int型的数，和文件描述符类似。

2、创建线程函数，pthread_create。
函数原型为：

/* Starts a new thread that runs __start_routine(__arg); the new thread's identifier is stored through __thread and __attr selects its attributes (NULL for defaults). */
extern int pthread_create __P ((pthread_t *__thread, __const pthread_attr_t *__attr, void *(*__start_routine) (void *), void *__arg));

第一个参数：指向线程标识符的指针。
第二个参数：设置线程的属性。为NULL，则表示采用默认属性。
第三个参数：线程运行函数的起始地址，即函数名。
第四个参数：线程运行函数的参数。
返回码：当创建线程成功时，函数返回0，若不为0则说明创建线程失败，常见的错误返回代码为EAGAIN和EINVAL。前者表示系统限制创建新的线程，例如线程数目过多了，后者表示第二个参数代表的线程属性值非法。创建线程成功后，新创建的线程则运行参数三和参数四确定的函数，原来的线程则继续运行下一行代码。

3、等待一个线程结束，pthread_join。
函数原型为：

/* Blocks the caller until thread __th has terminated; when __thread_return is not NULL it receives that thread's return value. */
extern int pthread_join __P ((pthread_t __th, void **__thread_return));

第一个参数：被等待线程的线程标识符。
第二个参数：用户定义的指针，可以用来存储被等待线程的返回值。
这个函数是一个线程阻塞的函数，调用它的函数将一直等待到被等待的线程结束为止，当函数返回时，被等待线程的资源被收回。
一个线程的结束有两种途径：一种是线程函数执行完毕并返回，该线程也就随之结束；另一种方式是在线程函数中调用pthread_exit来实现。

4、线程退出函数，pthread_exit。
函数原型为：

/* Terminates the calling thread and publishes __retval to a thread that joins it; the function never returns. */
extern void pthread_exit __P ((void *__retval)) __attribute__ ((__noreturn__));

唯一的参数是函数的返回码，只要pthread_join中的第二个参数thread_return不是NULL，这个值将被传递给 thread_return。

最后要说明的是，一个线程不应被多个线程同时等待：POSIX规定多个线程同时对同一个线程调用pthread_join的结果是未定义的。在某些实现上，表现为第一个被唤醒的线程成功返回，其余调用pthread_join的线程返回错误代码ESRCH，但不能依赖这一行为。

5、互斥锁，pthread_mutex_init，pthread_mutexattr_init，pthread_mutex_lock，pthread_mutex_unlock。
pthread_mutex_init：初始化一个互斥锁；第二个参数为NULL时使用默认属性。
pthread_mutexattr_init：初始化一个互斥锁属性对象（pthread_mutexattr_t），该对象可以传给pthread_mutex_init以创建带自定义属性的互斥锁。函数pthread_mutexattr_setpshared和函数pthread_mutexattr_settype用来设置该属性对象中的互斥锁属性。
pthread_mutex_lock：上锁。
pthread_mutex_unlock：解锁。
pthread_mutex_lock声明开始用互斥锁上锁，此后的代码直至调用pthread_mutex_unlock为止，均被上锁，即同一时间只能被一个线程调用执行。
当一个线程执行到pthread_mutex_lock处时，如果该锁此时被另一个线程使用，那此线程被阻塞，即程序将等待到另一个线程释放此互斥锁。

6、实验小代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

/* Upper bound for the shared counter; the worker loops run while num < MAX. */
#define MAX 100

/* Handles of the two worker threads, filled in by thread_create(). */
pthread_t threads[2];
/* Mutex taken around every increment of num. */
pthread_mutex_t mux;
/* Counter incremented by both worker threads. */
int num = 0;

/*
 * Worker 1: prints and increments the shared counter num until it reaches MAX.
 *
 * The test against MAX, the print and the increment all happen while mux is
 * held, so the two workers never race on num and the counter can never be
 * pushed past MAX.
 *
 * arg is unused; it exists so the function has the start-routine signature
 * that pthread_create() expects.
 */
void *thread1(void *arg)
{
    (void)arg;

    printf("I am thread 1\n");
    for (;;) {
        int finished;

        pthread_mutex_lock(&mux);
        finished = (num >= MAX);
        if (!finished) {
            printf("thread1, num = %d\n", num);
            num++;
        }
        pthread_mutex_unlock(&mux);

        if (finished) {
            break;
        }
    }
    pthread_exit(NULL);
}

/*
 * Worker 2: prints and increments the shared counter num until it reaches MAX.
 *
 * The test against MAX, the print and the increment all happen while mux is
 * held, so the two workers never race on num and the counter can never be
 * pushed past MAX.
 *
 * arg is unused; it exists so the function has the start-routine signature
 * that pthread_create() expects.
 */
void *thread2(void *arg)
{
    (void)arg;

    printf("I am thread 2\n");
    for (;;) {
        int finished;

        pthread_mutex_lock(&mux);
        finished = (num >= MAX);
        if (!finished) {
            printf("thread2, num = %d\n", num);
            num++;
        }
        pthread_mutex_unlock(&mux);

        if (finished) {
            break;
        }
    }
    pthread_exit(NULL);
}

/*
 * Starts the two worker threads and records their handles in threads[].
 *
 * A slot is left zeroed when its thread could not be created, which is the
 * marker thread_wait() uses to decide whether there is anything to join.
 */
void thread_create(void)
{
    /* Clear the whole array, not just its first element. */
    memset(threads, 0, sizeof threads);

    if (pthread_create(&threads[0], NULL, thread1, NULL) != 0) {
        printf("create thread1 failed\n");
        /* The handle is unspecified after a failed create; reset it. */
        memset(&threads[0], 0, sizeof threads[0]);
    } else {
        printf("create thread1 success\n");
    }

    if (pthread_create(&threads[1], NULL, thread2, NULL) != 0) {
        printf("create thread2 failed\n");
        memset(&threads[1], 0, sizeof threads[1]);
    } else {
        printf("create thread2 success\n");
    }
}

/*
 * Joins both worker threads. A slot in threads[] that is still zero is
 * treated as "never started" and is skipped.
 */
void thread_wait()
{
    pthread_t *first = &threads[0];
    pthread_t *second = &threads[1];

    if (0 != *first) {
        pthread_join(*first, NULL);
        printf("thread1 end\n");
    }

    if (0 != *second) {
        pthread_join(*second, NULL);
        printf("thread2 end\n");
    }
}

/*
 * Demo entry point: initialises the shared mutex, runs both workers to
 * completion and then releases the mutex.
 *
 * Returns 0 on success and 1 when the mutex cannot be initialised.
 */
int main(void)
{
    int rc = pthread_mutex_init(&mux, NULL);
    if (rc != 0) {
        printf("pthread_mutex_init failed, ret = %d\n", rc);
        return 1;
    }

    thread_create();
    thread_wait();

    /* Both workers have been joined, so nobody holds the mutex any more. */
    pthread_mutex_destroy(&mux);
    return 0;
}

多线程pthread实现线程池

介绍一下生产者消费者中的cond变量相关函数：

/* Initialise a condition variable. */
int pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t *cond_attr);
 
/* Destroy a condition variable. */
int pthread_cond_destroy(pthread_cond_t* cond);
 
/* Make a consumer wait on the condition variable. */
int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex);
/* Timed wait; returns ETIMEDOUT when the deadline passes. */
int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const   struct timespec *abstime); 

/* Producer notifies one consumer waiting on the condition variable. */
int pthread_cond_signal(pthread_cond_t* cond);
 
/* Producer broadcasts to every waiting consumer. */
int pthread_cond_broadcast(pthread_cond_t* cond);

利用pthread库实现线程池
condition.h

#pragma once

#include <sys/time.h>
#include <pthread.h>

/*
 * A mutex bundled with the condition variable that is used together with it.
 * The mutex on its own can only be checked passively; the condition variable
 * lets one thread actively notify another.
 */
struct condition {
    pthread_mutex_t p_mutex;
    pthread_cond_t p_cond;
};
typedef struct condition cond_t;

/* Initialises the mutex and the condition variable stored in cond. */
int cond_init(cond_t *cond);
/* Destroys the mutex and the condition variable stored in cond. */
int cond_destroy(cond_t *cond);
/* Locks cond's mutex; returns the pthread_mutex_lock() result. */
int cond_lock(cond_t *cond);
/* Unlocks cond's mutex; returns the pthread_mutex_unlock() result. */
int cond_unlock(cond_t *cond);
/* Waits on cond's condition variable; cond's mutex must already be locked. */
int cond_wait(cond_t *cond);
/* Like cond_wait(), but gives up once the absolute time *ts has passed. */
int cond_timedwait(cond_t *cond, const struct timespec *ts);
/* Wakes one thread waiting on cond's condition variable. */
int cond_signal(cond_t *cond);
/* Wakes every thread waiting on cond's condition variable. */
int cond_broadcast(cond_t *cond);

condition.c

#include "condition.h"
#include "pool_util.h"
/*
 * Initialises the mutex and the condition variable inside cond, both with
 * default attributes.
 *
 * Returns 0 on success. On failure the pthread error code is logged and
 * returned instead of being swallowed; if the condition variable cannot be
 * initialised the already-initialised mutex is destroyed again, so cond holds
 * no live resources after a failed call.
 */
int cond_init(cond_t* cond)
{
    int ret = pthread_mutex_init(&cond->p_mutex, NULL);
    if(ret) {
        printf("pthread_mutex_init, ret = %d\n", ret);
        return ret;
    }

    ret = pthread_cond_init(&cond->p_cond, NULL);
    if(ret) {
        printf("pthread_cond_init, ret = %d\n", ret);
        pthread_mutex_destroy(&cond->p_mutex);
        return ret;
    }
    return 0;
}

/*
 * Destroys the mutex and the condition variable inside cond.
 *
 * Both destroy calls are always attempted. Returns 0 when both succeed,
 * otherwise the first pthread error code encountered (each failure is also
 * logged).
 */
int cond_destroy(cond_t* cond)
{
    int first_error = 0;
    int ret;

    ret = pthread_mutex_destroy(&cond->p_mutex);
    if(ret) {
        printf("pthread_mutex_destroy, ret = %d\n", ret);
        first_error = ret;
    }

    ret = pthread_cond_destroy(&cond->p_cond);
    if(ret) {
        printf("pthread_cond_destroy, ret = %d\n", ret);
        if(0 == first_error) {
            first_error = ret;
        }
    }
    return first_error;
}

/* Locks the mutex held in cond and hands back pthread_mutex_lock()'s result. */
int cond_lock(cond_t* cond)
{
    pthread_mutex_t *mtx = &cond->p_mutex;
    return pthread_mutex_lock(mtx);
}

/* Unlocks the mutex held in cond and hands back pthread_mutex_unlock()'s result. */
int cond_unlock(cond_t* cond)
{
    pthread_mutex_t *mtx = &cond->p_mutex;
    return pthread_mutex_unlock(mtx);
}

/*
 * Waits on the condition variable held in cond.
 *
 * 1. The caller must lock the mutex before calling; that same mutex is what
 *    gets passed to pthread_cond_wait().
 * 2. Inside pthread_cond_wait() the mutex is released first.
 * 3. Once the awaited condition arrives, pthread_cond_wait() locks the mutex
 *    again before it returns.
 * In other words, the wait contains an unlock followed by a re-lock.
 */
int cond_wait(cond_t* cond)
{
    pthread_cond_t *cv = &cond->p_cond;
    pthread_mutex_t *mtx = &cond->p_mutex;
    return pthread_cond_wait(cv, mtx);
}

/*
 * Waits on the condition variable held in cond until it is signalled or the
 * absolute time *ts passes; returns pthread_cond_timedwait()'s result.
 */
int cond_timedwait(cond_t* cond, const struct timespec *ts)
{
    pthread_cond_t *cv = &cond->p_cond;
    pthread_mutex_t *mtx = &cond->p_mutex;
    return pthread_cond_timedwait(cv, mtx, ts);
}

/*
 * Sends a signal to one thread blocked on cond's condition variable, i.e.
 * wakes a single waiter. The call also reports success when no thread is
 * blocked.
 */
int cond_signal(cond_t* cond)
{
    pthread_cond_t *cv = &cond->p_cond;
    return pthread_cond_signal(cv);
}

/* Wakes every thread currently blocked on cond's condition variable. */
int cond_broadcast(cond_t* cond)
{
    pthread_cond_t *cv = &cond->p_cond;
    return pthread_cond_broadcast(cv);
}

pthreadpool.h

#pragma once

#include "condition.h"


/* Entry point of a task: takes one opaque argument and returns a pointer. */
typedef void *(*TASK_ROUTINE)(void *);
/* Opaque argument type handed to a task's entry point. */
typedef void *TASK_PARA_TYPE;

/* One queued unit of work; tasks are chained into a singly linked list. */
struct task {
    TASK_ROUTINE run;     /* function to execute */
    TASK_PARA_TYPE arg;   /* argument passed to run */
    struct task *next;    /* following task in the queue, or NULL */
};
typedef struct task task_t;

/* State of one thread pool: a task queue plus worker bookkeeping. */
struct threadpool {
    cond_t ready;       /* mutex + condition variable guarding the fields below */
    task_t *first;      /* head of the task queue, NULL when it is empty */
    task_t *last;       /* tail of the task queue */
    int threadcnt;      /* number of worker threads that currently exist */
    int idle;           /* number of workers currently waiting for a task */
    int max_threads;    /* upper limit for threadcnt */
    int quit;           /* set to 1 while the pool is being destroyed */
};
typedef struct threadpool threadpool_t;

/*
 * Prepares pool for use: empty task queue, no worker threads, and at most
 * max_threads workers to be created later on demand.
 */
void threadpool_init(threadpool_t* pool, int max_threads);
/*
 * Marks the pool as quitting, waits until every worker thread has left and
 * then releases the pool's synchronisation objects.
 */
void threadpool_destroy(threadpool_t *pool);
/*
 * Appends a task that will call mytask(arg) to the pool's queue; an idle
 * worker is woken for it, or a new worker is started while the thread limit
 * has not been reached.
 */
void threadpool_add_task(threadpool_t *pool, TASK_ROUTINE mytask, TASK_PARA_TYPE arg);

pthreadpool.c

#include <unistd.h>
#include <errno.h>

#include "pthreadpool.h"
#include "pool_util.h"

void* thread_routine(void* arg) {
    pthread_t tid = pthread_self();
    printf("Thread %#lx starting\n", (size_t)tid);
    threadpool_t *pool = (threadpool_t*)arg;  // 传给线程的参数就是threadpool本身
    int timedout;

    while(1) {
        timedout = 0;
        cond_lock(&pool->ready);

        pool->idle++;
        //waiting for new task or the destroy of thread pool
        while((NULL==pool->first) &&  (0==pool->quit)) {  // 没有task要处理，且并不要销毁线程池，那么就wait进入休眠，等着处理task
        //while((NULL==pool->first)) {
            printf("Thread %#lx waiting\n", (size_t)tid);
            //blocked wait
            //cond_wait(&pool->ready);

            //impletement timedout wait
            struct timeval tv;
            struct timespec ts;
            gettimeofday(&tv, NULL);
            ts.tv_sec = tv.tv_sec + 2;
            ts.tv_nsec = 0;
            int ret = cond_timedwait(&pool->ready, &ts);  // 等待2秒钟，如果这期间有任务来了，那么就执行task，重新跑thread_routine

            if(ETIMEDOUT == ret) {
                printf("Thread %#lx waiting timedout\n", (size_t)tid);
                timedout = 1;
                break;
            }
        }

        pool->idle--;  // 一个空线程，最多等待2s，如果依然没有task需要处理，就跳出来，闲置线程减1，没有task则该线程退出

        // new task。有task，就取出task干活。取完task之后就可以把锁unlock了
        if(pool->first) {  // 没有任务线程超时后不执行此处，退出   有任务则直接在这个地方执行task
            // extract a task from the head of the queue
            task_t *tk = pool->first;
            pool->first = tk->next;     

            //It takes some time to excute task, unlock first to permit 
            //other producers to add task, and other consumers to enter the loop 
            cond_unlock(&pool->ready);
            //execute task
            tk->run(tk->arg);  // 执行任务，一般较耗时
            free(tk);
            cond_lock(&pool->ready);  // 执行完成上一个任务之后，继续争取mutex，因为在while(1)中，只要有task就要不断获取mutex，从task中互斥地取出任务执行
        }

        // the destroy of thread pool 如果已经没有任务要处理，并且确定要销毁线程池。那么每个执行到这儿的线程就准备退出了
        // 但凡还有task要执行，都会优先执行task，（因为上一步）
        if(pool->quit && NULL==pool->first) {
            pool->threadcnt--;
            if(0 == pool->threadcnt)         // 如果线程池中没有线程了，则当前线程为最后一个线程池中的线程
                cond_signal(&pool->ready);   // 主线程有可能在threadpool_destroy中wait，通知主线程往下执行
            cond_unlock(&pool->ready);       //do not forget unlock when breaking out the loop
            break;
        }


        // wait timedout
        if(timedout && NULL==pool->first) {  // wait超过时间，退出
            pool->threadcnt--;
            cond_unlock(&pool->ready);//do not forget unlock when breaking out the loop
            break;
        }

        cond_unlock(&pool->ready);
    }
    printf("Thread %#lx exiting\n", (size_t)tid);
    return NULL;
}

// Initialise a thread pool: set up its lock/condition pair, start with an
// empty task queue and no workers, and remember the worker limit.
void threadpool_init(threadpool_t* pool, int max_threads)
{
    cond_init(&pool->ready);

    pool->first = NULL;
    pool->last = NULL;

    pool->idle = 0;
    pool->threadcnt = 0;

    pool->max_threads = max_threads;
    pool->quit = 0;
}

// Tear down a thread pool: flag it as quitting, wait for every worker to
// leave, then destroy the lock/condition pair. Calling it again on a pool
// that is already flagged as quitting does nothing.
void threadpool_destroy(threadpool_t *pool)
{
    if(0 != pool->quit) {
        return;
    }

    // Take the lock before touching quit, threadcnt and idle.
    cond_lock(&pool->ready);
    pool->quit = 1;

    // Idle workers are asleep in their wait and have no task to process, so
    // wake all of them to let them run their exit path. Busy workers cannot
    // receive the broadcast; they check quit themselves after their task.
    if(0 != pool->threadcnt && 0 != pool->idle) {
        cond_broadcast(&pool->ready);
    }

    // Stay here until every worker has left.
    while(0 != pool->threadcnt) {
        cond_wait(&pool->ready);
    }

    cond_unlock(&pool->ready);
    cond_destroy(&pool->ready);
}

// Add a task to the thread pool.
//
// The task (mytask called with arg) is appended to the end of the queue.
// If a worker is idle it is woken up; otherwise a new worker is started as
// long as fewer than max_threads workers exist.
//
// Failure handling:
//  - If the task node cannot be allocated, a message is printed and the task
//    is dropped; arg is not touched and stays owned by the caller.
//  - If a new worker cannot be created, the error is printed and the task
//    stays queued. threadcnt counts only workers that really exist, so
//    threadpool_destroy() never waits for a thread that was not started.
//
// New workers are detached because nobody joins them: the pool tracks their
// lifetime through threadcnt instead.
void threadpool_add_task(threadpool_t *pool, TASK_ROUTINE mytask, TASK_PARA_TYPE arg)
{
    task_t *newtask = malloc(sizeof *newtask);
    if(NULL == newtask) {
        printf("threadpool_add_task: out of memory, task dropped\n");
        return;
    }
    newtask->run = mytask;
    newtask->arg = arg;
    newtask->next = NULL;

    cond_lock(&pool->ready);

    // insert newtask at the end of the queue
    if(pool->first) {
        pool->last->next = newtask;
    } else {
        pool->first = newtask;
    }
    pool->last = newtask;

    if(pool->idle > 0) {
        // notify one waiting worker
        cond_signal(&pool->ready);
    } else if(pool->threadcnt < pool->max_threads) {
        // no idle worker and the limit is not reached: start a new worker,
        // which begins executing thread_routine immediately
        pthread_t tid;
        int ret = pthread_create(&tid, NULL, thread_routine, (void*)pool);
        if(ret) {
            printf("pthread_create, ret = %d\n", ret);
        } else {
            pthread_detach(tid);
            pool->threadcnt++;
        }
    }

    cond_unlock(&pool->ready);
}

main.c

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pthreadpool.h"

/* Number of tasks submitted when -t is not given. */
#define DEFAULT_TASK_CNT    10
/* Worker thread limit passed to threadpool_init() when -k is not given. */
#define DEFAULT_THREAD_CNT  3

// Task body used by the demo.
// arg points to a heap-allocated int holding the task id; the task takes
// ownership of it and frees it. The task prints which thread runs it, sleeps
// for one second to simulate work and returns NULL.
void* task_routine(void* arg) {
    pthread_t tid = pthread_self();
    int task_id = *(int*)arg;
    free(arg);
    // %#lx expects an unsigned long, so convert to exactly that type.
    printf("Thread %#lx working on task %d\n", (unsigned long)tid, task_id);
    sleep(1);
    return NULL;
}

/*
 * Parses text as a decimal integer that must be at least minimum and must
 * fit in an int. Stores the value in *out and returns 0 on success; returns
 * -1 (leaving *out untouched) for empty input, trailing garbage or a value
 * out of range.
 */
static int parse_count(const char *text, int minimum, int *out)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);
    /* Largest int value, computed without needing <limits.h>. */
    const long int_max = (long)(~0U >> 1);

    if (end == text || *end != '\0') {
        return -1;
    }
    if (value < minimum || value > int_max) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Demo driver for the thread pool.
 *
 * Options: -t taskcnt (number of tasks, >= 0) and -k threadcnt (worker
 * limit, >= 1). An unknown option or an invalid number prints the usage text
 * and exits with EXIT_FAILURE. Each task receives its own heap-allocated
 * task id, which the task itself frees.
 */
int main(int argc, char** argv)
{
    int taskcnt = DEFAULT_TASK_CNT;
    int threadcnt = DEFAULT_THREAD_CNT;
    int ch;
    char* prog = argv[0];

    while((ch = getopt(argc, argv, "t:k:")) != -1) {
        int ok;
        switch (ch) {
        case 't':
            ok = (0 == parse_count(optarg, 0, &taskcnt));
            break;
        case 'k':
            /* A pool without workers would never run any task. */
            ok = (0 == parse_count(optarg, 1, &threadcnt));
            break;
        case '?':
        default:
            ok = 0;
            break;
        }
        if(!ok) {
            printf("Usage: %s [-k threadcnt] [-t taskcnt]\n""\tdefault threadcnt=3, taskcnt=10\n", prog);
            exit(EXIT_FAILURE);
        }
    }

    threadpool_t pool;
    threadpool_init(&pool, threadcnt);

    int i;
    for(i = 0; i < taskcnt; i++) {
        int *arg = malloc(sizeof *arg);
        if(NULL == arg) {
            printf("malloc failed, only %d task(s) submitted\n", i);
            break;
        }
        *arg = i;
        threadpool_add_task(&pool, task_routine, arg);
    }

    printf("开始destory\n");
    threadpool_destroy(&pool);
    printf("主进程结束！\n");

    return 0;
}

Makefile

# Builds the thread pool demo program.
# "all" and "clean" are commands, not files.
.PHONY: all clean

# Toolchain and flags: all warnings plus debug info; link against pthread.
CC=gcc
CFLAGS=-Wall -g
LIB=-lpthread
# Object files that make up the program, and the name of the executable.
OBJS=main.o pthreadpool.o condition.o
BIN=proc

# Default target: link all objects into $(BIN).
all:$(BIN)
$(BIN):$(OBJS)
	$(CC) $(CFLAGS) -o $@ $^ $(LIB)

# Header dependencies of each object file.
# To regenerate these rules, run: gcc -MM *.c
condition.o: condition.c condition.h pool_util.h
main.o: main.c pthreadpool.h condition.h
pthreadpool.o: pthreadpool.c pthreadpool.h condition.h pool_util.h

# Removes objects, debug symbol bundles and the executable.
clean:
	@rm -rf *.dSYM *.o $(BIN)