Linux线程池实现并行排序

最新推荐文章于 2022-03-10 09:47:21 发布

caoyan_12727

最新推荐文章于 2022-03-10 09:47:21 发布

阅读量1.2k

点赞数 1

分类专栏： unix 文章标签： unix线程池实现并行排序线程池 pthread_cond_signal应

本文链接：https://blog.csdn.net/caoyan_12727/article/details/52313424

版权

unix 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

　　在多线程编程中，如果一个应用需要频繁地创建和销毁线程，而任务执行的时间又非常短，那么CPU的时间就大多花在线程的创建和销毁上，此时就应该使用线程池。反之，如果线程创建和销毁的开销相比任务的执行时间可以忽略不计，就没有必要使用线程池了。下面我们就来看看一个简单的实现：

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <pthread.h>
#include <assert.h>
#include <iostream>
#define num 100
#define thread_num 3
using namespace std;

int data[num];// the values to be sorted
int thread_unit=10;// number of elements handled by each sort task

/* One queued unit of work; nodes form a singly linked list through `next`. */
typedef struct worker{
	void *(*process) (void *arg);/* function a worker thread calls to run this task */
	void *arg;/* argument passed to process */
	struct worker *next;
} task;

/* Thread pool state: the task queue plus the synchronization objects guarding it. */
typedef struct{
	pthread_mutex_t queue_lock;// mutex locked around accesses to the task queue
	pthread_cond_t queue_ready;// condition variable workers wait on for new tasks or shutdown
	task *queue_head; /* head of the linked list of tasks waiting to be run */
	int shutdown;/* non-zero once the pool is being destroyed */
	pthread_t *threadid;// array of worker thread ids
	int max_thread_num;/* number of worker threads in the pool */
	int cur_queue_size;/* number of tasks currently waiting in the queue */
} thread_pool;

/* Forward declarations: both are defined further down in this file. */
int add_task(void *(*process) (void *arg), void *arg);
void *thread_routine(void *arg);

static thread_pool *pool = NULL;// the single global pool; NULL until pool_init() allocates it
void pool_init(int max_thread_num){
	pool = (thread_pool *)malloc(sizeof (thread_pool));
	pthread_mutex_init(&(pool->queue_lock), NULL);
	pthread_cond_init(&(pool->queue_ready), NULL);
	pool->queue_head = NULL;
	pool->max_thread_num = max_thread_num;
	pool->cur_queue_size = 0;
	pool->shutdown = 0;
	pool->threadid = (pthread_t *)malloc(max_thread_num * sizeof (pthread_t));
	int i = 0;
	for (i = 0; i < max_thread_num; i++){
		pthread_create(&(pool->threadid[i]), NULL, thread_routine, NULL);//创建线程
	}
}

int add_task(void *(*process) (void *arg), void *arg){/*向线程池中加入任务*/
	task *newworker = (task *)malloc(sizeof (task));/*构造一个新任务*/
	newworker->process = process;
	newworker->arg = arg;
	newworker->next = NULL;
	pthread_mutex_lock(&(pool->queue_lock));//向任务队列中添加任务是互斥操作要上锁
	task *member = pool->queue_head;
	if (member != NULL){
		while (member->next != NULL)
			member = member->next;
		member->next = newworker;
	}
	else{
		pool->queue_head = newworker;//尾插法
	}
	assert(pool->queue_head != NULL);
	pool->cur_queue_size++;
	pthread_mutex_unlock(&(pool->queue_lock));
	pthread_cond_signal(&(pool->queue_ready));/*条件已满足，向等待这个条件的线程发出信号，唤醒休眠的进程*/
	return 0;
}

/*
 * Shut the pool down: tell every worker to exit, join them, discard any
 * tasks still queued, and release all pool resources.
 *
 * Returns 0 on success, -1 if there is no pool or shutdown already started.
 */
int pool_destroy(){
	if (pool == NULL)return -1;/* never created, or already destroyed */

	/*
	 * The flag must be changed and broadcast while holding the same mutex the
	 * workers use for their wait predicate. Otherwise a worker that has just
	 * tested `shutdown` but not yet called pthread_cond_wait() would miss the
	 * broadcast and sleep forever, hanging the pthread_join() below.
	 */
	pthread_mutex_lock(&(pool->queue_lock));
	if (pool->shutdown){
		pthread_mutex_unlock(&(pool->queue_lock));
		return -1;/* guard against a second call */
	}
	pool->shutdown = 1;
	pthread_cond_broadcast(&(pool->queue_ready));
	pthread_mutex_unlock(&(pool->queue_lock));

	/* Wait for every worker to finish. */
	for (int i = 0; i < pool->max_thread_num; i++)
		pthread_join(pool->threadid[i], NULL);
	free(pool->threadid);

	/* Free any tasks that were still waiting in the queue. */
	while (pool->queue_head != NULL){
		task *head = pool->queue_head;
		pool->queue_head = head->next;
		free(head);
	}

	/* The mutex and condition variable must be destroyed too. */
	pthread_mutex_destroy(&(pool->queue_lock));
	pthread_cond_destroy(&(pool->queue_ready));
	free(pool);
	/* Clear the global so a later call sees that the pool is gone. */
	pool = NULL;
	return 0;
}

/*
 * Worker thread body: repeatedly take the task at the head of the queue and
 * run it, until the pool's shutdown flag is set.
 *
 * arg: unused.
 * Returns NULL when the pool is shutting down.
 *
 * Thread ids are printed through an unsigned long cast with %lx; passing a
 * pthread_t directly to %x is a format/argument type mismatch.
 */
void *thread_routine(void *arg){
	(void)arg;
	printf("starting thread 0x%lx\n", (unsigned long)pthread_self());
	for (;;){
		pthread_mutex_lock(&(pool->queue_lock));
		/*
		 * Block while there is nothing to do and the pool is still alive.
		 * pthread_cond_wait() releases the mutex while waiting and re-acquires
		 * it before returning, so the predicate is always tested under the lock.
		 */
		while (pool->cur_queue_size == 0 && !pool->shutdown){
			printf("thread 0x%lx is waiting\n", (unsigned long)pthread_self());
			pthread_cond_wait(&(pool->queue_ready), &(pool->queue_lock));
		}
		/* The pool is being destroyed: release the lock before leaving. */
		if (pool->shutdown){
			pthread_mutex_unlock(&(pool->queue_lock));
			printf("thread 0x%lx will exit\n", (unsigned long)pthread_self());
			return NULL;
		}
		printf("thread 0x%lx is starting to work\n", (unsigned long)pthread_self());
		assert(pool->cur_queue_size != 0);/* the queue cannot be empty here */
		assert(pool->queue_head != NULL);
		/* Detach the head task and shrink the queue length by one. */
		pool->cur_queue_size--;
		task *worker = pool->queue_head;
		pool->queue_head = worker->next;
		pthread_mutex_unlock(&(pool->queue_lock));

		/* Run the task outside the lock so other workers can dequeue meanwhile. */
		(*(worker->process)) (worker->arg);
		free(worker);
		worker = NULL;
		/* NOTE(review): one-second pause after every task, presumably demo pacing so
		 * the tasks spread across workers; kept to preserve the existing timing. */
		sleep(1);
	}
}
void heapsort(int begin){//将每一段进行排序
	int start=begin*thread_unit;
	int end = begin*thread_unit + thread_unit - 1;//最后一个元素
	int min,temp;
	for (int i = start; i <=end; i++){
		min = i;
		for (int j = i; j <=end; j++){
			if (data[min]>data[j])min = j;
		}
		if (i != min){//交换
			data[i] =data[i]+data[min];
			data[min] = data[i] - data[min];
			data[i] = data[i] - data[min];
		}
	}
}

/*
 * Pool task that sorts one segment of `data`.
 *
 * arg: pointer to an int holding the segment index passed on to heapsort();
 *      a NULL pointer is ignored.
 * Returns NULL.
 */
void *myprocess(void *arg){
	if (arg == NULL)return NULL;
	int start = *((int *)arg);/* index of the segment to sort */
	/* pthread_t does not match %x; print it through an unsigned long cast. */
	printf("threadid is 0x%lx, working on data %d\n", (unsigned long)pthread_self(), start);
	heapsort(start);
	return NULL;
}

void *merge(void *){//k路归并排序
	int *start=(int *)malloc(10*sizeof(int));
	int *top=(int *)malloc(10*sizeof(int));
	int *temp=(int *)malloc(num*sizeof(int));
	for(int i=0;i<10;i++){
		start[i]=i*10;
		top[i]=10*(i+1);
	}
	int count=0;//temp数组的起始位置
	//开始归并
	int mark,minimum,flag;
	while(count!=num){
		flag=0;
		for(int i=0;i<10;i++){
			if(start[i]<top[i]){//没有超出界限
				if(flag==0){//只执行1次
					minimum=start[i];
					mark=i;//i标示属于那一段
				}
				flag=1;
				if(data[minimum]>data[start[i]]){
					minimum=start[i];
					mark=i;
				}
			}				
		}
		start[mark]++;
		temp[count++]=data[minimum];				
	}
	for(int i=0;i<num;i++)data[i]=temp[i];
	free(start);
	free(top);
	free(temp);
	return NULL;
}

/*
 * Completion tracking for the demo driver. Each job submitted through
 * submit_tracked() is counted, and wait_tracked() blocks until all of them
 * have finished. This replaces the fixed sleep() delays, which let the merge
 * task start while a sort task could still be running.
 */
typedef struct{
	void *(*fn)(void *);/* the real task function */
	void *arg;/* argument passed to fn */
	int segment;/* storage for a segment index when fn needs one */
} tracked_job;

static pthread_mutex_t tracked_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t tracked_done = PTHREAD_COND_INITIALIZER;
static int tracked_pending = 0;/* jobs submitted but not yet finished */

/* Pool task wrapper: run the wrapped job, then report its completion. */
static void *run_tracked(void *p){
	tracked_job *job = (tracked_job *)p;
	job->fn(job->arg);
	pthread_mutex_lock(&tracked_lock);
	if (--tracked_pending == 0)pthread_cond_broadcast(&tracked_done);
	pthread_mutex_unlock(&tracked_lock);
	return NULL;
}

/* Queue a job on the pool and count it as pending; returns 0 on success, -1 otherwise. */
static int submit_tracked(tracked_job *job){
	pthread_mutex_lock(&tracked_lock);
	tracked_pending++;
	pthread_mutex_unlock(&tracked_lock);
	if (add_task(run_tracked, job) != 0){
		/* The job never reached the queue, so undo the count. */
		pthread_mutex_lock(&tracked_lock);
		tracked_pending--;
		pthread_mutex_unlock(&tracked_lock);
		return -1;
	}
	return 0;
}

/* Block until every job submitted so far has finished. */
static void wait_tracked(void){
	pthread_mutex_lock(&tracked_lock);
	while (tracked_pending > 0)
		pthread_cond_wait(&tracked_done, &tracked_lock);
	pthread_mutex_unlock(&tracked_lock);
}

/* Print `data`, one segment of thread_unit values per line. */
static void print_data(void){
	for (int j = 0; j < num; j++){
		if(j!=0&&j%thread_unit==0)cout<<endl;
		cout<<data[j]<<" ";
	}
	cout<<endl;
}

/*
 * Demo driver: fill `data` with random values, sort each segment on the
 * thread pool, merge the sorted segments on the pool, and print the array
 * after every stage.
 */
int main(int argc, char **argv){
	(void)argc;
	(void)argv;
	if (thread_unit <= 0)return 1;
	const int segments = (num + thread_unit - 1) / thread_unit;

	for (int j = 0; j < num; j++)data[j] = rand() % 100;
	print_data();

	pool_init(thread_num);/* start the worker threads */

	/* One job per segment, plus one for the final merge. */
	tracked_job *jobs = (tracked_job *)malloc((segments + 1) * sizeof (tracked_job));
	if (jobs == NULL){
		pool_destroy();
		return 1;
	}

	/* Queue one sort task per segment. */
	for (int i = 0; i < segments; i++){
		jobs[i].segment = i;
		jobs[i].fn = myprocess;
		jobs[i].arg = &jobs[i].segment;
		submit_tracked(&jobs[i]);
	}
	wait_tracked();/* every segment is sorted once this returns */
	cout<<"after sorting:"<<endl;
	print_data();

	/* The merge may only start once all the sort tasks are done. */
	jobs[segments].segment = 0;
	jobs[segments].fn = merge;
	jobs[segments].arg = NULL;
	submit_tracked(&jobs[segments]);
	wait_tracked();

	/* Destroy the pool; this joins the worker threads. */
	pool_destroy();
	cout<<"after merging:"<<endl;
	print_data();
	free(jobs);
	return 0;
}

测试结果：

初始的100个数：

新创建的3个线程对100个数分10组进行排序，每一组的数据是有序的，结果如下：

最后利用k路归并排序，对10组有序数据进行排序，结果如下：

可见，这100个数最后都从小到大排列了！！！！

在程序中要注意：

pthread_cond_broadcast(&(pool->queue_ready));

如果我们将这一行注释掉的话，程序就会永久阻塞（看起来像陷入了死循环）。原因是：linux线程的执行和windows不同，pthread有两种状态：joinable状态和unjoinable状态。如果线程是joinable状态，当线程函数自己返回退出时或pthread_exit时都不会释放线程所占用的堆栈和线程描述符（总计8K多），只有当你调用了pthread_join之后这些资源才会被释放。若是unjoinable状态的线程，这些资源在线程函数退出时或pthread_exit时会自动被释放。若没有指定线程的属性，则默认创建的线程是joinable类型，即创建这些线程的线程(主线程)要调用pthread_join来等待子线程结束并释放掉子线程的资源。如果将上面一行注释掉，则所有工作线程都一直在条件变量上休眠，不会退出，pthread_join也就会使主线程一直阻塞下去！

最后总结一下上述程序创建线程池的逻辑：

第一步：创建N个线程，这些线程首先检测任务队列是否为空，如果为空则等待条件变量(即队列中有任务的时候);

第二步：将计算任务添加到任务队列中，任务结构task中的process字段用来指定任务的处理函数，最后调用pthread_cond_signal发出信号，唤醒一个等待的线程;

第三步：所有任务完成后，调用pthread_join等待所有线程结束并回收资源，最后销毁线程池；