合并排序

背景概述

        上一篇学习了使用递归实现快排算法。快排适合数据规模较小(5M以下)的排序,当排序数据规模较大(1G以上)时,效率就比较差了。合并排序(即归并排序)比较适合大规模排序,基数排序也不错。合并排序的原理这里不再展开,请自行查阅相关资料。

技术要点

1. 合并排序;

2. 快排;

3. 并行计算;

 

程序流程


1. gen_rand_arrary()产生 -n 参数指定个数的随机数;
2. split_sort()将这些随机数分成大小为 -s 参数指定值的堆;

   然后quick_sort_handle()并行地对每个堆进行快排;
3. merge_main()将快排好的堆进行合并;

 

/********************************************************
说明:
    该文件实现合并排序;堆内数据进行快排;

    涉及的内容:
        1. 多核并行计算;
        2. 快排序;
        3. 合并排序

测试环境:
    1. Linux 2.6.18-348.el5 #1 SMP 24core.

测试结果:
[root@dell38 app]# time ./mergesort1 -n 1200000000 -s 50000000 
Gen rand number :1200000000 ...
       Gen over:
        604289383 846930886 481692777 514636915 757747793 424238335 719885386 449760492 596516649 1189641421 1025202362 150490027 783368690 1102520059 844897763 767513926 165180540 340383426 304089172 103455736 35005211 521595368 294702567 526956429 336465782 861021530 278722862 233665123 945174067 468703135 1101513929 601979802 115634022 635723058 169133069 1125898167 1059961393 889018456 628175011 456478042 1131176229 453377373 859484421 714544919 608413784 756898537 534575198 773594324 149798315 838664370 1129566413 184803526 412776091 224268980 711759956 749241873 137806862 42999170 982906996 135497281 511702305 884420925 737477084 627336327 572660336 1159126505 805750846 432621729 1100661313 233925857 1141616124 84353895 939819582 801100545 798898814 348233367 610515434 385990364 174344043 760313750 277171087 356426808 945117276 689947178 580695788 709393584 491705403 718502651 752392754 274612399 853999932 64095060 211549676 643993368 943947739 784210012 855636226 549698586 269348094 756297539 1036140795 463480570 840651434 775960378 317097467 692066601 176710097 927612902 130573317 603570492 487926652 660260756 959997301 485560280 402724286 593209441 1194953865 894429689 364228444 747346619 221558440 270744729 1063958031 433108117 914738097 807905771 269834481 822890675 
Quick sort 1200000000 number...
split task 3 over.
split task 5 over.
split task 1 over.
split task 7 over.
split task 18 over.
split task 9 over.
split task 22 over.
split task 12 over.
split task 14 over.
split task 16 over.
merg task 1 over.
split task 20 over.
merg task 2 over.
merg task 4 over.
merg task 0 over.
merg task 3 over.
split task 10 over.
split task 8 over.
merg task 5 over.
split task 0 over.
split task 4 over.
merg task 6 over.
merg task 8 over.
split task 2 over.
split task 6 over.
split task 19 over.
split task 13 over.
split task 17 over.
merg task 7 over.
split task 15 over.
split task 21 over.
merg task 12 over.
merg task 11 over.
merg task 10 over.
merg task 9 over.
split task 23 over.
merg task 13 over.
split task 11 over.
merg task 15 over.
merg task 14 over.
merg task 18 over.
merg task 17 over.
merg task 16 over.
merg task 19 over.
merg task 20 over.
merg task 21 over.
merg task 22 over.

###OVER###:
        1   2   3   3   4   5   7   8   9   9   9   9   10  11  11  11  12  14  15  16  17  17  17  19  19  21  23  24  24  26  27  27  27  28  31  31  31  32  33  33  35  37  38  38  38  39  40  40  41  42  42  44  44  44  44  45  46  46  47  48  50  50  50  51  52  52  52  53  54  55  55  56  58  58  58  59  59  60  60  63  64  64  66  67  67  67  67  68  71  71  72  74  75  75  76  77  77  79  79  83  83  84  84  84  85  89  89  92  93  94  95  95  96  96  97  97  99  99  100 101 103 
Start check merge sort result:
OK.

real    1m12.338s
user    6m31.432s
sys     0m10.275s


结果分析:
    使用合并排序+多线程,对12亿数据的排序要比单独使用串行快排快4倍左右。

2014.11.12 Aming created. 


问题:
    1. 并行合并时,部分共享资源(如 merge_task_counter、MAX_TASK_OVER_NUM、全局变量 pair 以及各任务的 status 字段)没有加锁保护,可能出现竞态问题。
*********************************************************/


#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <unistd.h>
#define __USE_GNU
#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <sys/time.h>
#include <time.h>

#include <pthread.h>

/*
 * One element of the data set. Elements are allocated as a contiguous array
 * (see gen_rand_arrary) and are additionally chained into doubly linked
 * lists through pre/next once a chunk has been sorted.
 */
typedef struct node_s
{
    int data;               /* value being sorted */

    struct node_s *pre;     /* previous node in the list, or NULL */
    struct node_s *next;    /* next node in the list, or NULL */
}node_t;

/* The array element type is the list node type itself. */
typedef node_t arrary_t;
/* Expands to the data field of the element `a` points to (usable as an lvalue). */
#define arrary_value(a) ((a)->data)



/* Life-cycle state stored in task_t.status and polled by merge_main. */
typedef enum status_e
{
    STATUS_INVALID = 0,     /* zero value: descriptor just cleared, or list already absorbed by a merge */
    STATUS_RUNNING,         /* a sort or merge is currently working on this task's list */
    STATUS_ERROR,           /* presumably marks a failed task -- TODO confirm intended use */
    STATUS_OVER             /* the task's list is sorted and available for merging */
}status_t;

/*
 * Descriptor of one chunk of the data set. split_sort fills one descriptor
 * per chunk; after a merge the surviving descriptor's head/tail describe the
 * merged list.
 */
typedef struct task_s
{
    /* thread handle written by pthread_create in split_sort */
    pthread_t task_id;
    /* chunk index; also passed to pthread_set_affinity and printed in logs */
    int cpu_id;

    /* current state, see status_t */
    status_t status; 

    /* first element of the chunk / sorted list */
    node_t *head;
    /* last element of the chunk / sorted list */
    node_t *tail;
    
}task_t;

/*
 * Argument handed to a merge thread (merge_sort_handle): the two sorted
 * lists to merge. merge_sort stores the merged list in `a`.
 */
typedef struct merge_pair_s
{
    /* thread handle written by pthread_create */
    pthread_t task_id;
    /* not read by any code visible in this file */
    int cpu_id;
        
    /* first list; receives the merged result */
    task_t *a;
    /* second list; marked STATUS_INVALID once merged into a */
    task_t *b;
}merge_pair_t;


/* Debug counter; not used by any active code in this file. */
int loop = 0;

/* The data set to sort; assigned in main from gen_rand_arrary(). */
arrary_t *arrary;

/* When defined, main allocates, fills and finally frees the data set. */
#define ARRARY_MALLOC_TRUE 

/* Total number of elements (command line option -n). */
int ARRARY_LEN = 5000000;
/* Number of elements per chunk (command line option -s). */
int SPLIT_NUM = 5000000;

/* File-scope merge request descriptor. */
merge_pair_t pair;

/* Number of chunks, computed by split_sort ("NUN" is the original spelling). */
int MAX_TASK_NUN = 0;
/* Array of MAX_TASK_NUN chunk descriptors, allocated by split_sort. */
task_t *split_task_des = NULL;

/* Number of sorted lists still to be merged: set to MAX_TASK_NUN by
 * merge_main and decremented by every finished merge thread. */
int MAX_TASK_OVER_NUM = 0;

/* Count of merge threads started so far; each merge thread uses it as its
 * id and as the CPU number it pins itself to. */
int merge_task_counter = 0;



#if 1
/*
 * list_add_tail(t, new)
 *   Append the node `new` behind the current tail `t` of a doubly linked
 *   list, then advance both cursors:
 *     - `t` becomes the appended node (the new tail, its next is set to NULL),
 *     - `new` becomes the node that used to follow it in its source list.
 *   Both arguments must be modifiable pointer lvalues; each is evaluated
 *   several times, so do not pass expressions with side effects.
 *
 *   The body is wrapped in do { } while (0) so that the macro behaves like a
 *   single statement (safe inside un-braced if/else), and every use of an
 *   argument is parenthesized.
 */
#define list_add_tail(t, new) \
do {\
    (t)->next = (new);\
    (new)->pre = (t);\
    (t) = (new);\
    (new) = (t)->next;\
    (t)->next = NULL;\
} while (0)

/*
 * list_show(prefix, head, tail, snum)
 *   Print `prefix` followed by the data of the list starting at `head`.
 *   Printing stops at the end of the list or once more than `snum` values
 *   have been written. A line break is inserted after every SPLIT_NUM values.
 *   Side effect: (tail)->next is set to NULL before the walk.
 *
 *   Internal names carry a list_show_ prefix so that they cannot capture an
 *   identifier used in the caller's arguments.
 */
#define list_show(prefix, head, tail, snum) \
do {\
    int list_show_count_ = 0;\
    arrary_t *list_show_cur_;\
    (tail)->next = NULL;\
    list_show_cur_ = (head);\
    printf("%s:\n\t", (prefix));\
    while (list_show_cur_ != NULL)\
    {\
        printf("%-3d ", list_show_cur_->data);\
        list_show_cur_ = list_show_cur_->next;\
        list_show_count_++;\
        if ((SPLIT_NUM > 0) && ((list_show_count_ % SPLIT_NUM) == 0)) printf("\n\t");\
        if (list_show_count_ > (snum)) break;\
    }\
} while (0)
#endif

/*
 * Partition the element range [left, right] (both inclusive, left < right)
 * around the value initially stored in *left.
 *
 * Two cursors walk towards each other. At every step one of them points at
 * an element holding the pivot value; the other element is either skipped
 * (already on the correct side) or swapped with the pivot.
 *
 * Returns the position where the cursors meet: every element before it is
 * <= the pivot value and every element after it is >= the pivot value.
 * Only data fields are exchanged; pre/next are not touched.
 */
static arrary_t *quick_sort_partition(arrary_t *left, arrary_t *right)
{
    arrary_t *low = left;
    arrary_t *up  = right;
    const int center = arrary_value(low);

    while (low != up)
    {
        if (arrary_value(low) == center)
        {
            /* pivot value is under the low cursor */
            if (arrary_value(low) > arrary_value(up))
            {
                arrary_value(low) = arrary_value(up);
                arrary_value(up)  = center;
                low++;
            }
            else
            {
                up--;
            }
        }
        else
        {
            /* pivot value is under the up cursor */
            if (arrary_value(low) > arrary_value(up))
            {
                arrary_value(up)  = arrary_value(low);
                arrary_value(low) = center;
                up--;
            }
            else
            {
                low++;
            }
        }
    }

    return low;
}

/*
 * Quicksort used for the elements of one chunk.
 *
 * Sorts the contiguous elements [left, right] (both inclusive) in ascending
 * order of their data field. left and right must point into the same array.
 *
 * To keep the stack shallow the function only recurses into the smaller
 * partition and continues iterating on the larger one, which bounds the
 * recursion depth by O(log n) even for already sorted input (the original
 * recursed into both sides and could exhaust the stack on large chunks).
 *
 * Returns 0 on success, -1 if left or right is NULL.
 */
int quick_sort_for_merge(arrary_t *left, arrary_t *right)
{
    int ret = 0;

    if ((NULL == left) || (NULL == right))
    {
        return -1;
    }

    /* ranges with fewer than two elements are already sorted */
    while (left < right)
    {
        arrary_t *pivot = quick_sort_partition(left, right);

        if ((pivot - left) <= (right - pivot))
        {
            /* left part is the smaller one: recurse there, loop on the right */
            if ((pivot - left) > 1)
            {
                ret = quick_sort_for_merge(left, pivot - 1);
                if (0 != ret)
                {
                    printf("error.\n");
                    return ret;
                }
            }

            left = pivot + 1;
        }
        else
        {
            /* right part is the smaller one: recurse there, loop on the left */
            if ((right - pivot) > 1)
            {
                ret = quick_sort_for_merge(pivot + 1, right);
                if (0 != ret)
                {
                    printf("error.\n");
                    return ret;
                }
            }

            /* pivot > left holds in this branch, so pivot - 1 stays in range */
            right = pivot - 1;
        }
    }

    return 0;
}

/*
 * Pin the calling thread to one CPU.
 *
 * cpu is reduced modulo the number of online processors, so any
 * non-negative task index can be passed. sched_setaffinity() is called with
 * pid 0, which addresses the calling thread.
 *
 * Pinning is best effort: a negative cpu, a failing sysconf() or a failing
 * sched_setaffinity() is reported and the thread keeps its current affinity.
 */
void pthread_set_affinity(int32_t cpu)
{
    cpu_set_t cpumask;
    long online = sysconf(_SC_NPROCESSORS_ONLN);

    if ((cpu < 0) || (online <= 0))
    {
        printf("[fun:%s] cannot pin to cpu %d (online cpus: %ld).\n",
               __FUNCTION__, (int)cpu, online);
        return;
    }

    CPU_ZERO(&cpumask);
    CPU_SET((int)(cpu % online), &cpumask);
    if (0 != sched_setaffinity(0, sizeof(cpumask), &cpumask))
    {
        perror("sched_setaffinity");
    }
}


/*
 * Fill the first num elements of a with a pseudo random sequence and clear
 * their list links.
 *
 * A single rand() value serves as the base; element k receives
 * (base + k) % ARRARY_LEN. Always returns 0.
 */
int gen_rand_arrary_init(arrary_t *a, int num)
{
    const int base = rand();
    arrary_t *slot = a;
    int offset = 0;

    while (offset < num)
    {
        slot->data = (base + offset) % ARRARY_LEN;
        slot->pre  = NULL;
        slot->next = NULL;

        slot++;
        offset++;
    }

    return 0;
}

void* quick_sort_handle(void *arg)
{
    int ret = 0;
    node_t *tmp, *new;

    task_t *t = (task_t *)arg;

    //printf("task:%d...\n", t->cpu_id);
    pthread_set_affinity(t->cpu_id);
    t->status = STATUS_RUNNING;

    #if 0 //并行产生随机数,现在有问题,多线程同时产生随机数时很慢。TBD debug...
    if (t->cpu_id == (MAX_TASK_NUN -1))
    {
        gen_rand_arrary_init(split_task_des[t->cpu_id].head,  (ARRARY_LEN-(t->cpu_id*SPLIT_NUM)));
    }
    else
    {
        gen_rand_arrary_init(split_task_des[t->cpu_id].head,  SPLIT_NUM);
    }

    printf("split task %d gen rand over.\n", t->cpu_id);
    
    #endif

    //对堆进行快排
    ret = quick_sort_for_merge(t->head, t->tail);
    if (0 != ret)
    {
        printf("[fun:%s] error.\n", __FUNCTION__);
    }

    #if 1 //将排好序的堆初始化成双向链表。
        t->head->pre = NULL;
        t->tail->next = NULL;
        tmp = t->head;
        new = (tmp+1);
        while (1)
        {
            tmp->next = new;
            new->pre = tmp;
            tmp = new;

            if (new == t->tail)
            {
                break;
            }
            else
            {
                new = (tmp+1);
            }
        }
    #endif    

    printf("split task %d over.\n", t->cpu_id);
    
    t->status = STATUS_OVER;
    
    return NULL;
}


/*
 * Split the data set into chunks ("heaps" in the author's wording) and sort
 * every chunk with quicksort in its own thread.
 *
 * arrary     first element of the data set
 * total      number of elements in the data set (> 0)
 * split_num  number of elements per chunk (> 0); the last chunk takes the
 *            remainder
 *
 * The function publishes the chunk descriptors through the globals
 * split_task_des / MAX_TASK_NUN and returns as soon as all threads have been
 * started; it does not wait for them. Completion is signalled per chunk via
 * task_t.status (see quick_sort_handle).
 *
 * Returns 0 on success, -1 on invalid arguments, allocation failure or when
 * a thread cannot be created. In the last case threads that were already
 * started keep running and the descriptors are left allocated.
 */
#define split_sort_dbg 0
int split_sort(arrary_t *arrary, int total, int split_num)
{
    int ret = 0;
    int i;
    int task_num;

    /* split_num == 0 would divide by zero, total <= 0 yields no work at all */
    if ((NULL == arrary) || (total <= 0) || (split_num <= 0))
    {
        printf("[fun:%s] invalid argument.\n", __FUNCTION__);
        return -1;
    }

    /* number of chunks, rounded up */
    task_num = total/split_num + ((total%split_num == 0) ? 0 : 1);

    MAX_TASK_NUN = task_num;
    /* calloc leaves every descriptor in STATUS_INVALID (0) */
    split_task_des = (task_t *)calloc((size_t)task_num, sizeof(task_t));
    if (NULL == split_task_des)
    {
        printf("Malloc error.\n");
        return -1;
    }

    #if split_sort_dbg
    for (i = 0; i < total; i++)
    {
        printf("arrary %d address:%p\n", i, (void *)(arrary+i));
    }
    printf("task num:%d\n", task_num);
    #endif

    for (i = 0; i < task_num; i++)
    {
        task_t *task = &split_task_des[i];

        task->cpu_id = i;
        task->head   = arrary + (i*split_num);
        if (i == (task_num - 1))
        {
            /* last chunk ends at the end of the data set */
            task->tail = arrary + total - 1;
        }
        else
        {
            task->tail = arrary + ((i+1)*split_num) - 1;
        }

        ret = pthread_create(&task->task_id, NULL, quick_sort_handle, (void *)task);
        if (0 != ret)
        {
            printf("[fun:%s] pthread_create for task %d failed: %s\n",
                   __FUNCTION__, i, strerror(ret));
            return -1;
        }
    }

    return 0;
}


#define merge_sort_dbg 0
/*
 * Merge two sorted doubly linked lists into one sorted list.
 *
 * pair_a, pair_b  descriptors of the two lists; both lists must be non-empty
 *                 (head and tail set) and sorted in ascending order.
 *
 * The nodes are relinked in place, no memory is allocated. On equal values
 * the node from pair_a is taken first. On return pair_a describes the merged
 * list (status STATUS_OVER) and pair_b is marked STATUS_INVALID.
 *
 * Returns 0 on success, -1 if either descriptor pointer is NULL.
 */
int merge_sort(task_t *pair_a, task_t *pair_b)
{
    arrary_t *tHead = NULL;
    arrary_t *tTail = NULL;

    arrary_t *pa = NULL;
    arrary_t *pb = NULL;

    arrary_t *last = NULL;
    arrary_t *rest = NULL;

    if ((NULL == pair_a) || (NULL == pair_b))
        return -1;

    pair_a->status = STATUS_RUNNING;
    pair_b->status = STATUS_RUNNING;

    pa = pair_a->head;
    pb = pair_b->head;

    #if merge_sort_dbg
    printf("pa:%d ", arrary_value(pa));
    printf("pb:%d ", arrary_value(pb));
    #endif

    /* the node that ends the merged list is known up front: the larger tail,
     * and on a tie the tail of b because nodes of a are taken first */
    if (arrary_value(pair_a->tail) <= arrary_value(pair_b->tail))
    {
        tTail = pair_b->tail;
    }
    else
    {
        tTail = pair_a->tail;
    }

    /* the smaller head starts the merged list */
    if (arrary_value(pa) <= arrary_value(pb))
    {
        tHead = pa;
        pa = pa->next;
    }
    else
    {
        tHead = pb;
        pb = pb->next;
    }

    last = tHead;

    #if merge_sort_dbg
    printf("start merge:%d \n", arrary_value(last));
    #endif

    while ((NULL != pa) && (NULL != pb))
    {
        /* pick the list whose front node is smaller and unlink that node */
        arrary_t **src = (arrary_value(pa) <= arrary_value(pb)) ? &pa : &pb;
        arrary_t *node = *src;

        #if merge_sort_dbg
        printf("->%d ", arrary_value(node));
        #endif

        *src = node->next;

        last->next = node;
        node->pre  = last;
        last = node;
    }

    /* append what is left of the list that was not exhausted; unlike the
     * original this also fixes the back link of the first appended node */
    rest = (NULL != pa) ? pa : pb;
    last->next = rest;
    if (NULL != rest)
    {
        rest->pre = last;
    }

    pair_a->head = tHead;
    pair_a->tail = tTail;

    pair_a->tail->next = NULL;

    /* make the relinked list visible before the status change is observed */
    __sync_synchronize();
    pair_a->status = STATUS_OVER;
    pair_b->status = STATUS_INVALID;

    return 0;
}

void* merge_sort_handle(void *arg)
{
    int ret = 0;
    int task_id = merge_task_counter;
    
    merge_pair_t *pair = (merge_pair_t *)arg;

    pthread_set_affinity(merge_task_counter++);

    ret = merge_sort(pair->a, pair->b);
    if (0 != ret)
    {    
        printf("merge_sort error, ret=%d\n", ret);
        return NULL;
    }

    MAX_TASK_OVER_NUM--;
    printf("merg task %d over.\n",task_id);
    //free(pair);
        
    return NULL;
}

/*
 * Verify the result of the sort and print the verdict.
 *
 * head  first node of the list to check
 * num   number of nodes the list is expected to contain
 *
 * The list passes ("OK.") only if it holds exactly num nodes in
 * non-decreasing order. The walk is limited to num nodes so that a corrupted
 * (cyclic) list cannot hang the check.
 *
 * The original version reported "OK." when the only ordering violation was
 * in the very last pair, and also when the list was shorter than num; both
 * cases are reported as errors now. A NULL head is treated as an empty list.
 */
void check_list(arrary_t *head, int num)
{
    arrary_t *cur;
    int count = 0;
    int sorted = 1;

    printf("\nStart check merge sort result:\n");

    for (cur = head; (cur != NULL) && (count < num); cur = cur->next)
    {
        count++;

        if ((cur->next != NULL) && (cur->data > cur->next->data))
        {
            printf("p1(%d)>p2(%d)\n", cur->data, cur->next->data);
            sorted = 0;
            break;
        }
    }

    if (!sorted)
    {
        /* count - 1 is the index of the first node of the offending pair */
        printf("ERROR. i:%d\n", count - 1);
    }
    else if ((count != num) || (cur != NULL))
    {
        /* cur != NULL here means nodes remain beyond the expected length */
        printf("ERROR. list length mismatch: expected %d nodes, walked %d%s\n",
               num, count, (cur != NULL) ? " and more follow" : "");
    }
    else
    {
        printf("OK.\n");
    }

    return;
}


/*
说明:
    该函数实现将快排好的堆合并;
*/
int merge_main(void)
{
    int i, j;

    status_t status = 0;
 
    MAX_TASK_OVER_NUM = MAX_TASK_NUN;   

    //printf("[fun:%s, line:%d]~~~~~~~~~~~~~~~~~~\n", __FUNCTION__, __LINE__);
    
    while (MAX_TASK_OVER_NUM > 1)
    {
        memset(&pair, 0, (sizeof(merge_pair_t)) );

        #if 0
        printf("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
        printf("MAX_TASK_NUN:%d\n", MAX_TASK_NUN);
        printf("MAX_TASK_OVER_NUM:%d\n", MAX_TASK_OVER_NUM);
        #endif
        
        for (i = 0; i < MAX_TASK_NUN; i++)
        {
            
            if (split_task_des[i].status == STATUS_OVER)
            {
                //printf("a:%d\n", i);
                pair.a = split_task_des + i;
            }
            else if (split_task_des[i].status == STATUS_INVALID)
            {
                //printf("split_task_des[%d].status:%d\n", i, split_task_des[i].status);
                continue;
            }
            else
            {
                //printf("split_task_des[%d].status:%d\n", i, split_task_des[i].status);
                continue;
            }

            for (j = (++i); j < MAX_TASK_NUN; j++)
            {
                if (split_task_des[j].status == STATUS_OVER)
                {
                    //printf("b:%d\n", j);

                    pair.b = split_task_des + j;
                    break;
                }
                else if (split_task_des[j].status == STATUS_INVALID)
                {
                    //printf("split_task_des[%d].status:%d\n", j, split_task_des[j].status);
                    continue;
                }
                else
                {
                    //printf("split_task_des[%d].status:%d\n", j, split_task_des[j].status);
                    continue;
                }
            }

            if ((i >= MAX_TASK_NUN)||(j >= MAX_TASK_NUN))
            {
                break;
            }

            //该宏控制合并是并行执行还是串行执行
            #if 0
            //串行
            merge_sort_handle((void *)&pair);
            #else
            //并行
    		pthread_create( &pair.task_id, NULL, merge_sort_handle, (void *)&pair );
    		#endif
    		            
            break;
        }  

        usleep(100);
    }

    while (STATUS_OVER != status)
    {
        for (i = 0; i < MAX_TASK_NUN; i++)
        {
            if (split_task_des[i].status == STATUS_OVER)
            {
                status = STATUS_OVER;
                break;
            }
        }

        usleep(500);
    }

    j = 0;

    #if 1
    list_show("\n###OVER###", split_task_des[i].head, split_task_des[i].tail, 120);
    check_list(split_task_des[i].head, ARRARY_LEN);
    #endif

    return 0;
}


arrary_t *gen_rand_arrary(void)
{
    int i;

    arrary_t *a;
    
    a = (arrary_t *)malloc(sizeof(arrary_t)*ARRARY_LEN);
    if (NULL == a)
    {
        printf("Malloc error.\n");
        return NULL;
    }

    #if 1
    //产生伪随机数
    /*
       遗留问题:
                该函数是在一个核上产生所以随机数。
                gen_rand_arrary_init()是在多个核并行产生随机数,
                但是速度很慢,现怀疑rand()有共享资源需要多个核访问;
    */
    for (i = 0; i < ARRARY_LEN; i++)
    {
        #if 1
        arrary_value(a+i) = rand()%ARRARY_LEN;
        #elif 0
        arrary_value(a+i) = rand();
        #else
        arrary_value(a+i) = ARRARY_LEN - i;
        #endif
        
        a[i].pre  = NULL;
        a[i].next = NULL;
    }
    #endif

    return a;
}

/*
 * Print prefix followed by the first show_num values of the array.
 * A line break plus tab is written after every SPLIT_NUM values.
 */
void show_arrary(char *prefix, arrary_t *arrary, int show_num)
{
    int shown = 0;
    arrary_t *cur = arrary;

    printf("%16s\n\t", prefix);

    while (shown < show_num)
    {
        printf("%3d ", cur->data);
        cur++;
        shown++;

        if (shown % SPLIT_NUM == 0)
        {
            printf("\n\t");
        }
    }

    return ;
}


/*
 * Convert text to a strictly positive int.
 * Returns 0 and stores the value in *out on success; returns -1 (leaving
 * *out untouched) if text is not a complete decimal number, is out of range
 * for int, or is not greater than zero.
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if ((end == text) || ('\0' != *end) || (ERANGE == errno)
        || (value <= 0) || (value > INT_MAX))
    {
        return -1;
    }

    *out = (int)value;
    return 0;
}

/*
 * Parse the command line.
 *
 *   -n <total>   number of elements to sort   -> ARRARY_LEN
 *   -s <split>   number of elements per chunk -> SPLIT_NUM
 *   -h           request the usage text
 *
 * Both numbers must be positive decimal integers. The original used atoi(),
 * which silently accepted 0, negative numbers and garbage; those values later
 * caused a division by zero or a hang.
 *
 * Returns 0 when the options were accepted, 1 when -h was given and -1 on
 * any error (including fewer than three command line words).
 */
int parse_para(int argc, char *argv[])
{
    int ch;

    if (argc < 3)
    {
        return -1;
    }

    while ((ch = getopt(argc, argv, "hn:s:")) != -1)
    {
        switch (ch)
        {
            case 'n':
                if (0 != parse_positive_int(optarg, &ARRARY_LEN))
                {
                    printf("Invalid total number: %s\n", optarg);
                    return -1;
                }
                break;

            case 's':
                if (0 != parse_positive_int(optarg, &SPLIT_NUM))
                {
                    printf("Invalid split number: %s\n", optarg);
                    return -1;
                }
                break;

            case 'h':
                return 1;

            default:
                return -1;
        }
    }

    return 0;
}

/* Write the command line help to standard output. */
void show_usage(void)
{
    static const char usage_text[] =
        "Usage: mergesort -n <total-number> -s <split-num>\n"
        "Demostrate how to merg sort.\n"
        "\nOptions\n"
        "    -n            Specify total number\n"
        "    -s            Specify split number\n"
        "    -h            Show help information\n"
        "\n";

    fputs(usage_text, stdout);
}


/*
说明:
    程序流程:
            1. gen_rand_arrary()产生argv[1]个随机数;
            2. split_sort()将argv[1]个随机数分成大小为argv[2]的堆;
               然后quick_sort_handle()并行的对每个堆进行快排;
            3. merge_main()将快排好的堆进行合并;

入参:
        argv[1]: 排序数量
        argv[2]: 堆大小


*/
#define SHOW_NUMBER 128
int main(int argc, char ** argv)
{
    int ret = 0;
    int i;

    struct timeval t1, t2;

    (void)i;

    ret = parse_para(argc, argv);
    if (0 != ret)
    {
        show_usage();
        return ret;
    }


    #ifdef ARRARY_MALLOC_TRUE
    gettimeofday(&t1, NULL);
    
    printf("Gen rand number :%d ...\n", ARRARY_LEN);
    arrary = gen_rand_arrary();
    if (NULL == arrary)
    {
        printf("[fun:%s, line:%d] error.\n", __FUNCTION__, __LINE__);
        return -1;
    }
    
    show_arrary("Gen over:", arrary, ((SHOW_NUMBER>ARRARY_LEN)? ARRARY_LEN : SHOW_NUMBER));
    gettimeofday(&t2, NULL);

    i = t2.tv_sec-t1.tv_sec;
    printf("Gen random data time(h:m:s): %d:%d:%d .\n", i/3600, i/60, i%60);
    #endif

    
    printf("\nQuick sort %d number...\n", ARRARY_LEN);
    
    ret = split_sort(arrary, ARRARY_LEN, SPLIT_NUM);
    //ret = split_sort(arrary, ARRARY_LEN, 64);
    if (0 != ret)
    {
        printf("[fun:%s, line:%d] error.\n", __FUNCTION__, __LINE__);
        return ret;
    }
    
    //show_arrary("After split sort:", arrary, ((SHOW_NUMBER>ARRARY_LEN) ? ARRARY_LEN : SHOW_NUMBER));
    //fflush(stdout);
    
    ret = merge_main();
    if (0 != ret)
    {
        printf("[fun:%s, line:%d] error.\n", __FUNCTION__, __LINE__);
        return ret;
    }

    
    #ifdef ARRARY_MALLOC_TRUE
    free(arrary);
    #endif
    
    return 0;
}



 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值