Phoenix 分析

目前开源的 MapReduce 实现主要是 Hadoop,它是用 Java 写的;而 Google 自己使用的实现是用 C++ 写的。Java 实现在性能方面还是和 C/C++ 没法比的。从网上找了找,C/C++ 的开源 MapReduce 实现有下面两个,但功能还比较弱,没有实现分布式,只能用来学习原理:

http://mapreduce.stanford.edu/

http://labs.trolltech.com/page/Projects/Threads/QtConcurrent

下面主要分析分析 Phoenix

 

Phoenix就是当硬件上有多个CPU或是多核的CPU时,每个CPU上面绑定一个线程,这些线程先map,之后生成的中间结果再reduce,只是mapreduce原理的简单展示,实际使用效果不大,但原理实现的比较直观

/*
 * Arguments filled in by the caller and passed to map_reduce().
 * Holds the input data description, the user callbacks, the output
 * location, and optional tuning knobs. Optional fields must be zero
 * when they are not used.
 */
typedef struct
{
    void * task_data;           /* The data to run MapReduce on.
                                 * If splitter is NULL, this should be an array. */
    off_t data_size;            /* Total # of bytes of data */
    int unit_size;              /* # of bytes for one element
                                 * (if necessary, on average) */

    map_t map;                  /* Map function pointer, must be user defined */
    reduce_t reduce;            /* If NULL, identity reduce function is used,
                                 * which emits a keyval pair for each val. */
    combiner_t combiner;        /* If NULL, no combiner would be called. */
    splitter_t splitter;        /* If NULL, the array splitter is used. */
    locator_t locator;          /* If NULL, no locality based optimization is
                                 * performed. */
    key_cmp_t key_cmp;          /* Key comparison function.
                                 * Must be user defined. */

    final_data_t *result;       /* Pointer to output data.
                                 * Must be allocated by user */

    /*** Optional arguments must be zero if not used ***/
    partition_t partition;      /* Default partition function is a
                                 * hash function */

    /* Creates one emit queue for each reduce task,
     * instead of per reduce thread. This improves
     * time to emit if data is emitted in order,
     * but can increase merge time. */
    bool use_one_queue_per_task;

    int L1_cache_size;     /* Size of L1 cache in bytes */
    int num_map_threads;   /* # of threads to run map tasks on.
                            * Default is one per processor */
    int num_reduce_threads;     /* # of threads to run reduce tasks on.
                                 * Default is one per processor */
    int num_merge_threads;      /* # of threads to run merge tasks on.
                                 * Default is one per processor */
    int num_procs;              /* Maximum number of processors to use. */

    int proc_offset;            /* number of procs to skip for thread binding */
                                /* (useful if you have multiple MR's running
                                 * and you don't want them binding to the same
                                 * hardware thread). */

    float key_match_factor;     /* Magic number that describes the ratio of
                                 * the input data size to the output data size.
                                 * This is used as a hint. */
} map_reduce_args_t;

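这个结构体是初始化时传递参数用的。其中的函数指针字段(map、reduce、combiner、splitter、locator、key_cmp,原文用红色标出)需要针对不同的应用自己实现,在 mapreduce 的过程中被调用;num_map_threads、num_reduce_threads、num_merge_threads(原文用蓝色标出)表示不同执行阶段所用的线程数。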

 

/*
 * Entry point of the library (simplified excerpt): builds the runtime
 * environment from the caller's arguments, then runs the three phases
 * in order -- map, reduce, merge.
 *
 * args: caller-filled map_reduce_args_t.
 *
 * Returns 0 after all three phases have run, or -1 if the environment
 * could not be created.
 */
int map_reduce (map_reduce_args_t * args)
{
    mr_env_t* env;

    env = env_init (args);
    if (env == NULL) {
        /* map() dereferences env immediately, so bail out here. */
        return -1;
    }

    map (env);
    reduce (env);
    merge (env);

    /* The function is declared int; falling off the end would leave the
     * caller with an indeterminate value. */
    return 0;
}

map_reduce ()这个函数中主要是根据输入的参数分别调用map、reduce、merge三个函数,条理比较清楚

mr_env_t这个结构体比较重要,所有的中间结果,以及map、reduce、merge三个过程调用的线程信息,全部在这里面

/*
 * Runtime environment of one MapReduce run. Groups the effective
 * parameters, the callbacks, the arrays handed from one phase to the
 * next, and the task queue / thread pool used by the phases.
 */
typedef struct
{
    /* Parameters. */
    int num_map_tasks;              /* # of map tasks. */
    int num_reduce_tasks;           /* # of reduce tasks. */
    int chunk_size;                 /* # of units of data for each map task. */
    int num_procs;                  /* # of processors to run on. */
    int num_map_threads;            /* # of threads for map tasks. */
    int num_reduce_threads;         /* # of threads for reduce tasks. */
    int num_merge_threads;          /* # of threads for merge tasks. */
    float key_match_factor;         /* # of values likely to be matched
                                       to the same key. */

    bool oneOutputQueuePerMapTask;      /* One output queue per map task? */
    bool oneOutputQueuePerReduceTask;   /* One output queue per reduce task? */

    int intermediate_task_alloc_len;

    /* Callbacks. */
    map_t map;                      /* Map function. */
    reduce_t reduce;                /* Reduce function. */
    combiner_t combiner;            /* Combiner function. */
    partition_t partition;          /* Partition function. */
    splitter_t splitter;            /* Splitter function. */
    locator_t locator;              /* Locator function. */
    key_cmp_t key_cmp;              /* Key comparator function. */

    /* Structures. */
    map_reduce_args_t * args;       /* Args passed in by the user. */
    thread_info_t * tinfo;          /* Thread information array; allocated and
                                       freed by start_workers() per phase. */

    keyvals_arr_t **intermediate_vals;
                                    /* Array to send to reduce task. */

    keyval_arr_t *final_vals;       /* Array to send to merge task. */
    keyval_arr_t *merge_vals;       /* Array to send to user. */

    uintptr_t splitter_pos;         /* Tracks position in array_splitter(). */

    /* Policy for mapping threads to cpus, one entry per task type. */
    sched_policy    *schedPolicies[TASK_TYPE_TOTAL];


    taskQ_t         *taskQueue;     /* Queues of tasks. */
    tpool_t         *tpool;         /* Thread pool. */
} mr_env_t;

mr_env_t 结构体中比较重要的字段有两个:一是 taskQ_t *taskQueue,保存了子任务的信息;另一个是 tpool_t *tpool,保存了所用到的线程的信息。其余字段很多是从 map_reduce_args_t 结构体中拷贝过来的。

/*
 * One task descriptor: four 64-bit words that can be accessed either as
 * the raw array v[] or through the named fields of the anonymous struct.
 */
typedef struct {
    union {
        uint64_t        v[4];
        struct {
            uint64_t    id;
            uint64_t    len; // length of the task
            uint64_t    data; // data of the task
            uint64_t    pad; // fourth word; presumably padding only (going by the name)
        };
    };
} task_t;

/*
 * A task paired with a queue_elem_t.
 * NOTE(review): queue_elem_t is presumably the link node that places the
 * entry on a queue_t -- confirm against the queue implementation.
 */
typedef struct {
    task_t              task;
    queue_elem_t        queue_elem;
} tq_entry_t;

/*
 * Lock state used by the task queue: one lock object held by value plus
 * a pointer to further lock objects.
 * NOTE(review): going by the names, `per_thread` points to one lock per
 * thread and `chksum` is an integrity value; neither is shown in use
 * here -- confirm against the task queue implementation.
 */
typedef struct {
    mr_lock_t parent;
    uintptr_t chksum;
    mr_lock_t *per_thread;
} tq_lock_t;

/*
 * Task queue container: arrays of queues together with their locks and
 * the per-thread random seeds.
 * NOTE(review): `free_queues` presumably holds recycled entries and the
 * seeds are presumably used to pick a queue at random -- neither use is
 * visible here, confirm against the task queue implementation.
 */
struct taskQ_t {
    int             num_queues;
    int             num_threads;
    queue_t         **queues;
    queue_t         **free_queues;
    tq_lock_t       *locks;
    /* putting all seeds together may lead to extra coherence traffic among cpus
     * if it's a problem we can pad it by l1 line size */
    /* per-thread random seed */
    unsigned int    *seeds;
};

taskQ_t 组成了一个任务队列,每个任务的信息存储在 task_t 之中。

/*
 * Argument block handed to one worker of the thread pool.
 * Apart from `sem_run`, which is stored by value, every member is a
 * pointer. Several share their name with a member of struct tpool_t
 * (num_workers_done, sem_all_workers_done, thread_func, num_workers,
 * die), so they presumably point at the pool's shared state -- TODO
 * confirm against the pool implementation.
 *
 * NOTE(review): map() and start_workers() use a thread_arg_t with
 * task_type / env / cpu_id / thread_id members, which this definition
 * lacks; that is presumably a different type of the same name in another
 * translation unit.
 */
typedef struct {
    sem_t           sem_run;
    unsigned int    *num_workers_done;
    sem_t           *sem_all_workers_done;
    thread_func     *thread_func;
    void            **thread_func_arg;
    void            **ret;
    int             *num_workers;
    int             *die;
} thread_arg_t;

/*
 * Thread pool state: the thread handles, the per-thread argument blocks,
 * and the counters / semaphore shared by all workers.
 */
struct tpool_t {
    int             num_threads;
    int             num_workers;
    int             die;
    thread_func     thread_func;
    sem_t           sem_all_workers_done;
    unsigned int    num_workers_done;
    void            **args;
    pthread_t       *threads;// array of thread handles
    thread_arg_t    *thread_args;
};

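线程池其实就是一个线程数组(pthread_t *threads),根据 CPU 的个数决定数组的大小;单个线程的参数和返回结果存在 thread_arg_t 之中。每个工作线程由自己的信号量 sem_t sem_run 决定何时开始执行(注意这里用的是信号量,不是条件变量);当单个线程执行完时,会通过 thread_arg_t 中的指针增加线程池共享的计数 num_workers_done,所有工作线程都完成后再触发总的信号量 sem_t sem_all_workers_done,通知等待的主线程。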

/*
 * Map phase: generate the map tasks, cap the number of map threads at
 * the number of tasks, then run the workers with the MAP task type.
 */
static void map (mr_env_t* env)
{
    thread_arg_t worker_arg;
    const int    task_count = gen_map_tasks (env);

    assert (task_count >= 0);

    /* Never use more map threads than there are map tasks. */
    env->num_map_tasks = task_count;
    if (env->num_map_threads > task_count) {
        env->num_map_threads = task_count;
    }

    /* Only the task type differs between the map/reduce/merge phases. */
    mem_memset (&worker_arg, 0, sizeof worker_arg);
    worker_arg.task_type = TASK_TYPE_MAP;

    start_workers (env, &worker_arg);
}

map、reduce、merge这三个函数主要是调用start_workers 这个函数,主要是类型不一样

/*
 * Run one phase (map, reduce or merge, selected by th_arg->task_type) on
 * all threads of that phase and wait until every thread has finished.
 *
 * The calling thread acts as worker 0 through start_my_work(); workers
 * 1..num_threads-1 are handed to the thread pool. Each worker receives
 * its own heap copy of *th_arg with cpu_id and thread_id filled in.
 *
 * env:    runtime environment; env->tinfo is allocated for the duration
 *         of the phase and released (and reset to NULL) before returning.
 * th_arg: template argument block; must not be NULL. Its env, cpu_id and
 *         thread_id members are overwritten.
 *
 * When TIMING is defined, each worker's return value is treated as a
 * heap-allocated thread_timing_t whose counters are summed and printed
 * to stderr as per-thread averages.
 *
 * NOTE(review): worker 0's argument block is used unconditionally, so the
 * function assumes getNumTaskThreads() returns at least 1 -- confirm that
 * callers can never reach this with zero threads.
 */
static void
start_workers (mr_env_t* env, thread_arg_t *th_arg)
{
    int             thread_index;
    TASK_TYPE_T     task_type;
    int             num_threads;
    int             cpu;
    intptr_t        ret_val;
    thread_arg_t    **th_arg_array;
    void            **rets;
#ifdef TIMING
    uint64_t        work_time = 0;
    uint64_t        user_time = 0;
    uint64_t        combiner_time = 0;
#endif

    assert(th_arg != NULL);

    task_type = th_arg->task_type;
    num_threads = getNumTaskThreads (env, task_type);

    env->tinfo = (thread_info_t *)mem_calloc (
        num_threads, sizeof (thread_info_t));
    /* Checked like the other allocations in this function. */
    CHECK_ERROR (env->tinfo == NULL);
    th_arg->env = env;

    th_arg_array = (thread_arg_t **)mem_malloc (
        sizeof (thread_arg_t *) * num_threads);
    CHECK_ERROR (th_arg_array == NULL);

    /* Build one private argument block per worker. */
    for (thread_index = 0; thread_index < num_threads; ++thread_index) {

        cpu = sched_thr_to_cpu (env->schedPolicies[task_type], thread_index + env->args->proc_offset);
        th_arg->cpu_id = cpu;
        th_arg->thread_id = thread_index;

        th_arg_array[thread_index] = mem_malloc (sizeof (thread_arg_t));
        CHECK_ERROR (th_arg_array[thread_index] == NULL);
        mem_memcpy (th_arg_array[thread_index], th_arg, sizeof (thread_arg_t));
    }

    /* Workers 1..num_threads-1 run in the pool; worker 0 is this thread. */
    start_thread_pool (
        env->tpool, task_type, &th_arg_array[1], num_threads - 1);

    dprintf("Status: All %d threads have been created\n", num_threads);

    ret_val = (intptr_t)start_my_work (th_arg_array[0]);
#ifdef TIMING
    {
        thread_timing_t *timing = (thread_timing_t *)ret_val;
        work_time += timing->work_time;
        user_time += timing->user_time;
        combiner_time += timing->combiner_time;
        mem_free (timing);
    }
#endif
    mem_free (th_arg_array[0]);

    /* Barrier, wait for all threads to finish. */
    CHECK_ERROR (tpool_wait (env->tpool));
    rets = tpool_get_results (env->tpool);

    /* rets[] holds the pool workers only, hence the index shift by one. */
    for (thread_index = 1; thread_index < num_threads; ++thread_index)
    {
#ifdef TIMING
        ret_val = (intptr_t)rets[thread_index - 1];
        thread_timing_t *timing = (thread_timing_t *)ret_val;
        work_time += timing->work_time;
        user_time += timing->user_time;
        combiner_time += timing->combiner_time;
        mem_free (timing);
#endif
        mem_free (th_arg_array[thread_index]);
    }

    mem_free (th_arg_array);
    mem_free (rets);

#ifdef TIMING
    switch (task_type)
    {
        case TASK_TYPE_MAP:
            fprintf (stderr, "map work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            fprintf (stderr, "map user time: %" PRIu64 "\n",
                                        user_time / num_threads);
            fprintf (stderr, "map combiner time: %" PRIu64 "\n",
                                        combiner_time / num_threads);
            break;

        case TASK_TYPE_REDUCE:
            fprintf (stderr, "reduce work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            fprintf (stderr, "reduce user time: %" PRIu64 "\n",
                                        user_time / num_threads);
            break;

        case TASK_TYPE_MERGE:
            fprintf (stderr, "merge work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            break;

        default:
            break;
    }
#endif

    mem_free(env->tinfo);
    /* env outlives this call; do not leave a dangling pointer in it. */
    env->tinfo = NULL;
    dprintf("Status: All tasks have completed\n");
}

start_workers 为每个线程分配 CPU 和参数等资源,然后启动线程执行;在线程中分别调用用户定义的下列函数:

/* User-supplied callbacks (the same fields as declared in map_reduce_args_t). */
map_t map;                  /* Map function pointer, must be user defined */
    reduce_t reduce;            /* If NULL, identity reduce function is used,
                                 * which emits a keyval pair for each val. */
    combiner_t combiner;        /* If NULL, no combiner would be called. */
    splitter_t splitter;        /* If NULL, the array splitter is used. */
    locator_t locator;          /* If NULL, no locality based optimization is
                                 * performed. */
    key_cmp_t key_cmp;          /* Key comparison function.
                                 * Must be user defined. */

完成计算

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值