/* ort_private.h */

#ifndef ORT_PRIVE_H
#define ORT_PRIVE_H
#include "ort.h"
#include "pthreads/ee.h"

// Upper bound on the number of simultaneously active regions; it sizes the
// `active` table of ort_workshare_t.
#define MAXACTIVEREGIONS 50

// Workshare types; there are three kinds: SINGLE, SECTIONS and FOR.
#define _OMP_SINGLE   0
#define _OMP_SECTIONS 1
#define _OMP_FOR      2

// Wait policies.
#define _OMP_ACTIVE   0
#define _OMP_PASSIVE  1

// Set up the eelib interface: the generic ee_* names used by ORT are mapped
// onto the thread library's othr_* names (keys, lifecycle calls, locks), while
// the barrier names are mapped onto ORT's default barrier (ort_defbar_t and
// the ort_default_barrier_* functions).
#define ee_key_t             othr_key_t
#define ee_key_create        othr_key_create
#define ee_getspecific       othr_getspecific
#define ee_setspecific       othr_setspecific

#define ee_initialize        othr_initialize
#define ee_finalize          othr_finalize
#define ee_request           othr_request
#define ee_create            othr_create
#define ee_yield             othr_yield
#define ee_waitall           othr_waitall

#define ee_lock_t            othr_lock_t
#define ee_init_lock         othr_init_lock
#define ee_destroy_lock      othr_destroy_lock
#define ee_set_lock          othr_set_lock
#define ee_unset_lock        othr_unset_lock
#define ee_test_lock         othr_test_lock

#define ee_barrier_t         ort_defbar_t
#define ee_barrier_init      ort_default_barrier_init
#define ee_barrier_destroy   ort_default_barrier_destroy
#define ee_barrier_wait      ort_default_barrier_wait


/* Checks whether the current eelib can honour the requested nesting mode.
 *   n : modifiable lvalue holding the "nested" setting
 *   d : the "dynamic" (dynamic adjustment of the number of threads) setting
 * ort->eecaps.supports_nested_nondynamic == 1 means that nested parallelism
 * does not need dynamic adjustment.  When nesting is requested (n != 0)
 * without dynamic adjustment (d == 0) and that capability is 0, a warning is
 * emitted through ort_warning() and n is reset to 0.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly one
 * statement and stays correct inside an unbraced if/else; invoke the macro
 * with a trailing semicolon.
 */
#define check_nested_dynamic(n,d)\
          do {\
            if ((n) && !(d) && !ort->eecaps.supports_nested_nondynamic) {\
              ort_warning("the EE library reports that nested and NOT dynamic\n"\
                          "   parallelism cannot be supported.\n"\
                          "   Try enabling dynamic adjustment using either of:\n"\
                          "    >> OMP_DYNAMIC environmental variable, or\n"\
                          "    >> omp_set_dynamic() call.\n\n"\
                     "*** disabling support for nested parallelism for now ***\n"\
                     "[end of ORT warning]\n");\
              (n) = 0;\
            }\
          } while (0)


// Different scheduling (yielding) modes.
// NOTE(review): presumably intended as the trials_before_yielding argument of
// OMPI_WAIT_WHILE, i.e. how many polls pass between yields -- confirm at the
// call sites.
#define YIELD_IMMEDIATELY 0
#define YIELD_FREQUENTLY  50
#define YIELD_OCCASIONALY 150
#define YIELD_RARELY      500
#define BAR_YIELD         50
/* OMPi wait macro: polls for as long as condition f holds.
 * A block-local counter tracks the polls made since the last yield; when it
 * equals trials_before_yielding, ee_yield() is called and the counter is
 * restarted (it is set to -1 so that the increment that follows makes it 0).
 * f is re-evaluated before every poll.
 */
#define OMPI_WAIT_WHILE(f, trials_before_yielding) { \
          volatile int time = 0; \
          while (f) { \
            if (time == (trials_before_yielding)) { \
              time = -1; \
              ee_yield(); \
            } \
            time++; \
          } \
        }

/* If X is zero, set it to 1 and then execute FENCE (per the original note, so
 * that the new value is written out to memory).  X must be a modifiable lvalue
 * and is evaluated more than once.
 * Wrapped in do { } while (0) so the expansion is a single statement that is
 * safe inside an unbraced if/else; invoke it with a trailing semicolon.
 */
#define testnotset(X) do { if ((X) == 0) { (X) = 1; FENCE; } } while (0)

// Task queue length (read from the global ORT variables).
#define TASKQUEUESIZE (ort->taskqueuesize)
// Dynamic task queue length setting (read from the global ORT variables).
#define DYNAMIC_TASKQUEUESIZE (ort->dynamic_taskqueuesize)
// OMPi task stealing policy (read from the global ORT variables).
#define OMPI_STEAL_POLICY (ort->ompi_steal_policy)

// X is a task node accessed with '.' (an lvalue with a funcarg member, such as
// an ort_task_node_t); Y is the size of the task function's arguments.
// A buffer of Y + sizeof(void *) bytes is obtained from ort_calloc() and the
// address of the node is stored in its first pointer-sized slot, so the real
// arguments of the task function start one pointer past funcarg.
#define ALLOCATE_ENV(X,Y) {\
      (X).funcarg = ort_calloc((Y)+ sizeof(void*)); \
      *((void **)((X).funcarg)) = &(X); \
      }

// Pointer variant of ALLOCATE_ENV: X is a pointer to the node (accessed with
// '->'), so the value stored in the first pointer-sized slot of the new
// Y + sizeof(void *) byte buffer is X itself.
#define ALLOCATE_PENV(X,Y) { \
      (X)->funcarg = ort_calloc((Y)+sizeof(void*));\
      *((void **)((X)->funcarg)) = (X);\
      }

// Address lying one pointer size (sizeof(void *)) after X, as a void *.
// The whole expansion is parenthesised so that it behaves as one operand
// next to sizeof or postfix operators.
#define NP(X) ((void*)(((char*)(X)) + sizeof(void*)))
// Address lying one pointer size (sizeof(void *)) before X, as a char *.
#define PP(X) (((char*)(X)) - sizeof(void*))

// Number of chars left from offset A up to the end of the cache line A falls
// in, i.e. CACHE_LINE - (A mod CACHE_LINE).  When A is an exact multiple of
// CACHE_LINE the result is CACHE_LINE (a whole line), never 0.
// A is parenthesised so that expression arguments (e.g. a + b) are reduced
// modulo CACHE_LINE as a whole.
#define CHAR_PAD_CACHE(A) ((CACHE_LINE - ((A) % CACHE_LINE))/sizeof(char))

// Size of the task pool: TASKQUEUESIZE plus X plus 3.
#define TASKPOOLSIZE(X) (TASKQUEUESIZE+(X)+3)

// Reads the int stored at address X.
// X and the whole expansion are parenthesised so that pointer expressions
// (e.g. p + off) are converted to int * as a whole before the dereference.
// NOTE(review): despite the name this is a plain load through int *; the cast
// drops any volatile qualifier the pointed-to object may have -- confirm that
// callers do not rely on it for synchronisation.
#define atomic_read(X) (*((int*)(X)))


// First half of a task node.  Its only purpose is to let the size of the pad
// array inside struct Node (below) be computed; it lists the same fields that
// precede pad in struct Node.
struct half_node
{
  void            *(*func)(void *);
  struct Node     *parent;
  volatile struct Node *next;
  int             isfinal;
};

// Task node structure.
typedef struct Node
{
  void            *(*func)(void *);// task function
  struct Node     *parent;// address of the parent task
  struct Node     *next;// links the task nodes held in the recycler into a list
  int             isfinal;// whether this is a final task
  // Pad up to the end of the cache line so that the fields above sit on their
  // own line, preventing false sharing.
  char pad[CHAR_PAD_CACHE(sizeof(struct half_node))];

  void            *funcarg;// argument of the task function
  volatile int     num_children;// number of child tasks of this task
  /* Check out whether I inherited the task node from my father */
  int inherit_task_node;// used for task node reuse? records how many times it was reused (original author unsure)
  // Usage state of this node: 0 means unused, non-zero means in use;
  // 1 marks a node that lives in a sub-pool, 2 a node that lives in the recycler.
  volatile int    occupied;
  ort_task_icvs_t  icvs;// task-local internal control variables
} ort_task_node_t;


// Task node pool.  Every eecb has several task node pools, organised as a
// linked list.
typedef struct task_node_pool ort_task_node_pool_t;
struct task_node_pool {
  void  *(*task_func)(void *);// the task function this pool belongs to
  int                   capacity;
  ort_task_node_t      *sub_pool;// pointer to the array of task nodes
  ort_task_node_t      *recycler;// head of the recycler, which is a linked list
  ort_task_node_pool_t *next;// next task node pool
};


/* Task queue: an array of task node pointers delimited by top and bottom. */
typedef struct Queue
{
    volatile int top;// top of the queue
    volatile int bottom;// bottom of the queue; its value is greater than top's
    ort_task_node_t** tasks;

    /* Pointers to the task counters of my children's implicit tasks */
    volatile int *implicit_task_children;
} ort_task_queue_t;

/* Holds the data needed by the task implementation (one per eecb, see the
 * `tasking` field of struct ort_eecb_s).
 */
typedef struct {
  /* Thread's private data */
  /* The task I am currently executing; needed in task wait */
  ort_task_node_t *current_executing_task;
  /* Common data between me and the rest of the threads of the group
   * (my children); used when I become a parent.
   */
  /* Table that holds the task queues of all threads in my group */
  ort_task_queue_t* queue_table;
  /* Task environment pool */
  ort_task_node_pool_t* task_node_pool;
  /* Maximum number of mates in my team */
  int max_mates;
  /* Maximum number of children that I have created */
  int max_children;
  /* If tasks are left to be done from the members of my group */
  volatile int never_task;
} ort_tasking_t;

/*
 * Other types
 */


/* Ordered data: bookkeeping for the ORDERED clause of a FOR region. */
typedef struct {
  int       next_iteration;  /* Low bound of the chunk that should be next */
  ee_lock_t lock;            /* NOTE(review): presumably guards next_iteration */
} ort_ordered_info_t;


/* For FOR loops */
typedef struct {
  /* iter is initialized to the loop's initial lower bound. During execution,
   * it represents the "current" lower bound, i.e. the next iteration to be
   * scheduled.
   * *** IT IS ONLY USED FOR THE GUIDED & DYNAMIC SCHEDULES ***
   */
  volatile int       iter;          /* The next iteration to be scheduled */
  ort_ordered_info_t ordering;      /* Bookkeeping for the ORDERED clause */
} ort_forloop_t;


/* State of one workshare region (SINGLE, SECTIONS or FOR). */
typedef struct
{
  ee_lock_t    reglock;                         /* Lock for the region */
  volatile int empty;                 /* True if no thread entered yet */
  volatile int left;            /* # threads that have left the region */
  int          inited;              /* 1 if the region was initialized */

  /* SECTIONS & FOR specific data */
  volatile int  sectionsleft; /* Remaining # sections to be given away */
  ort_forloop_t forloop;      /* Stuff for FOR regions */
} wsregion_t;


/* A table of simultaneously active workshare regions: one slot for blocking
 * regions plus MAXACTIVEREGIONS slots for NOWAIT regions.
 */
typedef struct {
  /* This is for BLOCKING (i.e. with no NOWAIT clause) regions. */
  wsregion_t blocking;
  /* This is for keeping track of active NOWAIT regions.  */
  volatile int headregion, tailregion;   /* volatile since all threads watch */
  wsregion_t active[MAXACTIVEREGIONS];
} ort_workshare_t;


/* Holds pointers to copyprivate vars. */
typedef struct {
  volatile void  **data;     /* table of pointers to the copyprivate vars */
  int            owner;      /* NOTE(review): presumably the id of the thread
                                that publishes the data -- confirm */
  int            copiers;    /* NOTE(review): presumably counts the threads
                                that still have to copy -- confirm */
  ee_lock_t      lock;
} ort_cpriv_t;


/* Holds the key-value pairs for threadprivate variables */
typedef struct {
  int  alloted;    /* size of vars table */
  void **vars;     /* the table itself */
} ort_tptable_t;


/* Execution entity control block (eecb).
 * ORT keeps such a block for every ee; it contains fields necessary
 * for runtime bookkeeping.
 * The eecb's form a tree, where child ee's have pointers to their
 * parent's eecb.
 * The first group of fields is used by an ee's children (when it acts as a
 * parent); the second group describes the ee itself as a member of a team.
 */
typedef struct ort_eecb_s ort_eecb_t;
struct ort_eecb_s {
  /*
   * The barrier is declared first, hoping it will be cache aligned
   */

  /* First, the fields used by my children (when I am the parent)
   */
  ee_barrier_t    barrier;                         /* Barrier for my children */
  int             have_created_team;  /* 1 if I was a team parent in the past */
  int             num_children;
  void            *(*workfunc)(void *);   /* The func executed by my children */
  ort_workshare_t workshare;     /* Some fields volatile since children snoop */
  ort_cpriv_t     copyprivate;  /* For copyprivate; owner stores data here and
                                   the rest of the children grab it from here */
  ort_tptable_t   *tpkeys;               /* Threadprivate vars of my children */
  int             tpksize;         /* in essence, max # children ever created */
  ort_eecb_t      *me_master;      /* For use when I become master of a group */

  /* Fields for me, as a member of a team
   */
  ort_eecb_t *parent;                                   /* My parent's eecb */

  int thread_num;                                /* Thread id within the team */
  int num_siblings;                                   /* # threads in my team */
  int level;                            /* At what level of parallelism I lie */
  int activelevel;             /* At what *active* level of parallelism I lie */
  void *shared_data;          /* Pointer to shared struct of current function */
  ort_eecb_t *sdn;      /* Where I will get shared data from; normally
                                   from my parent, except at a false parallel
                                   where I get it from myself since I am
                                   the only thread to execute the region. */
  int mynextNWregion;        /* Non-volatile; I'm the only thread to use this */
  int chunklb;             /* Non-volatile; my current chunk's first and last */
  int chunkub;           /* iterations; change for each chunk I execute through
                          ort_thischunk_range(); only used in ordered_begin() */
  int nowaitregion;             /*  True if my current region is a NOWAIT one */

  /* Tasking structures; left out entirely when AVOID_OMPI_DEFAULT_TASKS is
   * defined.
   */
#if !defined(AVOID_OMPI_DEFAULT_TASKS)
  ort_tasking_t tasking;
#endif
  /* Thread-library specific data
   */
  void *ee_info;                               /* Handled by the ee library */
};

/* Singly linked list holding pointers to the user's shared global variables
 * (only if ee=proc).
 */
typedef struct ort_sglvar_s ort_sglvar_t;
struct ort_sglvar_s {
  void         **varptr;      /* Pointer to user's global variable */
  int          size;          /* sizeof(var) */
  void         *initvalue;    /* Initializer */
  ort_sglvar_t *next;         /* Next list entry */
};


/* Size of the nthr_per_level table in ort_vars_t. */
#define MAXNTHRLEVS 10
/* NOTE(review): presumably the two values of ompi_steal_policy in ort_vars_t
 * (its comment reads "FIFO:LIFO") -- confirm.
 */
#define FIFO        1
#define LIFO        0

/* All global variables ORT handles; if ee=proc, this is also placed
 * in shared memory.
 */
typedef struct {
    ort_icvs_t         icvs;
    ort_caps_t         eecaps;
    volatile ee_lock_t atomic_lock;       /* Global lock for atomic */
    volatile ee_lock_t preparation_lock;  /* For initializing user locks */
    int                thrpriv_num;       /* # threadprivate variables */
    int                nthr_per_level[MAXNTHRLEVS];// thread-count limit for each nesting level
    int                set_nthrlevs;      /* # levels of nthreads defined */
    int                ompi_steal_policy; /* Worker steal? FIFO:LIFO */
    int                taskqueuesize;     /* Size of task queues */
    int                dynamic_taskqueuesize; /* Adapt task queuesize */
  } ort_vars_t;


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                       *
 *  VARIABLES & MORE MACROS                              *
 *                                                       *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Pointer to the global ORT variables (see ort_vars_t). */
extern ort_vars_t *ort;



/* Key for ee-specific storage: __SETMYCB stores v under it and __MYCB reads
 * the stored value back as an ort_eecb_t pointer.
 */
extern ee_key_t eecb_key;
#define __SETMYCB(v) ee_setspecific(eecb_key,v)
#define __MYCB       ((ort_eecb_t *) ee_getspecific(eecb_key))

/* Accessors for the task an eecb is currently executing.  They go through the
 * `tasking` member, which struct ort_eecb_s only has when
 * AVOID_OMPI_DEFAULT_TASKS is not defined.
 */
#define __CURRTASK(eecb)         ((eecb)->tasking.current_executing_task)
#define __SETCURRTASK(eecb,task) ((eecb)->tasking.current_executing_task = task)
#define __INHERITASK(eecb)       ((eecb)->tasking.current_executing_task->inherit_task_node)
#define __FINALTASK(eecb)        ((eecb)->tasking.current_executing_task->isfinal)


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                         *
 *  FUNCTIONS etc (also used by the parser, see ort.defs)  *
 *                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Runtime start-up/shutdown, allocation and region execution.
 * ort_calloc() is the allocator used by ALLOCATE_ENV/ALLOCATE_PENV.
 * NOTE(review): several prototypes in this header use an empty parameter list
 * "()", which in pre-C23 C leaves the parameters unspecified; consider "(void)"
 * once the definitions have been checked.
 */
int   ort_initialize(int *argc, char ***argv);
void  ort_finalize(int exitval);
void *ort_calloc(int size);
void  ort_execute_serial(void *(*func)(void *), void *shared);
void  ort_execute_parallel(int numthreads, void *(*func)(void *), void *shared,
                           int iscombined);
void  ort_prepare_omp_lock(omp_lock_t *lock, int type);

/* Shared, threadprivate and shared-global variable support */
void *ort_get_shared_vars(void *);
void *ort_get_thrpriv(void **key, int size, void *origvar);
void  ort_sglvar_allocate(void **dataptr, int size, void *initer);
void  ort_fence();

/* Atomic, critical, reduction, copyprivate and barrier */
void ort_atomic_begin();
void ort_atomic_end();
void ort_critical_begin(void **cl);
void ort_critical_end(void **cl);
void ort_reduction_begin(void **cl);
void ort_reduction_end(void **cl);
void ort_broadcast_private(int num, ...);
void ort_copy_private(int num, ...);

/*
 * From ort_barrier.c
 */

/* Barrier entry points.  ort_defbar_t is the type that ee_barrier_t is
 * mapped to in this header.
 */
void ort_barrier_me(void);
void parallel_barrier_wait(ort_defbar_t *bar, int eeid);

/*
 * From ort_ws.c
 */

/* Structure to optimize guided/dynamic schedules.
 * We compute it once and reuse it in every call to ort_get_xxx_chunk().
 * Such things are declared by the compiler (as void *), are initialized
 * during ort_entering_for() and are utilized in every call to
 * ort_get_xxx_chunk() (static schedules ignore this, though).
 */
typedef struct _ort_gdopt_
        {
          volatile int  *data;  /* Denotes the current iter of the loop */
          volatile void *lock;  /* Lock to access *data */
          int           nth;    /* # siblings */
          void          *me;    /* My info node */
        } ort_gdopt_t;

  /* Workshare-related functions (SINGLE, SECTIONS, FOR and ORDERED support).
   * ort_entering_for() is where the ort_gdopt_t passed in gets initialized,
   * and ort_thischunk_range() updates the caller's current chunk bounds
   * (see chunklb/chunkub in struct ort_eecb_s).
   */
void init_workshare_regions(ort_eecb_t *me);
int  ort_mysingle(int hasnowait);
void ort_leaving_single();
void ort_entering_sections(int hasnowait, int numberofsections);
void ort_leaving_sections();
int  ort_get_section();
void ort_entering_for(int nowait, int hasordered, ort_gdopt_t *t);
int  ort_leaving_for();
void ort_ordered_begin();
void ort_ordered_end();
void ort_thischunk_range(int lb, int ub);

  /* For schedules support.
   * chunky_t is the common signature of the chunk-fetching functions below
   * (ort_get_guided_chunk, ort_get_dynamic_chunk, ort_get_runtimestatic_chunk).
   */
typedef int (*chunky_t)(int niters, int chunksize, int *fiter, int *liter,
            int *extra, ort_gdopt_t *opt);

int  ort_num_iters(int num, ...);
/* NOTE(review): presumably returns the chunk function and chunk size of the
 * runtime schedule through its two pointer arguments -- confirm.
 */
void ort_get_runtime_schedule_stuff(chunky_t *func, int *chunksize);
int  ort_get_guided_chunk(int niters, int chunksize, int *fiter, int *liter,
                          int *ignored, ort_gdopt_t *t);
int  ort_get_dynamic_chunk(int niters, int chunksize, int *fiter, int *liter,
                           int *ignored, ort_gdopt_t *t);
int  ort_get_runtimestatic_chunk(int niters, int chunksize,
                          int *fiter, int *liter, int *chunkid, ort_gdopt_t *t);
int  ort_get_static_default_chunk(int niters, int *from, int *to);

/*
 * From ort_tasks.c
 */

/* Tasking entry points: task creation and waiting, immediate (undeferred)
 * task helpers, and the implicit task start/finish hooks for an eecb.
 */
void  ort_init_tasking();
void  ort_new_task(int final, int untied, void *(*func)(void *arg), void *arg);
void  ort_taskwait(int waitall);
int   ort_task_throttling(void);
void  ort_create_task_immediate_node(ort_eecb_t *thr);
void *ort_task_immediate_start(int final);
void  ort_task_immediate_end(void *tn);
void  ort_execute_my_tasks(ort_eecb_t *me);
void  ort_start_implicit_task(ort_eecb_t *thr);
void  ort_finish_implicit_task(ort_eecb_t *thr);

/*
 * From ort_pools.c
 */

/* Task node and task environment pools: allocation and release of
 * ort_task_node_t nodes and of task environments keyed by task function.
 */
ort_task_node_t* ort_task_alloc(void* (*func)(void *), void *arg);
void             ort_task_free(ort_eecb_t *thr, ort_task_node_t* node);
void             task_pools_init(ort_eecb_t *t);
void            *ort_taskenv_alloc(int size, void *(*task_func)(void *));
ort_task_node_t* ort_task_empty_node_alloc(void);
void             ort_taskenv_free(void *ptr, void *(*task_func)(void *));

/*
 * From ort_workstealing.c
 */

/* Work-stealing task queues: initialisation for nthr threads, enqueue and
 * dequeue by the owning worker, and stealing from the queue of victim_id.
 */
void             ort_task_queues_init(ort_eecb_t *me, int nthr);
int              ort_task_worker_enqueue(ort_eecb_t *me, void* (*func)(void *),
                                   void *arg, int final);
ort_task_node_t *ort_task_worker_dequeue(ort_eecb_t *me);
ort_task_node_t *ort_task_thief_steal(ort_eecb_t *me, int victim_id);


#endif // ORT_PRIVE_H

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值