#ifndef ORT_PRIVE_H
#define ORT_PRIVE_H
#include "ort.h"
#include "pthreads/ee.h"
/* Upper limit on the number of simultaneously active regions; it sizes the
 * `active` table of NOWAIT workshare regions in ort_workshare_t. */
#define MAXACTIVEREGIONS 50
/* Workshare types. There are three kinds: SINGLE, SECTIONS and FOR. */
#define _OMP_SINGLE 0
#define _OMP_SECTIONS 1
#define _OMP_FOR 2
/* Wait policies. */
#define _OMP_ACTIVE 0
#define _OMP_PASSIVE 1
/* Set up the eelib interface: the generic ee_* names used by the runtime are
 * mapped onto the threading library's othr_* names (keys, lifecycle, locks)
 * and onto ORT's default barrier (ort_defbar_t / ort_default_barrier_*). */
/* Thread-specific data keys */
#define ee_key_t othr_key_t
#define ee_key_create othr_key_create
#define ee_getspecific othr_getspecific
#define ee_setspecific othr_setspecific
/* Library lifecycle and execution-entity management */
#define ee_initialize othr_initialize
#define ee_finalize othr_finalize
#define ee_request othr_request
#define ee_create othr_create
#define ee_yield othr_yield
#define ee_waitall othr_waitall
/* Locks */
#define ee_lock_t othr_lock_t
#define ee_init_lock othr_init_lock
#define ee_destroy_lock othr_destroy_lock
#define ee_set_lock othr_set_lock
#define ee_unset_lock othr_unset_lock
#define ee_test_lock othr_test_lock
/* Barriers: these map to ORT's own default barrier, not to othr_* */
#define ee_barrier_t ort_defbar_t
#define ee_barrier_init ort_default_barrier_init
#define ee_barrier_destroy ort_default_barrier_destroy
#define ee_barrier_wait ort_default_barrier_wait
/*
 * check_nested_dynamic(n, d)
 *
 * Checks whether the current EE library requires dynamic adjustment of the
 * number of threads in order to support nested parallelism.
 * ort->eecaps.supports_nested_nondynamic being non-zero means that nesting
 * does not need dynamic adjustment.
 *
 *   n  nested flag; must be a modifiable lvalue, it is reset to 0 when the
 *      requested combination is not supported
 *   d  dynamic flag; only read
 *
 * When nesting is requested (n), dynamic adjustment is off (!d) and the
 * library lacks non-dynamic nesting support, a warning is emitted through
 * ort_warning() and nesting is disabled.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as a
 * single statement (safe inside an unbraced if/else); invoke it with a
 * trailing semicolon.
 */
#define check_nested_dynamic(n,d) \
  do { \
    if ((n) && !(d) && !ort->eecaps.supports_nested_nondynamic) { \
      ort_warning("the EE library reports that nested and NOT dynamic\n" \
                  " parallelism cannot be supported.\n" \
                  " Try enabling dynamic adjustment using either of:\n" \
                  " >> OMP_DYNAMIC environmental variable, or\n" \
                  " >> omp_set_dynamic() call.\n\n" \
                  "*** disabling support for nested parallelism for now ***\n" \
                  "[end of ORT warning]\n"); \
      (n) = 0; \
    } \
  } while (0)
/* Different yielding policies.
 * NOTE(review): presumably these are passed as the trials_before_yielding
 * argument of OMPI_WAIT_WHILE (number of spins before calling ee_yield());
 * confirm against the callers. */
#define YIELD_IMMEDIATELY 0
#define YIELD_FREQUENTLY 50
#define YIELD_OCCASIONALY 150
#define YIELD_RARELY 500
#define BAR_YIELD 50
/*
 * OMPI_WAIT_WHILE(f, trials_before_yielding)
 *
 * OMPi busy-wait macro: keeps re-evaluating the condition f until it becomes
 * false.  A private counter tracks the number of polls; every time it reaches
 * trials_before_yielding the counter is rewound and ee_yield() is called
 * before polling resumes.
 *
 * The counter is a block-local named `time`, so f must not refer to an
 * identifier of that name.
 */
#define OMPI_WAIT_WHILE(f, trials_before_yielding) { \
    volatile int time = 0; \
    while (f) { \
      if (time == (trials_before_yielding)) { \
        time = -1;      /* becomes 0 again after the increment below */ \
        ee_yield(); \
      } \
      time++; \
    } \
  }
/*
 * testnotset(X)
 *
 * If X is zero, set it to 1 and then issue FENCE so that the store is
 * written out to memory.  X must be a modifiable lvalue; it is evaluated
 * more than once, so it must not have side effects.
 *
 * Wrapped in do { ... } while (0) so that the macro is a single statement
 * (safe inside an unbraced if/else); invoke it with a trailing semicolon.
 */
#define testnotset(X) \
  do { \
    if ((X) == 0) { \
      (X) = 1; \
      FENCE; \
    } \
  } while (0)
/* Task queue length (taken from the global ORT variables). */
#define TASKQUEUESIZE (ort->taskqueuesize)
/* Dynamic task queue length (taken from the global ORT variables). */
#define DYNAMIC_TASKQUEUESIZE (ort->dynamic_taskqueuesize)
/* OMPi task stealing policy (taken from the global ORT variables). */
#define OMPI_STEAL_POLICY (ort->ompi_steal_policy)
/*
 * ALLOCATE_ENV(X, Y)
 *
 * X is a task node object (its members are accessed with `.`), Y is the size
 * in bytes of the task function's arguments.
 *
 * Allocates, through ort_calloc(), an argument area of Y bytes preceded by
 * one pointer-sized slot, and stores the address of X in that first slot.
 * The task function's real arguments therefore start one pointer past
 * funcarg.
 */
#define ALLOCATE_ENV(X,Y) { \
    (X).funcarg = ort_calloc(sizeof(void *) + (Y)); \
    ((void **) (X).funcarg)[0] = &(X); \
  }
/*
 * ALLOCATE_PENV(X, Y)
 *
 * Pointer flavour of ALLOCATE_ENV: X is a pointer to a task node (members
 * are accessed with `->`), Y is the size in bytes of the task function's
 * arguments.
 *
 * Allocates, through ort_calloc(), an argument area of Y bytes preceded by
 * one pointer-sized slot, and stores X itself in that first slot.
 */
#define ALLOCATE_PENV(X,Y) { \
    (X)->funcarg = ort_calloc(sizeof(void *) + (Y)); \
    ((void **) (X)->funcarg)[0] = (X); \
  }
/* NP(X): the address one pointer-size past X, as a void *.
 * The expansion is fully parenthesized so that it can be used as an operand
 * of any operator (e.g. sizeof NP(x)). */
#define NP(X) ((void *) (((char *) (X)) + sizeof(void *)))
/* PP(X): the address one pointer-size before X, as a char *. */
#define PP(X) (((char *) (X)) - sizeof(void *))
/* CHAR_PAD_CACHE(A): number of chars left from offset A up to the end of the
 * cache line it falls in, i.e. CACHE_LINE - (A mod CACHE_LINE).
 * When A is an exact multiple of CACHE_LINE the result is CACHE_LINE (not 0),
 * so the value is always usable as an array size.
 * The argument is parenthesized so that compound expressions such as
 * CHAR_PAD_CACHE(a + b) are reduced modulo CACHE_LINE as a whole. */
#define CHAR_PAD_CACHE(A) ((CACHE_LINE - ((A) % CACHE_LINE)) / sizeof(char))
/* Computes the size of a task pool: TASKQUEUESIZE + X + 3. */
#define TASKPOOLSIZE(X) (TASKQUEUESIZE+(X)+3)
/* atomic_read(X): reads the int stored at address X.
 * X is parenthesized so that pointer expressions (e.g. base + offset) are
 * converted as a whole, and the access goes through a volatile-qualified
 * lvalue so that volatile counters are not read through a non-volatile
 * type.  The whole expansion is parenthesized as well. */
#define atomic_read(X) (*((volatile int *) (X)))
/* The leading part of a task node.  Its only purpose is to let struct Node
 * below compute the size of its pad array (via sizeof(struct half_node)).
 * The fields mirror the ones that precede `pad` in struct Node.
 * NOTE(review): `next` is volatile-qualified here but not in struct Node;
 * this does not change the size, but confirm which one is intended. */
struct half_node
{
void *(*func)(void *);
struct Node *parent;
volatile struct Node *next;
int isfinal;
};
/* Task node structure. */
typedef struct Node
{
void *(*func)(void *);/* Task function */
struct Node *parent;/* Address of the parent task */
struct Node *next;/* Links the task nodes of the recycler into a list */
int isfinal;/* Whether this is a final task */
/* Fill up the rest of the cache line, to prevent false sharing */
char pad[CHAR_PAD_CACHE(sizeof(struct half_node))];
void *funcarg;/* Argument of the task function */
volatile int num_children;/* Number of child tasks of this task */
/* Check out whether i inherited task node from my father */
int inherit_task_node;/* Used for task node reuse; NOTE(review): possibly counts how many times the node was reused - confirm */
volatile int occupied;/* Usage state of this node: 0 = not in use, non-zero = in use; 1 = node of a sub-pool, 2 = node in the recycler */
ort_task_icvs_t icvs;/* Task-local internal control variables */
} ort_task_node_t;
/* Task node pool structure.  Every eecb has several task node pools, which
 * are organized as a linked list. */
typedef struct task_node_pool ort_task_node_pool_t;
struct task_node_pool {
void *(*task_func)(void *);/* The function this task node pool belongs to */
int capacity;
ort_task_node_t *sub_pool;/* Pointer to the array of task nodes */
ort_task_node_t *recycler;/* Head of the recycler, which is a linked list */
ort_task_node_pool_t *next;/* Next task node pool */
};
/* A per-thread task queue (see queue_table in ort_tasking_t). */
typedef struct Queue
{
volatile int top;/* Top of the queue */
volatile int bottom;/* Bottom of the queue; its value is greater than top's */
ort_task_node_t** tasks;
/* Pointers to task-counters of my children implicit task */
volatile int *implicit_task_children;
} ort_task_queue_t;
/* Holds the data needed by the tasking implementation */
typedef struct {
/* Thread's private data */
/* I have to know which task I am currently executing; needed in taskwait */
ort_task_node_t *current_executing_task;
/* Common data between me and the rest of the threads of the group (my
 * children). Obviously used when I become a father. */
/* Table that holds the task queues of all threads in my group */
ort_task_queue_t* queue_table;
/* Task environment pool */
ort_task_node_pool_t* task_node_pool;
/* Maximum number of mates in my team */
int max_mates;
/* Maximum number of children that I have created */
int max_children;
/* If tasks are left to be done from the members of my group */
volatile int never_task;
} ort_tasking_t;
/*
 * Other types
 */
/* Bookkeeping data for ORDERED. */
typedef struct {
int next_iteration; /* Low bound of the chunk that should be next */
ee_lock_t lock;     /* Lock stored alongside next_iteration */
} ort_ordered_info_t;
/* For FOR loops */
typedef struct {
/* iter (referred to as "lb" in earlier comments) is initialized to the
 * loop's initial lower bound. During execution, it represents the "current"
 * lower bound, i.e. the next iteration to be scheduled.
 * *** IT IS ONLY USED FOR THE GUIDED & DYNAMIC SCHEDULES ***
 */
volatile int iter; /* The next iteration to be scheduled */
ort_ordered_info_t ordering; /* Bookkeeping for the ORDERED clause */
} ort_forloop_t;
/* Per-region state for workshare regions */
typedef struct
{
ee_lock_t reglock; /* Lock for the region */
volatile int empty; /* True if no thread entered yet */
volatile int left; /* # threads that have left the region */
int inited; /* 1 if the region was initialized */
/* SECTIONS & FOR specific data */
volatile int sectionsleft; /* Remaining # sections to be given away */
ort_forloop_t forloop; /* Stuff for FOR regions */
} wsregion_t;
/* A table of simultaneously active workshare regions */
typedef struct {
/* This is for BLOCKING (i.e. with no NOWAIT clause) regions. */
wsregion_t blocking;
/* This is for keeping track of active NOWAIT regions; at most
 * MAXACTIVEREGIONS entries are available. */
volatile int headregion, tailregion; /* volatile since all threads watch */
wsregion_t active[MAXACTIVEREGIONS];
} ort_workshare_t;
/* Holds pointers to copyprivate vars. */
typedef struct {
volatile void **data; /* Table of pointers to the copyprivate variables */
int owner;            /* NOTE(review): presumably the id of the thread that stores the data - confirm */
int copiers;          /* NOTE(review): presumably a count of the threads copying the data - confirm */
ee_lock_t lock;
} ort_cpriv_t;
/* Holds the key-value pairs for threadprivate variables */
typedef struct {
int alloted; /* size of vars table */
void **vars; /* The table itself */
} ort_tptable_t;
/* Execution entity control block (eecb).
 * ORT keeps such a block for every ee; it contains the fields necessary
 * for runtime bookkeeping.
 * The eecbs form a tree, where child ees have pointers to their
 * parent's eecb.
 */
typedef struct ort_eecb_s ort_eecb_t;
struct ort_eecb_s {
/*
 * The barrier is declared first, hoping it will be cache aligned
 */
/* First, the fields used by my children (when I am the parent)
 */
ee_barrier_t barrier; /* Barrier for my children */
int have_created_team; /* 1 if I was a team parent in the past */
int num_children;
void *(*workfunc)(void *); /* The func executed by my children */
ort_workshare_t workshare; /* Some fields volatile since children snoop */
ort_cpriv_t copyprivate; /* For copyprivate; owner stores data here and
the rest of the children grab it from here */
ort_tptable_t *tpkeys; /* Threadprivate vars of my children */
int tpksize; /* in essence, max # children ever created */
ort_eecb_t *me_master; /* For use when I become master of a group */
/* Fields for me, as a member of a team
 */
ort_eecb_t *parent;
int thread_num; /* Thread id within the team */
int num_siblings; /* # threads in my team */
int level; /* At what level of parallelism I lie */
int activelevel; /* At what *active* level of parallelism I lie */
void *shared_data; /* Pointer to shared struct of current function */
ort_eecb_t *sdn; /* Where I will get shared data from; normally
from my parent, except at a false parallel
where I get it from myself since I am
the only thread to execute the region. */
int mynextNWregion; /* Non-volatile; I'm the only thread to use this */
int chunklb; /* Non-volatile; my current chunk's first and last */
int chunkub; /* iterations; change for each chunk I execute through
ort_thischunk_range(); only used in ordered_begin() */
int nowaitregion; /* True if my current region is a NOWAIT one */
/* Tasking structures; compiled out when AVOID_OMPI_DEFAULT_TASKS is defined
 */
#if !defined(AVOID_OMPI_DEFAULT_TASKS)
ort_tasking_t tasking;
#endif
/* Thread-library specific data
 */
void *ee_info; /* Handled by the ee library */
};
/* Singly-linked list holding pointers to the user's shared global variables
 * (only if ee=proc) */
typedef struct ort_sglvar_s ort_sglvar_t;
struct ort_sglvar_s {
void **varptr; /* Pointer to user's global variable */
int size; /* sizeof(var) */
void *initvalue; /* Initializer */
ort_sglvar_t *next; /* Next list element */
};
/* Size of the nthr_per_level table in ort_vars_t */
#define MAXNTHRLEVS 10
/* Values for the task steal policy (see ompi_steal_policy in ort_vars_t) */
#define FIFO 1
#define LIFO 0
/* All global variables ORT handles; if ee=proc, this is also placed
 * in shared memory.
 */
typedef struct {
ort_icvs_t icvs;
ort_caps_t eecaps;
volatile ee_lock_t atomic_lock; /* Global lock for atomic */
volatile ee_lock_t preparation_lock; /* For initializing user locks */
int thrpriv_num; /* # threadprivate variables */
int nthr_per_level[MAXNTHRLEVS];/* Stores the limit on the number of threads for each level */
int set_nthrlevs; /* # levels of nthreads defined */
int ompi_steal_policy; /* Worker steal? FIFO:LIFO */
int taskqueuesize; /* Size of task queues */
int dynamic_taskqueuesize; /* Adapt task queuesize */
} ort_vars_t;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * *
 * VARIABLES & MORE MACROS *
 * *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* The global ORT state and the thread-specific key under which each ee's
 * control block (eecb) is stored. */
extern ort_vars_t *ort;
extern ee_key_t eecb_key;
/* Store / fetch the calling ee's eecb through the thread-specific key. */
#define __SETMYCB(v) ee_setspecific(eecb_key,v)
#define __MYCB ((ort_eecb_t *) ee_getspecific(eecb_key))
/* Accessors for the task currently executed by an eecb.  They go through
 * the `tasking` member, which only exists when AVOID_OMPI_DEFAULT_TASKS is
 * not defined. */
#define __CURRTASK(eecb) ((eecb)->tasking.current_executing_task)
#define __SETCURRTASK(eecb,task) ((eecb)->tasking.current_executing_task = task)
#define __INHERITASK(eecb) ((eecb)->tasking.current_executing_task->inherit_task_node)
#define __FINALTASK(eecb) ((eecb)->tasking.current_executing_task->isfinal)
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * *
 * FUNCTIONS etc (also used by the parser, see ort.defs) *
 * *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Runtime start-up and shutdown */
int ort_initialize(int *argc, char ***argv);
void ort_finalize(int exitval);
/* Allocator used by the runtime (e.g. by ALLOCATE_ENV / ALLOCATE_PENV) */
void *ort_calloc(int size);
/* Execution of a region function, serially or by a team of threads */
void ort_execute_serial(void *(*func)(void *), void *shared);
void ort_execute_parallel(int numthreads, void *(*func)(void *), void *shared,
int iscombined);
void ort_prepare_omp_lock(omp_lock_t *lock, int type);
/* Shared, threadprivate and shared-global variable support */
void *ort_get_shared_vars(void *);
void *ort_get_thrpriv(void **key, int size, void *origvar);
void ort_sglvar_allocate(void **dataptr, int size, void *initer);
/* Memory fence entry point; takes no arguments.
 * Declared with (void) so that the declaration is a real prototype and
 * calls are checked by the compiler. */
void ort_fence(void);
/* Atomic, critical, reduction, copyprivate and barrier */
/* Begin / end of an atomic construct; neither takes arguments. */
void ort_atomic_begin(void);
void ort_atomic_end(void);
/* Begin / end of a critical construct; cl is the address of a pointer
 * owned by the caller (NOTE(review): presumably the lock of the critical
 * region - confirm in the implementation). */
void ort_critical_begin(void **cl);
void ort_critical_end(void **cl);
/* Begin / end of a reduction; same parameter convention as above. */
void ort_reduction_begin(void **cl);
void ort_reduction_end(void **cl);
/* Copyprivate support; both take a count followed by a variable argument
 * list. */
void ort_broadcast_private(int num, ...);
void ort_copy_private(int num, ...);
/*
 * From ort_barrier.c
 */
/* Barrier entry point for the calling thread; takes no arguments. */
void ort_barrier_me(void);
/* Wait on the given default barrier; eeid identifies the calling ee. */
void parallel_barrier_wait(ort_defbar_t *bar, int eeid);
/*
 * From ort_ws.c
 */
/* Structure to optimize guided/dynamic schedules.
 * We compute it once and reuse it in every call to ort_get_xxx_chunk().
 * Such things are declared by the compiler (as void *), are initialized
 * during ort_entering_for() and are utilized in every call to
 * ort_get_xxx_chunk() (static schedules ignore this, though).
 */
typedef struct _ort_gdopt_
{
volatile int *data; /* Denotes the current iter of the loop */
volatile void *lock; /* Lock to access *data */
int nth; /* # siblings */
void *me; /* my info node */
} ort_gdopt_t;
/* Workshare-related functions */
/* Initializes the workshare regions of the given eecb. */
void init_workshare_regions(ort_eecb_t *me);
/* SINGLE regions.  hasnowait tells whether the construct carries a NOWAIT
 * clause. */
int ort_mysingle(int hasnowait);
void ort_leaving_single(void);
/* SECTIONS regions.  The parameterless functions are declared with (void)
 * so that they are real prototypes and calls are checked by the compiler. */
void ort_entering_sections(int hasnowait, int numberofsections);
void ort_leaving_sections(void);
int ort_get_section(void);
/* Entry to a FOR region; t is the guided/dynamic optimization structure
 * (ort_gdopt_t) that gets initialized here. */
void ort_entering_for(int nowait, int hasordered, ort_gdopt_t *t);
/* Exit from a FOR region.  Declared with (void), like the other
 * parameterless functions, so that calls are checked by the compiler. */
int ort_leaving_for(void);
/* Begin / end of an ORDERED region; neither takes arguments. */
void ort_ordered_begin(void);
void ort_ordered_end(void);
/* Records the first (lb) and last (ub) iterations of the chunk the calling
 * thread is about to execute. */
void ort_thischunk_range(int lb, int ub);
/* For schedules support */
/* Common signature of the chunk-fetching functions (guided, dynamic and
 * runtime-static) declared below. */
typedef int (*chunky_t)(int niters, int chunksize, int *fiter, int *liter,
int *extra, ort_gdopt_t *opt);
int ort_num_iters(int num, ...);
/* Hands back, through its parameters, the chunk function and the chunk size
 * to use for a runtime schedule. */
void ort_get_runtime_schedule_stuff(chunky_t *func, int *chunksize);
/* Chunk fetchers; fiter / liter are output parameters.
 * NOTE(review): presumably they receive the first and last iteration of the
 * obtained chunk and the return value tells whether a chunk was obtained -
 * confirm in ort_ws.c. */
int ort_get_guided_chunk(int niters, int chunksize, int *fiter, int *liter,
int *ignored, ort_gdopt_t *t);
int ort_get_dynamic_chunk(int niters, int chunksize, int *fiter, int *liter,
int *ignored, ort_gdopt_t *t);
int ort_get_runtimestatic_chunk(int niters, int chunksize,
int *fiter, int *liter, int *chunkid, ort_gdopt_t *t);
int ort_get_static_default_chunk(int niters, int *from, int *to);
/*
 * From ort_tasks.c
 */
/* Initializes the tasking subsystem; takes no arguments.  Declared with
 * (void) so that the declaration is a real prototype. */
void ort_init_tasking(void);
/* Creates a new task that runs func(arg); final and untied carry the
 * corresponding task flags. */
void ort_new_task(int final, int untied, void *(*func)(void *arg), void *arg);
void ort_taskwait(int waitall);
int ort_task_throttling(void);
/* Support for tasks that are executed immediately.
 * NOTE(review): presumably the pointer returned by
 * ort_task_immediate_start() is the one to hand to ort_task_immediate_end()
 * - confirm in ort_tasks.c. */
void ort_create_task_immediate_node(ort_eecb_t *thr);
void *ort_task_immediate_start(int final);
void ort_task_immediate_end(void *tn);
/* Task execution and implicit-task bookkeeping for the given eecb. */
void ort_execute_my_tasks(ort_eecb_t *me);
void ort_start_implicit_task(ort_eecb_t *thr);
void ort_finish_implicit_task(ort_eecb_t *thr);
/*
 * From ort_pools.c
 */
/* Allocation / release of task nodes (ort_task_node_t). */
ort_task_node_t* ort_task_alloc(void* (*func)(void *), void *arg);
void ort_task_free(ort_eecb_t *thr, ort_task_node_t* node);
/* Initializes the task node pools of the given eecb. */
void task_pools_init(ort_eecb_t *t);
/* Allocation / release of a task environment for the given task function. */
void *ort_taskenv_alloc(int size, void *(*task_func)(void *));
ort_task_node_t* ort_task_empty_node_alloc(void);
void ort_taskenv_free(void *ptr, void *(*task_func)(void *));
/*
 * From ort_workstealing.c
 */
/* Initializes the task queues of the given eecb for nthr threads. */
void ort_task_queues_init(ort_eecb_t *me, int nthr);
/* Queue operations: the worker enqueues / dequeues, a thief steals from the
 * thread identified by victim_id. */
int ort_task_worker_enqueue(ort_eecb_t *me, void* (*func)(void *),
void *arg, int final);
ort_task_node_t *ort_task_worker_dequeue(ort_eecb_t *me);
ort_task_node_t *ort_task_thief_steal(ort_eecb_t *me, int victim_id);
#endif // ORT_PRIVE_H
/* File: ort_private.h
 * (web-page footer picked up with the source: "latest recommended article
 * published on 2024-04-29 11:24:22") */