【VPP】vpp节点调度

文章详细介绍了vpp中的process类型节点调度过程,包括节点类型定义、节点注册、调度流程,以及使用clib_longjmp和setjmp实现的轻量级多任务协程机制。在vpp中,process节点通过协程等待事件或时钟触发,进行挂起和恢复,实现了事件驱动的高效处理。
摘要由CSDN通过智能技术生成

vpp process类型节点调度过程

vpp节点类型

VLIB_NODE_TYPE_PROCESS:process类型节点可以被挂起也可以被恢复,main线程上调度

typedef enum
{
  /* An internal node on the call graph (could be output). */
  VLIB_NODE_TYPE_INTERNAL,

  /* Nodes which input data into the processing graph.
     Input nodes are called for each iteration of main loop. */
  VLIB_NODE_TYPE_INPUT,

  /* Nodes to be called before all input nodes.
     Used, for example, to clean out driver TX rings before
     processing input. */
  VLIB_NODE_TYPE_PRE_INPUT,

  /* "Process" nodes which can be suspended and later resumed. */
  VLIB_NODE_TYPE_PROCESS,

  VLIB_N_NODE_TYPE,
} vlib_node_type_t;

process节点注册

/* *INDENT-OFF* */
/* Registration of the "ip4-full-reassembly-expire-walk" node.  It is a
   process-type node whose body is ip4_full_reass_walk_expired. */
VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
    .function = ip4_full_reass_walk_expired,
    /* Process type: the node function can be suspended and later resumed. */
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "ip4-full-reassembly-expire-walk",
    .format_trace = format_ip4_full_reass_trace,
    /* Error counter names and their count come from the same string table. */
    .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
    .error_strings = ip4_full_reass_error_strings,

};

process类型节点调度流程

每个process节点是由jump机制构成的一个协程,协程主要用于等待、处理事件。

使用longjmp/setjmp的轻量级多任务协程,由应用进程自行进行调度,不受操作系统调度机制的影响,上下文切换只损耗调用longjmp/setjmp的时间。

协程中运行的函数类似于线程函数,区别在于协程函数可以被挂起(suspend)、等待事件、再被恢复(resume),这些能力都基于setjmp/longjmp实现。

以x86_64 cpu来说,clib_longjmp_t存放rbx, rbp, r12, r13, r14, r15, rip(源码注释中写作eip), rsp共8个寄存器,利用这些寄存器实现跳转功能

#if defined(__x86_64__)
/* rbx, rbp, r12, r13, r14, r15, eip, rsp */
#define CLIB_ARCH_LONGJMP_REGS 8

#elif defined(i386)
/* ebx, ebp, esi, edi, eip, rsp */
#define CLIB_ARCH_LONGJMP_REGS 6

#elif (defined(__powerpc64__) || defined(__powerpc__))
typedef struct
{
  uword regs[CLIB_ARCH_LONGJMP_REGS];
} clib_longjmp_t __attribute__ ((aligned (16)));

/* Return given value to saved context.
   The value surfaces as the result of the clib_setjmp call that filled in
   'save' (see the clib_setjmp description below). */
void clib_longjmp (clib_longjmp_t * save, uword return_value);

/* Save context.  Returns given value if jump is not taken;
   otherwise returns value from clib_longjmp if long jump is taken. */
uword clib_setjmp (clib_longjmp_t * save, uword return_value_not_taken);

/* Call function on given stack.
   func receives func_arg as its only argument. */
uword clib_calljmp (uword (*func) (uword func_arg),
		    uword func_arg, void *stack);

clib_setjmp

作用:保存上下文,即上述的寄存器。
返回值:首次调用(跳转功能还未执行过),返回传入的第二个参数,即return_value_not_taken,否则返回clib_longjmp的返回值(如果long jump已执行)

clib_longjmp

作用:恢复clib_setjmp保存的上下文(寄存器/调用栈环境),跳转回对应的clib_setjmp处继续执行
返回值:clib_longjmp本身不会返回到调用点;传入的第二个参数return_value会成为对应clib_setjmp的返回值

clib_calljmp

作用:在给定的栈空间上执行function

process节点初始化 (以ip4_full_reass_expire_node节点为例)

vlib_main_or_worker_loop -> dispatch_process -> vlib_process_startup

通过执行dispatch_process完成最初对各process节点的一次调度,即初始化操作

/* Excerpt of the main/worker loop showing process start-up.  When is_main
   is non-zero, dispatch_process is called once for every entry of
   nm->processes.  Lines reading "......" mark code omitted from the
   excerpt (nm and cpu_time_now are declared there). */
static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{
	......
	/* Start all processes. */
  if (is_main)
    {
      uword i;
      /*
       * Perform an initial barrier sync. Pays no attention to
       * the barrier sync hold-down timer scheme, which won't work
       * at this point in time.
       */
      vlib_worker_thread_initial_barrier_sync_and_release (vm);

      nm->current_process_index = ~0;
      /* Dispatch each process once with a null frame; the timestamp
         returned by one call is fed into the next. */
      for (i = 0; i < vec_len (nm->processes); i++)
	cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0,
					 cpu_time_now);
    }
	......
}
  1. vlib_process_startup中,clib_setjmp设置return_longjmp,此时返回值r为VLIB_PROCESS_RETURN_LONGJMP_RETURN
  2. 然后clib_calljmp在给定的栈空间上(给每个process节点在注册时分配的p->stack)调用vlib_process_bootstrap
/* Called in main stack. */
/* Start process p for the first time.
   Saves the current context in p->return_longjmp, then runs
   vlib_process_bootstrap on the process's own stack.
   Returns either the clib_calljmp result or, when control comes back
   through p->return_longjmp, the value that was passed to clib_longjmp. */
static_always_inline uword
vlib_process_startup (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f)
{
  vlib_process_bootstrap_args_t a;
  uword r;
  
  /* Arguments handed to vlib_process_bootstrap via a single uword. */
  a.vm = vm;
  a.process = p;
  a.frame = f;
  
  /*****************************Step 1************************************/
  r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN); 
  if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
  {
      /* Jump not taken: context was just saved, so enter the process. */
      vlib_process_start_switch_stack (vm, p);
       /****************************Step 2**************************/
      /* The stack argument is the end of the p->stack region
         (base plus 2^log2_n_stack_bytes bytes). */
      r = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&a),
			(void *) p->stack + (1 << p->log2_n_stack_bytes));  // Step 2
  }
  else
    /* Arrived here through clib_longjmp on p->return_longjmp. */
    vlib_process_finish_switch_stack (vm);
    
  return r;
}
  3. vlib_process_bootstrap中真正执行各process节点的function
/* Called in process stack. */
/* Entry point executed on the process stack.
   _a is a vlib_process_bootstrap_args_t pointer packed into a uword.
   Runs the node function, then jumps back through p->return_longjmp
   carrying the function's return value. */
static uword
vlib_process_bootstrap (uword _a)
{
  vlib_process_bootstrap_args_t *a;
  vlib_main_t *vm;
  vlib_node_runtime_t *node;
  vlib_frame_t *f;
  vlib_process_t *p;
  uword n;

  a = uword_to_pointer (_a, vlib_process_bootstrap_args_t *);

  vm = a->vm;
  p = a->process;
  vlib_process_finish_switch_stack (vm);

  f = a->frame;
  node = &p->node_runtime;

  /* Run the process node's registered function. */
  n = node->function (vm, node, f);         //************Step 3***************

  ASSERT (vlib_process_stack_is_valid (p));

  /* Process function returned: hand n back via the saved context. */
  vlib_process_start_switch_stack (vm, 0);
  clib_longjmp (&p->return_longjmp, n);

  return n;
}
  4. 每个process节点都会在开头处调用vlib_process_wait_for_event_or_clock或vlib_process_wait_for_event
/* Excerpt of the process function of the reassembly expire-walk node.
   Each loop iteration waits for an event or for
   rm->expire_walk_interval_ms to elapse, then inspects the event type.
   The "......" line marks code omitted from the excerpt. */
static uword
ip4_full_reass_walk_expired (vlib_main_t * vm,
			     vlib_node_runtime_t * node, vlib_frame_t * f)
{
  ip4_full_reass_main_t *rm = &ip4_full_reass_main;
  uword event_type, *event_data = 0;

  while (true)
    {
      /****************Step4*****************/
      /* Timeout argument: the millisecond interval divided by
         MSEC_PER_SEC. */
      vlib_process_wait_for_event_or_clock (vm,
					    (f64)
					    rm->expire_walk_interval_ms /
					    (f64) MSEC_PER_SEC);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
	{
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	  break;
	case IP4_EVENT_CONFIG_CHANGED:
	  break;
	default:
	  clib_warning ("BUG: event type 0x%wx", event_type);
	  break;
	}
  ......
}
  5. vlib_process_wait_for_event_or_clock先去检查non_empty_event_type_bitmap是否有置位,如有说明有事件需要去处理,则直接返回。否则将suspend状态标记置位,标识当前是suspend状态,等待event或clock。
  6. 调用clib_setjmp设置resume_longjmp,此时返回值r为VLIB_PROCESS_RESUME_LONGJMP_SUSPEND
  7. 调用clib_longjmp跳转到vlib_process_startup -> clib_setjmp 设置return_longjmp处,此时clib_setjmp的返回值r为clib_longjmp的第二个参数,即VLIB_PROCESS_RETURN_LONGJMP_SUSPEND
  8. 上述步骤完成了一个process节点的初始调度,各process node暂时处于suspend状态,等待某个条件(时钟或事件)的到来进入resume状态并执行业务逻辑
/** Suspend a cooperative multi-tasking thread
    Waits for an event, or for the indicated number of seconds to elapse

    Returns dt at once, without suspending, when
    vlib_process_suspend_time_is_zero (dt) holds or when
    non_empty_event_type_bitmap of the current process is not zero.
    @param vm - vlib_main_t pointer
    @param dt - timeout, in seconds.
    @returns the remaining time interval
*/

always_inline f64
vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_process_t *p;
  f64 wakeup_time;
  uword r;

  /* The process being suspended is the one currently dispatched. */
  p = vec_elt (nm->processes, nm->current_process_index);
	
	/***********************Step5********************************/
  if (vlib_process_suspend_time_is_zero (dt)
      || !clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
    return dt;

  wakeup_time = vlib_time_now (vm) + dt;

  /* Suspend waiting for both clock and event to occur. */
  p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
	       | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);

	/***********************Step6********************************/
  /* Save the resume point; the "jump not taken" value selects the
     suspend path below. */
  r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
  if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
    {
      /* NOTE(review): the 1e5 factor presumably converts seconds into
         timer-wheel ticks - confirm against the wheel configuration. */
      p->resume_clock_interval = dt * 1e5;
      vlib_process_start_switch_stack (vm, 0);
      /***********************Step7********************************/
      /* Hand control back to whoever saved p->return_longjmp. */
      clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }
  else
    /* Reached through clib_longjmp on p->resume_longjmp. */
    vlib_process_finish_switch_stack (vm);

  /* Return amount of time still left to sleep.
     If <= 0 then we've been waken up by the clock (and not an event). */
  return wakeup_time - vlib_time_now (vm);
}
/* Excerpt of dispatch_process: starts process p and, if it suspended,
   records a pending frame for it and (when it waits on the clock) starts
   a timer.  Lines reading "......" mark code omitted from the excerpt;
   nm, node, n_vectors, is_suspend and old_process_index are declared
   there. */
static u64
dispatch_process (vlib_main_t * vm,
		  vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp)
{
  ......
  n_vectors = vlib_process_startup (vm, p, f);// in the initial dispatch described here the return value is VLIB_PROCESS_RETURN_LONGJMP_SUSPEND

  nm->current_process_index = old_process_index;

  ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN);
  is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
  if (is_suspend)// the node enters the suspended state
    {
      vlib_pending_frame_t *pf;

      n_vectors = 0;
      /* Allocate a pending-frame record describing the suspended process. */
      pool_get (nm->suspended_process_frames, pf);
      pf->node_runtime_index = node->runtime_index;
      pf->frame = f;
      pf->next_frame_index = ~0;

      p->n_suspends += 1;
      /* Store the record's index within the pool. */
      p->suspended_process_frame_index = pf - nm->suspended_process_frames;

      if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)// clock-based scheduling, built on the timer wheel
	   {
		  TWT (tw_timer_wheel) * tw =
		    (TWT (tw_timer_wheel) *) nm->timing_wheel;
		  p->stop_timer_handle =
		    TW (tw_timer_start) (tw,
					 vlib_timing_wheel_data_set_suspended_process
					 (node->runtime_index) /* [sic] pool idex */ ,
					 0 /* timer_id */ ,
					 p->resume_clock_interval);// value set by vlib_process_wait_for_event_or_clock
		}
	}
	......
}
resume

需要恢复的process有两种情况,一种是等待的时钟已经到期,一种是等待的事件发生

data_from_advancing_timing_wheel数组存放所有超时的process index。不仅等待时钟的,等待事件的process,也会向数组加入自己的index

	  /* Check if process nodes have expired from timing wheel. */
	  ASSERT (nm->data_from_advancing_timing_wheel != 0);

	  /* Optional event-log record before the wheel is advanced. */
	  if (PREDICT_FALSE (vm->elog_trace_graph_dispatch))
	    ed = ELOG_DATA (&vlib_global_main.elog_main, es);

	  /* Expire timers up to the current time; the call takes the vector
	     and returns it, and the result is stored back. */
	  nm->data_from_advancing_timing_wheel =
	    TW (tw_timer_expire_timers_vec)
	    ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm),
	     nm->data_from_advancing_timing_wheel);

	  ASSERT (nm->data_from_advancing_timing_wheel != 0);

	  /* Optional event-log record carrying the length of the vector. */
	  if (PREDICT_FALSE (vm->elog_trace_graph_dispatch))
	    {
	      ed = ELOG_DATA (&vlib_global_main.elog_main, ee);
	      ed->nready_procs =
		_vec_len (nm->data_from_advancing_timing_wheel);
	    }

dispatch_suspended_process -> vlib_process_resume

  1. 设置return_longjmp,此时返回值r为VLIB_PROCESS_RETURN_LONGJMP_RETURN
  2. 调用clib_longjmp,跳转到第一次调度时设置resume_longjmp处,即vlib_process_wait_for_event_or_clock->clib_setjmp处,此时clib_setjmp返回值应为VLIB_PROCESS_RESUME_LONGJMP_RESUME
/* Resume a suspended process p.
   Clears the waiting/resume-pending flags, saves the caller's context in
   p->return_longjmp and jumps to the point saved in p->resume_longjmp.
   Returns the value later delivered through p->return_longjmp. */
static_always_inline uword
vlib_process_resume (vlib_main_t * vm, vlib_process_t * p)
{
  uword r;
  p->flags &= ~(VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
		| VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
		| VLIB_PROCESS_RESUME_PENDING);
	/***********************Step1********************************/
  r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
  if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
    {
      /* Jump not taken: switch into the process at its saved resume point. */
      vlib_process_start_switch_stack (vm, p);
      /***********************Step2********************************/
      clib_longjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_RESUME);
    }
  else
    /* Reached through clib_longjmp on p->return_longjmp. */
    vlib_process_finish_switch_stack (vm);
  return r;
}
/** Suspend a cooperative multi-tasking thread
    Waits for an event, or for the indicated number of seconds to elapse

    Shown here a second time to follow the resume path: control re-enters
    at the clib_setjmp on p->resume_longjmp and takes the else branch.
    @param vm - vlib_main_t pointer
    @param dt - timeout, in seconds.
    @returns the remaining time interval
*/

always_inline f64
vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_process_t *p;
  f64 wakeup_time;
  uword r;

  p = vec_elt (nm->processes, nm->current_process_index);

  /* No suspension when the timeout counts as zero or events are pending. */
  if (vlib_process_suspend_time_is_zero (dt)
      || !clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
    return dt;

  wakeup_time = vlib_time_now (vm) + dt;

  /* Suspend waiting for both clock and event to occur. */
  p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
	       | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);

  r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
  if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
    {
      p->resume_clock_interval = dt * 1e5;
      vlib_process_start_switch_stack (vm, 0);
      clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }
  else // this branch is taken on resume
    vlib_process_finish_switch_stack (vm);

  /* Return amount of time still left to sleep.
     If <= 0 then we've been waken up by the clock (and not an event). */
  return wakeup_time - vlib_time_now (vm);
}
  3. 返回到ip4_full_reass_walk_expired节点,执行业务逻辑,即重组buffer的老化流程,各节点的业务逻辑执行均为死循环,下一次调用到vlib_process_wait_for_event_or_clock时,设置resume_longjmp,此时返回值r为VLIB_PROCESS_RESUME_LONGJMP_SUSPEND
  4. 调用clib_longjmp跳转到vlib_process_resume -> clib_setjmp 设置return_longjmp处,此时clib_setjmp的返回值r为clib_longjmp的第二个参数,即VLIB_PROCESS_RETURN_LONGJMP_SUSPEND
  5. 进入到下一轮的suspend状态,在vpp main线程的循环中,一直持续着suspend/resume的状态切换过程,每次resume时,执行节点的业务逻辑
  /* Suspend fragment of vlib_process_wait_for_event_or_clock: save the
     resume point, then jump back through p->return_longjmp with the
     SUSPEND value. */
  r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
  if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
    {
      p->resume_clock_interval = dt * 1e5;
      vlib_process_start_switch_stack (vm, 0);
      clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }
  else
    /* Taken when the process is resumed through p->resume_longjmp. */
    vlib_process_finish_switch_stack (vm);

参考文档链接

  • https://developer.aliyun.com/article/610474?spm=a2c6h.13262185.profile.29.67b32e31kB4Qt4
  • https://blog.csdn.net/sjin_1314/article/details/106170429
  • https://zhuanlan.zhihu.com/p/484606752
  • https://blog.csdn.net/weixin_39915694/article/details/111253624
  • https://segmentfault.com/a/1190000019613786
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值