1、vrrp 线程在创建 IPV4(224.0.0.18)或 IPV6(ff02::12) vrrp 组播接收 socket 后,首先会初始化 vrrp instances 超时时间,并根据超时时间将 vrrp->rb_sands 放入 rb tree 中。
/* Thread functions */
/*
 * Excerpt of vrrp_register_workers(): lines shown as "..." are code elided
 * by the article. Only the step that seeds the per-instance timers is shown.
 */
static void
vrrp_register_workers(list l)
{
...
...
...
/* Init VRRP instances sands */
vrrp_init_sands(vrrp_data->vrrp);
...
...
...
}
将 vrrp->rb_sands 插入 rb tree 中:
/*
 * Seed the expiry time ("sands") of every VRRP instance in list l.
 *
 * Each instance is first linked into the rb tree rooted at
 * vrrp->sockets->rb_sands (ordered by vrrp_timer_cmp) with a disabled
 * timer; vrrp_init_instance_sands() then sets the expiry and re-sorts
 * the node. Finally the instance's reload_master flag is cleared.
 */
static void
vrrp_init_sands(list l)
{
	element el;
	vrrp_t *inst;

	LIST_FOREACH(l, inst, el) {
		/*
		 * Placeholder value so the node can be inserted before its
		 * real expiry is computed (the original notes give
		 * TIMER_DISABLED as LONG_MIN).
		 */
		inst->sands.tv_sec = TIMER_DISABLED;

		/*
		 * Insert the instance as a new node of the tree rooted at
		 * inst->sockets->rb_sands; "rb_sands" is the node member name
		 * and vrrp_timer_cmp the ordering function.
		 * NOTE(review): the original annotation says the root is
		 * initialised to RB_ROOT_CACHED by alloc_sock() - not shown here.
		 */
		rb_insert_sort_cached(&inst->sockets->rb_sands, inst, rb_sands, vrrp_timer_cmp);

		/* Set the expiry and reposition the node */
		vrrp_init_instance_sands(inst);

		inst->reload_master = false;
	}
}
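计算超时时间,并根据超时时间,重新调整 vrrp->rb_sands 在 rb tree 中的位置。对于 master,超时时间为当前时间 + vrrp->adver_int(即通告发送间隔;若 reload_master 为 true,则直接取当前时间);对于 backup,超时时间为当前时间 + vrrp->ms_down_timer;对于 fault 或 init 状态,定时器被禁用。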
/*
 * Compute the new instance sands.
 *
 * Refreshes time_now, sets vrrp->sands according to the instance state,
 * then re-sorts the instance inside the tree rooted at
 * vrrp->sockets->rb_sands.
 *
 *   MASTER       : time_now if reload_master is set, otherwise
 *                  time_now + adver_int (the advert interval)
 *   BACKUP       : time_now + ms_down_timer
 *   FAULT / INIT : timer disabled
 *   other states : sands left unchanged
 */
void
vrrp_init_instance_sands(vrrp_t * vrrp)
{
	set_time_now();

	switch (vrrp->state) {
	case VRRP_STATE_MAST:
		/* Fire immediately when reload_master is set, else one advert interval from now */
		vrrp->sands = vrrp->reload_master
			      ? time_now
			      : timer_add_long(time_now, vrrp->adver_int);
		break;

	case VRRP_STATE_BACK:
		/*
		 * When in the BACKUP state the expiry timer should be updated to
		 * time_now plus the Master Down Timer, when a non-preemptable
		 * packet is received.
		 *
		 * Per the original annotation (not visible in this code):
		 * ms_down_timer = 3 * master_adver_int + VRRP_TIMER_SKEW(vrrp),
		 * where master_adver_int is the master's advert interval; when
		 * this timer expires the backup declares the master dead.
		 */
		vrrp->sands = timer_add_long(time_now, vrrp->ms_down_timer);
		break;

	case VRRP_STATE_FAULT:
	case VRRP_STATE_INIT:
		/* Faulted or still initialising: no timeout (only tv_sec is written) */
		vrrp->sands.tv_sec = TIMER_DISABLED;
		break;

	default:
		/* Any other state keeps its current sands */
		break;
	}

	/* Reposition the instance's rb_sands node to match the new expiry */
	rb_move_cached(&vrrp->sockets->rb_sands, vrrp, rb_sands, vrrp_timer_cmp);
}
2、为各组播 socket 创建接收定时器,并将其插入以 master->read 为根节点的 rb tree 中,其中 vrrp_compute_timer() 用于获取上一步计算好的超时时间。
/* Thread functions */
/*
 * Excerpt of vrrp_register_workers(): lines shown as "..." are code elided
 * by the article. Only the loop that arms one read thread per socket is shown.
 */
static void
vrrp_register_workers(list l)
{
...
...
...
/* Register VRRP workers threads */
LIST_FOREACH(l, sock, e) {
if (sock->fd_in != -1)
/*
 * Arm a read thread on sock->fd_in whose expiry is the value returned by
 * vrrp_compute_timer(sock). thread_add_read_sands() registers the fd through
 * thread_event_set() when needed (presumably epoll_ctl on master->epoll_fd,
 * per the original note - thread_event_set() is not shown here).
 */
sock->thread = thread_add_read_sands(master, vrrp_read_dispatcher_thread,
sock, sock->fd_in, vrrp_compute_timer(sock), false);
}
...
...
...
}
创建接收定时器,并将其插入以 master->read 为根节点的 rb tree 中:
/*
 * Add new read thread.
 *
 * Creates a THREAD_READ thread for fd that expires at *sands and inserts
 * it into the m->read tree (ordered by thread_timer_cmp).
 *
 * m               - thread master; must not be NULL (asserted)
 * func, arg       - stored in the thread as its handler and argument
 * fd              - descriptor to watch
 * sands           - expiry time, copied into the thread; dereferenced
 *                   unconditionally, so it must not be NULL
 * close_on_reload - stored in thread->u.f.close_on_reload
 *
 * Returns the new thread, or NULL when no event could be allocated, a
 * read thread is already attached to the fd's event, or
 * thread_event_set() fails.
 */
thread_ref_t
thread_add_read_sands(thread_master_t *m, thread_func_t func, void *arg, int fd, const timeval_t *sands, bool close_on_reload)
{
	thread_event_t *ev;
	thread_t *t;

	assert(m != NULL);

	/* I feel lucky ! :D  - reuse the current event when it is for this fd */
	ev = (m->current_event && m->current_event->fd == fd)
	     ? m->current_event
	     : thread_event_get(m, fd);

	if (ev) {
		/* Only one read thread may be attached to an event */
		if (__test_bit(THREAD_FL_READ_BIT, &ev->flags) && ev->read) {
			log_message(LOG_INFO, "scheduler: There is already read event %p (read %p) registered on fd [%d]", ev, ev->read, fd);
			return NULL;
		}
	} else {
		ev = thread_event_new(m, fd);
		if (!ev) {
			log_message(LOG_INFO, "scheduler: Cant allocate read event for fd [%d](%m)", fd);
			return NULL;
		}
	}

	t = thread_new(m);
	t->type = THREAD_READ;
	t->master = m;
	t->func = func;
	t->arg = arg;
	t->u.f.fd = fd;
	t->u.f.close_on_reload = close_on_reload;
	t->event = ev;

	/* Set & flag event (done before thread_event_set() is called) */
	__set_bit(THREAD_FL_READ_BIT, &ev->flags);
	ev->read = t;

	if (!__test_bit(THREAD_FL_EPOLL_READ_BIT, &ev->flags)) {
		/*
		 * Register the fd for read events (per the original note,
		 * via epoll_ctl on master->epoll_fd - thread_event_set() is
		 * not shown here).
		 */
		if (thread_event_set(t) < 0) {
			log_message(LOG_INFO, "scheduler: Cant register read event for fd [%d](%m)", fd);
			/*
			 * NOTE(review): ev->read and THREAD_FL_READ_BIT are not
			 * rolled back on this path - confirm this is intended.
			 */
			thread_add_unuse(m, t);
			return NULL;
		}
		__set_bit(THREAD_FL_EPOLL_READ_BIT, &ev->flags);
	}

	t->sands = *sands;

	/* Insert the thread as a new node of the tree rooted at m->read */
	rb_insert_sort_cached(&m->read, t, n, thread_timer_cmp);

	return t;
}
3、定时器超时检测。
定时器 master->timer_thread 超期后,在回调函数 thread_timerfd_handler() 中,会遍历 rb tree:master->read,将所有超时节点存入双向链表 ready,并从 rb tree 中删除已超时节点。(具体参看前面的文章:keepalived源码解析 —— main()、keepalived源码解析 —— socket 超时)。
thread_rb_move_ready(m, &m->read, THREAD_READ_TIMEOUT);
遍历 rb tree,将所有超时节点移动到双向链表 ready,并从 rb tree 中删除:
/*
 * Move ready thread into ready queue.
 *
 * Walks the tree at root in iteration order and hands each expired thread
 * to thread_move_ready() with the given type. The walk stops at the first
 * thread whose timer is disabled or whose sands is still later than
 * time_now. For the read/write timeout types the owning event's read or
 * write pointer is cleared before the thread is moved.
 */
static void
thread_rb_move_ready(thread_master_t *m, rb_root_cached_t *root, int type)
{
	thread_t *cur, *next;

	/* "safe" iteration: cur is erased from the tree inside the loop */
	rb_for_each_entry_safe_cached(cur, next, root, n) {
		/* A disabled timer never expires */
		if (cur->sands.tv_sec == TIMER_DISABLED)
			break;

		/* Not due yet */
		if (timercmp(&time_now, &cur->sands, <))
			break;

		/* Detach the thread from its event before it is queued */
		switch (type) {
		case THREAD_READ_TIMEOUT:
			cur->event->read = NULL;
			break;
		case THREAD_WRITE_TIMEOUT:
			cur->event->write = NULL;
			break;
		default:
			break;
		}

		/* Remove from the tree and append to the ready list */
		thread_move_ready(m, root, cur, type);
	}
}
将节点移动到双向链表 ready,并从 rb tree 中删除:
/*
 * Move ready thread into ready queue.
 *
 * Erases thread from the rb tree at root, retags it with type (unless it
 * is a THREAD_TIMER_SHUTDOWN thread, which keeps its type) and appends it
 * to the tail of m->ready. Always returns 0.
 */
static int
thread_move_ready(thread_master_t *m, rb_root_cached_t *root, thread_t *thread, int type)
{
	/* Unlink the node from the tree */
	rb_erase_cached(&thread->n, root);

	/* Shutdown timers are never retagged */
	if (thread->type != THREAD_TIMER_SHUTDOWN)
		thread->type = type;

	/* Queue the thread at the tail of the ready list */
	INIT_LIST_HEAD(&thread->next);
	list_add_tail(&thread->next, &m->ready);

	return 0;
}
4、socket 接收超时处理。
在 process_threads() 中,会对双向链表 ready 进行处理,并调用超时回调函数 vrrp_read_dispatcher_thread():
if (thread->func)
thread_call(thread); /* Run the thread's callback (thread_call() is not shown here) */
对于接收超时,会进一步调用 vrrp_dispatcher_read_timeout() 进行处理,处理完之后,会再次创建接收定时器。
/*
 * Our read packet dispatcher.
 *
 * Thread handler for a VRRP socket. A read timeout, or a socket whose
 * fd_in is -1, is handled by vrrp_dispatcher_read_timeout(); anything else
 * by vrrp_dispatcher_read(), which receives the VRRP advert. When the
 * handler returns an fd other than -1 the read thread is re-armed on it
 * with the expiry from vrrp_compute_timer(). Always returns 0.
 */
static int
vrrp_read_dispatcher_thread(thread_ref_t thread)
{
	/* Fetch thread arg */
	sock_t *s = THREAD_ARG(thread);
	int next_fd;

	/* Dispatcher state handler */
	next_fd = (thread->type == THREAD_READ_TIMEOUT || s->fd_in == -1)
		  ? vrrp_dispatcher_read_timeout(s)
		  : vrrp_dispatcher_read(s);

	/* Nothing to re-arm */
	if (next_fd == -1)
		return 0;

	/* Register next dispatcher thread */
	s->thread = thread_add_read_sands(thread->master, vrrp_read_dispatcher_thread,
					  s, next_fd, vrrp_compute_timer(s), false);

	return 0;
}
遍历以 sock->rb_sands 为根节点的 rb tree,判断节点是否超时:对于 backup,超时则调用 vrrp_goto_master();对于 master,超时则调用 vrrp_master() 发送 vrrp 通告。每个超时节点处理完之后,都会调用 vrrp_init_instance_sands() 重新计算其超时时间。
/*
 * Handle dispatcher read timeout.
 *
 * Refreshes time_now, then walks the instances in the tree rooted at
 * sock->rb_sands. The walk stops at the first instance whose timer is
 * disabled or whose sands is later than time_now. Each expired instance
 * is handled according to its state:
 *   BACKUP : vrrp_goto_master() (advert from the master timed out)
 *   MASTER : vrrp_master()
 * followed by sync handling via VRRP_TSM_HANDLE() and a recomputation of
 * the instance's sands. Returns sock->fd_in.
 */
static int
vrrp_dispatcher_read_timeout(sock_t *sock)
{
	vrrp_t *inst;
	int old_state;

	set_time_now();

	rb_for_each_entry_cached(inst, &sock->rb_sands, rb_sands) {
		/* A disabled timer never expires */
		if (inst->sands.tv_sec == TIMER_DISABLED)
			break;

		/* Not due yet */
		if (timercmp(&inst->sands, &time_now, >))
			break;

		/* Remember the state before the handlers below may change it */
		old_state = inst->state;

		switch (inst->state) {
		case VRRP_STATE_BACK:
			if (__test_bit(LOG_DETAIL_BIT, &debug))
				log_message(LOG_INFO, "(%s) Receive advertisement timeout", inst->iname);
			/* Backup timed out waiting for the master's advert */
			vrrp_goto_master(inst);
			break;

		case VRRP_STATE_MAST:
			vrrp_master(inst);
			break;

		default:
			break;
		}

		/* handle instance synchronization */
#ifdef _TSM_DEBUG_
		if (do_tsm_debug)
			log_message(LOG_INFO, "Send [%s] TSM transition : [%d,%d] Wantstate = [%d]",
				    inst->iname, old_state, inst->state, inst->wantstate);
#endif
		VRRP_TSM_HANDLE(old_state, inst);

		/* Recompute this instance's expiry */
		vrrp_init_instance_sands(inst);
	}

	return sock->fd_in;
}
发送 vrrp 通告:
/*
 * Action for an instance in master state: delegates to
 * vrrp_state_master_tx(), which sends the VRRP advert.
 */
static void
vrrp_master(vrrp_t * vrrp)
{
/* Send the VRRP advert */
vrrp_state_master_tx(vrrp);
}