Bacula之watchdog实现与分析

在程序设计中,watchdog 主要用于监控程序或系统状态,或者每隔一定时间触发某种操作。Bacula 中的 watchdog 在 dird 中用于判断用户是否应该下线

(即用户长时间没有任何动作时,强制其下线,避免占用过多的服务器资源)

以下是watchdog.h

(在原有Bacula中进行了一些函数改动,思想思路未变,便于独立分析)

#ifndef _WATCHDOG_H
#define _WATCHDOG_H 

typedef struct s_watchdog_t { // one monitored event (a watchdog entry)
	bool one_shot; // whether the callback should run only once
	time_t interval;// interval between runs, in seconds (added to time(NULL) on registration)
	time_t next_fire;// time at which the callback should run next
	void (*callback)  (struct s_watchdog_t *wd);// function called each time the watchdog fires
	void (*destructor)(struct s_watchdog_t *wd); // function called on the entry when the watchdog subsystem is stopped
	void *data; // extra caller-supplied data
	Link link; // list link: when several events are monitored, all entries are chained into one list
	
}watchdog_t;

bool watchdog_start(void); // initialise the watchdog subsystem (global function)
void watchdog_ping(); // wake the watchdog thread; call after every change to the watchdog lists so they are re-checked
bool watchdog_stop(void) ; // shut the watchdog subsystem down
watchdog_t *watchdog_init(bool oneshot,time_t internal_,void (*callback)(watchdog_t *),void (*destructor)(watchdog_t *),void *data) ; // obtain and fill in one watchdog entry
bool watchdog_register(watchdog_t *wd) ;// add a watchdog entry to the active list
bool watchdog_unregister(watchdog_t *wd); // remove a watchdog entry from the lists
void *watchdog_thread(void *arg); // body of the watchdog thread (started as a new thread)

#endif 


 

 

watchdog.c 文件,主要的实现都在此文件中

 

 

#include "head.h"
     
/* File-local timing state (static, not exported) */
static time_t watchdog_time = 0; /* time stamp taken at the start of each queue walk */
static time_t watchdog_sleep_time = 60; /* examine things every 60 seconds */

/* Mutex/condition pair the watchdog thread sleeps on; watchdog_ping() signals it */
static pthread_mutex_t timer_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  timer = PTHREAD_COND_INITIALIZER;

/* Life-cycle flags and the lock protecting the two lists */
static bool quit = FALSE; /* set by watchdog_stop() to make the thread leave its loop */
static bool wd_is_init = FALSE; /* TRUE once watchdog_start() has succeeded */
static pthread_mutex_t wd_lock = PTHREAD_MUTEX_INITIALIZER;/* watchdog lock */


static pthread_t wd_tid; /* id of the watchdog thread, joined in watchdog_stop() */
static List *wd_queue; /* registered (active) watchdogs */
static List *wd_inactive; /* one-shot watchdogs that have already fired */

/*
 *  watchdog init
 *
 *  Returns: TRUE on success
 *           FALSE on failure
 */
bool watchdog_start(void) {
	watchdog_t *dummy = NULL;
	if (wd_is_init) {
		return TRUE;
	}
	watchdog_time = time(NULL);
	wd_queue = list_init(dummy,&dummy->link);	
	wd_inactive =list_init(dummy,&dummy->link);;

	if (pthread_create(&wd_tid, NULL, watchdog_thread, NULL) != 0) {
		return FALSE;
	}
	wd_is_init = TRUE;
	return TRUE;
}

/*
 * Wake watchdog timer thread so that it walks the
 *  queue and adjusts its wait time (or exits).
 *
 * The signal is sent while holding timer_mutex, the same mutex the
 * watchdog thread passes to pthread_cond_timedwait().
 */
void watchdog_ping() {
	P(&timer_mutex);              /* take the mutex paired with the condition */
	pthread_cond_signal(&timer);  /* wake the watchdog thread if it is waiting */
	V(&timer_mutex);
}

/*
 * Terminate the watchdog thread
 *
 * Returns: TURE on success
 *          FALSE on failure
 */
bool watchdog_stop(void) 
{
	watchdog_t *p;

	if (!wd_is_init) {
		return TRUE;
	}

	quit = TRUE; /* notify watchdog thread to stop */
	wd_is_init = FALSE;

	watchdog_ping();
	
	pthread_join(wd_tid, NULL);

	while (!list_empty(wd_queue)) {
		void *item = list_first(wd_queue);
		list_remove(wd_queue,item);
		p = (watchdog_t *) item;
		if (p->destructor != NULL) {
			p->destructor(p); /* make operation at end */
		}
		free(p);
	}
	list_destroy(wd_queue);
	wd_queue = NULL;

	while (!list_empty(wd_inactive)) {
		void *item = list_first(wd_inactive);
		list_remove(wd_inactive,item);
		p = (watchdog_t *) item;
		if (p->destructor != NULL) {
			p->destructor(p);
		}
		free(p);
	}
	list_destroy(wd_inactive);
	wd_inactive = NULL;

	return TRUE;
}

/*
 * Obtain a watchdog entry and fill it in.  The entry is not active
 * until it is passed to watchdog_register().
 *
 * The watchdog subsystem is started on demand.  An entry is taken from
 * the inactive list when one is available, otherwise a new one is
 * allocated.
 *
 *  oneshot     TRUE if the callback should run only once
 *  internal_   interval between runs, in seconds
 *  callback    function called when the watchdog fires
 *  destructor  function called on the entry by watchdog_stop(), may be NULL
 *  data        caller data stored in the entry
 *
 * Returns: the entry, or NULL if the watchdog could not be started or
 *          memory could not be allocated
 */
watchdog_t *watchdog_init(bool oneshot,time_t internal_,void (*callback)(watchdog_t *wd),void (*destructor)(watchdog_t *wd),void *data) 
{
	watchdog_t *wd = NULL;

	/* Without a running watchdog the lists do not exist, so give up early */
	if (!wd_is_init && !watchdog_start()) {
		return NULL;
	}

	/*
	 * Look for a reusable entry on the inactive list.
	 *
	 * NOTE(review): entries reach the inactive list when a one-shot
	 * watchdog has fired.  Recycling one overwrites its callback,
	 * destructor and data without running the old destructor, and
	 * whoever created it may still hold the pointer -- confirm that this
	 * sharing is intended.
	 */
	P(&wd_lock);
	wd = (watchdog_t *)list_first(wd_inactive);
	if (wd) {
		list_remove(wd_inactive,wd);
	}
	V(&wd_lock);

	if (wd == NULL) {
		/* nothing to recycle: allocate a fresh entry */
		wd = (watchdog_t *) malloc(sizeof *wd);
		if (wd == NULL) {
			return NULL;
		}
	}

	wd->one_shot = oneshot;
	wd->interval = internal_;
	wd->next_fire = 0;      /* set for real by watchdog_register() */
	wd->callback = callback;
	wd->destructor = destructor;
	wd->data = data;
	return wd;
}

/*
 * Activate a watchdog: compute its first fire time, append it to the
 * active list and wake the watchdog thread so that it takes the new
 * entry into account.
 *
 * Returns: TRUE if the watchdog was registered
 *          FALSE if the watchdog subsystem is not running, wd is NULL,
 *          wd has no callback, or wd has a non-positive interval
 */
bool watchdog_register(watchdog_t *wd) {
	if (!wd_is_init) {
		printf("BUG! register_watchdog called before start_watchdog\n");
		return FALSE;
	}
	if (wd == NULL) {
		printf("BUG! Watchdog is NULL\n");
		return FALSE;
	}
	if (wd->callback == NULL) {
		printf ("BUG! Watchdog  has NULL callback\n");
		return FALSE;
	}
	if (wd->interval <= 0) {
		/*
		 * Reject instead of only reporting: a repeating entry whose
		 * interval is zero is always due, so the watchdog thread would
		 * never sleep.
		 */
		printf("BUG! Watchdog has zero interval\n");
		return FALSE;
	}

	P(&wd_lock);
	wd->next_fire = time(NULL) + wd->interval;
	list_append(wd_queue,wd);
	V(&wd_lock);

	watchdog_ping();
	return TRUE;
}

/*
 * Remove a watchdog from whichever list currently holds it (active or
 * inactive) and wake the watchdog thread.  The entry itself is not
 * freed here.
 *
 * Returns: TRUE if wd was found on one of the lists and removed
 *          FALSE if the watchdog subsystem is not running, wd is NULL,
 *          or wd is on neither list
 */
bool watchdog_unregister(watchdog_t *wd) {
	watchdog_t *p;
	bool ok = FALSE;

	if (!wd_is_init) {
		printf("BUG! watchdog_unregister called before watchdog_start\n");
		return FALSE;
	}
	if (wd == NULL) {
		return FALSE;
	}

	P(&wd_lock);
	foreach_list(p, wd_queue)
	{
		if (wd == p) {
			list_remove(wd_queue,wd);
			ok = TRUE;
			goto get_out;
		}
	}
	foreach_list(p, wd_inactive) {
		if (wd == p) {
			list_remove(wd_inactive,wd);
			ok = TRUE;
			goto get_out;
		}
	}
get_out:
	V(&wd_lock);
	watchdog_ping();
	return ok;
}

/*
 * This is the thread that walks the watchdog queue
 *  and when a queue item fires, the callback is
 *  invoked.  If it is a one shot, the queue item
 *  is moved to the inactive queue.
 *
 * Between walks the thread sleeps on the timer condition until the
 *  earliest next_fire time (at most watchdog_sleep_time seconds), or
 *  until watchdog_ping() wakes it.  The loop ends once quit is set.
 *
 * arg is unused.  Always returns NULL.
 */
void *watchdog_thread(void *arg)
{
   struct timespec timeout;
   struct timeval tv;
   time_t next_time;
   watchdog_t *p;

   (void)arg;

   while (!quit)
   {
      /*
       * NOTE: callbacks run with wd_lock held, so a callback must not
       * call watchdog_register()/watchdog_unregister() itself.
       *
       * (Inherited from the Bacula original: callbacks there used to
       * lock the jcr chain, which had to be taken before the watchdog
       * lock to keep a consistent lock order and avoid deadlocks.)
       */
      P(&wd_lock);
walk_list:
      watchdog_time = time(NULL);
      next_time = watchdog_time + watchdog_sleep_time;
      foreach_list(p, wd_queue)
      {
         if (p->next_fire <= watchdog_time)
         {
            /* Time is up: run the callback */
            p->callback(p);
            /* Reschedule (or move to inactive list if it's a one-shot timer) */
            if (p->one_shot) {
               list_remove(wd_queue,p);
               list_append(wd_inactive,p);
               /* the list changed under the iterator: start the walk again */
               goto walk_list;
            }
            else {
               p->next_fire = watchdog_time + p->interval;
            }
         }

         /* remember the earliest moment at which something must run */
         if (p->next_fire <= next_time) {
            next_time = p->next_fire;
         }
      }

      /*
       * Take timer_mutex BEFORE releasing wd_lock.  From here until the
       * wait below releases timer_mutex, nobody can complete
       * watchdog_ping(), so a ping sent after a list change (or after
       * quit was set) cannot slip into the gap between the walk and the
       * wait and be lost.
       */
      P(&timer_mutex);
      V(&wd_lock);

      if (!quit) {
         /*
          * Wait sleep time or until someone wakes us.
          * The timeout is absolute: now plus the seconds left until
          * next_time.  The timezone argument of gettimeofday() is
          * obsolete, so NULL is passed.
          */
         gettimeofday(&tv, NULL);
         timeout.tv_nsec = tv.tv_usec * 1000;
         timeout.tv_sec = tv.tv_sec + next_time - time(NULL);

         /* Note, this unlocks mutex during the sleep */
         pthread_cond_timedwait(&timer, &timer_mutex, &timeout);
      }
      V(&timer_mutex);
   }
  return NULL;
}


 

测试程序

 

 

 

#define WATCHDOG_TEST
#ifdef WATCHDOG_TEST
/*
 * Test callback: print the integer that wd->data points to together
 * with the watchdog's interval and next fire time.
 */
void start (watchdog_t *wd)
{
 int i=*(int *)wd->data;
 /* time_t is not guaranteed to be long, so convert explicitly for %ld */
 printf("thread  %d start,internal=%ld,next=%ld\n",i,(long)wd->interval,(long)wd->next_fire);
}
/*
 * Test destructor: report which watchdog (identified by the integer
 * that wd->data points to) is being torn down.
 */
void end (watchdog_t *wd)
{
 const int *id_ptr = (int *)wd->data;

 printf("thread  %d end\n", *id_ptr);
}
/*
 * Test worker: print the integer arg points to, then create and
 * register a repeating watchdog (interval 2 seconds) that carries arg
 * as its data.
 *
 * Always returns NULL.
 */
void *thread_excute(void * arg)
{
 watchdog_t * wd;

 printf("%d\n",*(int *)arg);
 wd=watchdog_init(FALSE,2,start,end,arg);
 if (wd == NULL) {
  /* watchdog_init can return NULL; do not hand that to watchdog_register */
  return NULL;
 }
 watchdog_register(wd);
 return NULL; /* a thread start routine must return a value */
}
/*
 * Test driver: start the watchdog, let five worker threads register
 * one watchdog each, let the watchdogs run for 60 seconds, then stop.
 *
 * Returns 0 on success, 1 if the watchdog could not be started.
 */
int main()
{
 pthread_t tid[5];
 int a[5]={1,2,3,4,5};
 int created=0;
 int i;

 if (!watchdog_start()) {
  printf("watchdog_start failed\n");
  return 1;
 }

 for(i=0;i<5;i++) {
  /* only count threads that really exist so the join below is valid */
  if (pthread_create(&tid[created],NULL,thread_excute,(void *)&a[i]) != 0) {
   printf("pthread_create failed for %d\n",a[i]);
   continue;
  }
  created++;
 }

 /* the workers only register and return; reap them instead of leaking them */
 for(i=0;i<created;i++)
  pthread_join(tid[i],NULL);

 sleep(60);
 watchdog_stop();
 return 0;
}

#endif


 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值