Reading notes about the low-resolution timer implementation in Linux.

Author: Honggang Yang(Joseph) <ganggexiongqi@gmail.com>
Kernel Version: Linux 3.1.1
===================================================================

REF: Professional Linux Kernel Architecture
         + Essential Linux Device Drivers
         + Understanding the Linux Kernel 3

===============================

Contents:

1. INIT the base structures

2. Dynamic timer registration, modification and deletion

    2.1 Registration and modification

    2.2 Dynamic timer deletion

3. Dynamic timer handling

4. A whole view of the low-resolution timer system

5. Demo of how to use a dynamic timer in your module

--------------------------------------------

Because checking for expired timer functions is always done by deferrable
functions that may be executed a long time after they have been activated,
the kernel cannot ensure that timer functions will start right at their
expiration times. It can only ensure that they are executed either at the
proper time or after a delay of up to a few hundred milliseconds. For this
reason, timers are not appropriate for real-time applications in which
expiration times must be strictly enforced. [ULK3, Pg 244]

  --------- Dynamic timer management structures -------
 
  struct tvec {
     struct list_head vec[TVN_SIZE];
  };

  struct tvec_root {
     struct list_head vec[TVR_SIZE];
  };
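
  The array sizes above come from constants defined near the top of
  kernel/timer.c. For the normal (non-CONFIG_BASE_SMALL) configuration the
  values are:

  #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)    /* 6   */
  #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)    /* 8   */
  #define TVN_SIZE (1 << TVN_BITS)                /* 64  */
  #define TVR_SIZE (1 << TVR_BITS)                /* 256 */
  #define TVN_MASK (TVN_SIZE - 1)
  #define TVR_MASK (TVR_SIZE - 1)

  So tv1 (the root vector) has 256 slots covering one jiffy each, while
  tv2..tv5 have 64 slots each, every level covering a progressively larger
  range of expiry times.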

  struct tvec_base {
    spinlock_t lock;
    struct timer_list *running_timer; // the timer currently being processed, if any
    /* Records the time (in jiffies) up to which all timers of this base
    * have already been executed.
    */
    unsigned long timer_jiffies;
    unsigned long next_timer; /* expiry of the earliest pending non-deferrable
                               * timer; used to decide how long an idle CPU
                               * may sleep (NO_HZ) */
    struct tvec_root tv1;
    struct tvec tv2;
    struct tvec tv3;
    struct tvec tv4;
    struct tvec tv5;
} ____cacheline_aligned;

struct timer_list {
    /*
     * All fields that change during normal runtime grouped to the
     * same cacheline
     */
    struct list_head entry;
    unsigned long expires;
    struct tvec_base *base;
    
    void (*function)(unsigned long);
    unsigned long data;

    int slack;
    
#ifdef CONFIG_TIMER_STATS
    int start_pid;
    void *start_site;
    char start_comm[16];
#endif
#ifdef CONFIG_LOCKDEP
    struct lockdep_map lockdep_map;
#endif
};
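
A note on the @base field: every struct tvec_base is cacheline-aligned, so the
low-order bit of the pointer stored in timer->base is always zero. The kernel
reuses that bit as a flag marking the timer as "deferrable" (a timer that may
be postponed while the CPU is idle); this is also why init_timers_cpu() below
warns and bails out if an allocated base is not suitably aligned. The helpers
in kernel/timer.c look roughly like this (sketch from the 3.x sources,
slightly abridged):

#define TBASE_DEFERRABLE_FLAG   (0x1)

/* Is the deferrable flag set in the (tagged) base pointer? */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
    return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

/* Strip the flag bit to recover the real tvec_base pointer. */
static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
    return ((struct tvec_base *)((unsigned long)base &
                                 ~TBASE_DEFERRABLE_FLAG));
}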

---------------
1. INIT the base structures
 
 Call  Tree:
 -----------------------------
 start_kernel
        init_timers
                timer_cpu_notify
                        init_timers_cpu
        hrtimers_init
        timekeeping_init
        time_init*
        late_time_init*

------------------

struct tvec_base boot_tvec_bases; /* Only for the boot CPU */
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

  /*
  * Init the per-CPU variable @tvec_bases for each CPU.
  * ------------------
  * Each CPU has a struct tvec_base. Because the number of CPUs is not known
  * at compile time, only @boot_tvec_bases is defined statically, for the
  * boot CPU. The tvec_base structures of the other CPUs are allocated with
  * kmalloc_node().
  * @tvec_bases->timer_jiffies and @tvec_bases->next_timer are initialized
  * to @jiffies.
  * ----------------
  * kernel/timer.c
  */
static int __cpuinit init_timers_cpu(int cpu)
{
    int j;
    struct tvec_base *base;
    static char __cpuinitdata tvec_base_done[NR_CPUS];

    if (!tvec_base_done[cpu]) {
        static char boot_done;

        if (boot_done) {
            /*
             * The APs use this path later in boot
             */
            base = kmalloc_node(sizeof(*base),
                        GFP_KERNEL | __GFP_ZERO,
                        cpu_to_node(cpu));
            if (!base)
                return -ENOMEM;

            /* Make sure that tvec_base is 2 byte aligned */
            if (tbase_get_deferrable(base)) {
                WARN_ON(1);
                kfree(base);
                return -ENOMEM;
            }
            per_cpu(tvec_bases, cpu) = base;
        } else {
            /*
             * This is for the boot CPU - we use compile-time
             * static initialisation because per-cpu memory isn't
             * ready yet and because the memory allocators are not
             * initialised either.
             */
            boot_done = 1;
            base = &boot_tvec_bases;
        }
        tvec_base_done[cpu] = 1;
    } else {
        base = per_cpu(tvec_bases, cpu);
    }

    spin_lock_init(&base->lock);

    for (j = 0; j < TVN_SIZE; j++) {
        INIT_LIST_HEAD(base->tv5.vec + j);
        INIT_LIST_HEAD(base->tv4.vec + j);
        INIT_LIST_HEAD(base->tv3.vec + j);
        INIT_LIST_HEAD(base->tv2.vec + j);
    }
    for (j = 0; j < TVR_SIZE; j++)
        INIT_LIST_HEAD(base->tv1.vec + j);

    base->timer_jiffies = jiffies;
    base->next_timer = base->timer_jiffies;
    return 0;
}
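
For context, init_timers() itself (kernel/timer.c) is what reaches the code
above through the CPU notifier, and it also registers the softirq handler
that section 3 discusses. Roughly:

void __init init_timers(void)
{
    int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
                               (void *)(long)smp_processor_id());

    init_timer_stats();

    BUG_ON(err != NOTIFY_OK);
    register_cpu_notifier(&timers_nb);              /* CPU hotplug support */
    open_softirq(TIMER_SOFTIRQ, run_timer_softirq); /* see section 3       */
}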

2. Dynamic timer registration, modification and deletion

  2.1 Registration and modification

          First, initialize a timer_list object. Then register it with the system
          by calling add_timer(). When the timer expires, its callback function is
          invoked in softirq (soft interrupt) context.

          Related functions:
                  init_timer()
                  add_timer()
                  
---------------
call tree:
#define init_timer(timer)\
    init_timer_key((timer), NULL, NULL)    
                
init_timer  (init_timer_key)
        __init_timer

add_timer
     mod_timer
             __mod_timer
                 internal_add_timer
 ---------------          

static void __init_timer(struct timer_list *timer,
             const char *name,
             struct lock_class_key *key)
{
    timer->entry.next = NULL;
    timer->base = __raw_get_cpu_var(tvec_bases);
    timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
    timer->start_site = NULL;
    timer->start_pid = -1;
    memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
    lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
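
Drivers usually do not assign ->function and ->data by hand after
init_timer(); the setup_timer() helper from include/linux/timer.h combines
the three steps. The real macro goes through setup_timer_key() for lockdep,
but it is conceptually equivalent to:

#define setup_timer(timer, fn, data)            \
    do {                                        \
        init_timer(timer);                      \
        (timer)->function = (fn);               \
        (timer)->data = (data);                 \
    } while (0)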

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
    BUG_ON(timer_pending(timer));
    mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
    expires = apply_slack(timer, expires);

    /*
     * This is a common optimization triggered by the
     * networking code - if the timer is re-modified
     * to be the same thing then just return:
     */
    if (timer_pending(timer) && timer->expires == expires)
        return 1;

    return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
                        bool pending_only, int pinned)
{
    struct tvec_base *base, *new_base;
    unsigned long flags;
    int ret = 0 , cpu;

    timer_stats_timer_set_start_info(timer);
    BUG_ON(!timer->function);

    base = lock_timer_base(timer, &flags);

    if (timer_pending(timer)) {
        detach_timer(timer, 0);
        if (timer->expires == base->next_timer &&
            !tbase_get_deferrable(timer->base))
            base->next_timer = base->timer_jiffies;
        ret = 1;
    } else {
        if (pending_only)
            goto out_unlock;
    }

    debug_activate(timer, expires);

    cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
    if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
        cpu = get_nohz_timer_target();
#endif
    new_base = per_cpu(tvec_bases, cpu);

    if (base != new_base) {
        /*
         * We are trying to schedule the timer on the local CPU.
         * However we can't change timer's base while it is running,
         * otherwise del_timer_sync() can't detect that the timer's
         * handler yet has not finished. This also guarantees that
         * the timer is serialized wrt itself.
         */
        if (likely(base->running_timer != timer)) {
            /* See the comment in lock_timer_base() */
            timer_set_base(timer, NULL);
            spin_unlock(&base->lock);
            base = new_base;
            spin_lock(&base->lock);
            timer_set_base(timer, base);
        }
    }

    timer->expires = expires;
    if (time_before(timer->expires, base->next_timer) &&
        !tbase_get_deferrable(timer->base))
        base->next_timer = timer->expires;
    internal_add_timer(base, timer);

out_unlock:
    spin_unlock_irqrestore(&base->lock, flags);

    return ret;
}

static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
    unsigned long expires = timer->expires;
    unsigned long idx = expires - base->timer_jiffies;
    struct list_head *vec;

    if (idx < TVR_SIZE) {
        int i = expires & TVR_MASK;
        vec = base->tv1.vec + i;
    } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
        int i = (expires >> TVR_BITS) & TVN_MASK;
        vec = base->tv2.vec + i;
    } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
        int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
        vec = base->tv3.vec + i;
    } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
        int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
        vec = base->tv4.vec + i;
    } else if ((signed long) idx < 0) {
        /*
         * Can happen if you add a timer with expires == jiffies,
         * or you set a timer to go off in the past
         */
        vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
    } else {
        int i;
        /* If the timeout is larger than 0xffffffff on 64-bit
         * architectures then we use the maximum timeout:
         */
        if (idx > 0xffffffffUL) {
            idx = 0xffffffffUL;
            expires = idx + base->timer_jiffies;
        }
        i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
        vec = base->tv5.vec + i;
    }
    /*
     * Timers are FIFO:
     */
    list_add_tail(&timer->entry, vec);
}
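
To make the bucket arithmetic concrete, here is a small user-space sketch
(not kernel code, and ignoring the special cases for past and very distant
expiries) that mirrors the index computation above for the default
TVR_BITS = 8 / TVN_BITS = 6 values:

#include <stdio.h>

#define TVR_BITS 8
#define TVN_BITS 6
#define TVR_SIZE (1 << TVR_BITS)
#define TVR_MASK (TVR_SIZE - 1)
#define TVN_MASK ((1 << TVN_BITS) - 1)

/* Return the wheel level (1..5) a timer would land in, and its slot. */
static int wheel_level(unsigned long timer_jiffies, unsigned long expires,
                       int *slot)
{
    unsigned long idx = expires - timer_jiffies;

    if (idx < TVR_SIZE) {
        *slot = expires & TVR_MASK;
        return 1;
    } else if (idx < 1UL << (TVR_BITS + TVN_BITS)) {
        *slot = (expires >> TVR_BITS) & TVN_MASK;
        return 2;
    } else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS)) {
        *slot = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
        return 3;
    } else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS)) {
        *slot = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
        return 4;
    }
    *slot = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
    return 5;
}

int main(void)
{
    unsigned long now = 1000;   /* pretend base->timer_jiffies */
    unsigned long deltas[] = { 100, 10000, 2000000 };
    int i, slot;

    for (i = 0; i < 3; i++) {
        int lvl = wheel_level(now, now + deltas[i], &slot);
        printf("+%lu jiffies -> tv%d, slot %d\n", deltas[i], lvl, slot);
    }
    return 0;
}

A timer due 100 jiffies from now therefore lands in tv1, one due in 10,000
jiffies in tv2, and one due in 2,000,000 jiffies in tv3; the higher levels are
only consulted when __run_timers() cascades them back down.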

2.2 Dynamic timer deletion
  Because they are activated asynchronously, dynamic timers are prone to race
  conditions. For more information, refer to ULK3, pg 246.

call tree:
   del_timer
        timer_pending
        detach_timer

--------------------------

/**
 * del_timer - deactive a timer.
 * @timer: the timer to be deactivated
 *  
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *  
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{       
    struct tvec_base *base;
    unsigned long flags;
    int ret = 0;
    
    timer_stats_timer_clear_start_info(timer);
    if (timer_pending(timer)) {
        base = lock_timer_base(timer, &flags);
        if (timer_pending(timer)) {
            detach_timer(timer, 1);
            if (timer->expires == base->next_timer &&
                !tbase_get_deferrable(timer->base))
                base->next_timer = base->timer_jiffies;
            ret = 1;
        }
        spin_unlock_irqrestore(&base->lock, flags);
    }
    
    return ret;
}
EXPORT_SYMBOL(del_timer);
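
Note that on SMP del_timer() only removes a pending timer; it does not wait
for a handler that is already running on another CPU. Cleanup paths (module
unload, device teardown) therefore normally use del_timer_sync(), which waits
until any running handler has finished. For a timer such as the @my_timer used
in the demo of section 5, that is simply:

    /* in cleanup code; must not be called from interrupt context */
    del_timer_sync(&my_timer);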

/**
 * timer_pending - is a timer pending?
 * @timer: the timer in question
 *
 * timer_pending will tell whether a given timer is currently pending,
 * or not. Callers must ensure serialization wrt. other operations done
 * to this timer, eg. interrupt contexts, or other CPUs on SMP.
 *  
 * return value: 1 if the timer is pending, 0 if not.
 */
static inline int timer_pending(const struct timer_list * timer)
{       
    return timer->entry.next != NULL;
}  

//arch/x86/include/asm/x86_init.h
/**
 * struct x86_init_timers - platform specific timer setup
 * @setup_percpu_clockev:   set up the per cpu clock event device for the
 *              boot cpu
 * @tsc_pre_init:       platform function called before TSC init
 * @timer_init:         initialize the platform timer (default PIT/HPET)
 * @wallclock_init:     init the wallclock device
 */
struct x86_init_timers {
    void (*setup_percpu_clockev)(void);
    void (*tsc_pre_init)(void);
    void (*timer_init)(void);
    void (*wallclock_init)(void);
};

static inline void detach_timer(struct timer_list *timer,
                int clear_pending)
{
    struct list_head *entry = &timer->entry;

    debug_deactivate(timer);

    __list_del(entry->prev, entry->next);
    if (clear_pending)
        entry->next = NULL;
    entry->prev = LIST_POISON2;
}

3. Dynamic timer handling

Despite the clever data structures, handling software timers is a
time-consuming activity that should not be performed by the timer interrupt
handler. In this version of Linux this activity is carried out by a
deferrable function, namely the TIMER_SOFTIRQ softirq. [ULK3, pg 248]


 Call Tree:

 tick_handle_periodic
         tick_periodic
 (or tick_nohz_handler / tick_sched_timer when NO_HZ / high-resolution mode is active)
                 update_process_times
                         run_local_timers
                                 raise_softirq(TIMER_SOFTIRQ) // trigger the timer softirq handler

 run_timer_softirq
         __run_timers
------------------------



/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
    struct task_struct *p = current;
    int cpu = smp_processor_id();

    /* Note: this timer irq context must be accounted for as well. */
    account_process_tick(p, user_tick);
    run_local_timers();//****
    rcu_check_callbacks(cpu, user_tick);
    printk_tick();
#ifdef CONFIG_IRQ_WORK
    if (in_irq())
        irq_work_run();
#endif
    scheduler_tick();
    run_posix_cpu_timers(p);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
    hrtimer_run_queues();
    raise_softirq(TIMER_SOFTIRQ); //***
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
    struct tvec_base *base = __this_cpu_read(tvec_bases);

    hrtimer_run_pending();
    /* base->timer_jiffies marks the point up to which timers have already been run */
    if (time_after_eq(jiffies, base->timer_jiffies))
        __run_timers(base);
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
    struct timer_list *timer;

    spin_lock_irq(&base->lock);
    /*
    * If the kernel has missed a number of ticks in the past, the timers that
    * expired between the last execution point (base->timer_jiffies) and the
    * current time (jiffies) are all dealt with now.
    */
    while (time_after_eq(jiffies, base->timer_jiffies)) {
        struct list_head work_list;
        struct list_head *head = &work_list;
        int index = base->timer_jiffies & TVR_MASK;

        /*
         * Cascade timers:
         * The cascade function is used to replenish the timer lists with timers
         * from higher groups.
         */
        if (!index &&
            (!cascade(base, &base->tv2, INDEX(0))) &&
                (!cascade(base, &base->tv3, INDEX(1))) &&
                    !cascade(base, &base->tv4, INDEX(2)))
            cascade(base, &base->tv5, INDEX(3));
            /*
            * All timers located in the first group at the corresponding position
            * for the timer_jiffies value are copied into a temporary list and
            * therefore removed from the original data structures.
            */
        ++base->timer_jiffies;
        list_replace_init(base->tv1.vec + index, &work_list);
        while (!list_empty(head)) {
            void (*fn)(unsigned long);
            unsigned long data;
            timer = list_first_entry(head, struct timer_list,entry);
            fn = timer->function;
            
            data = timer->data;

            timer_stats_account_timer(timer);

            base->running_timer = timer;
                /* Detach the timer from the temporary list */
            detach_timer(timer, 1);

            spin_unlock_irq(&base->lock);
              /*
              * execute the timer's callback function
              */
            call_timer_fn(timer, fn, data);
            spin_lock_irq(&base->lock);
        }
    }
    base->running_timer = NULL;
    spin_unlock_irq(&base->lock);

}
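
For completeness, the cascade() helper called above empties one slot of a
higher-level vector and re-sorts every timer in it into the lower levels via
internal_add_timer(). In kernel/timer.c it looks roughly like this:

static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
    /* cascade all the timers from tv up one level */
    struct timer_list *timer, *tmp;
    struct list_head tv_list;

    list_replace_init(tv->vec + index, &tv_list);

    /*
     * We are removing _all_ timers from the list, so we
     * don't have to detach them individually.
     */
    list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
        BUG_ON(tbase_get_base(timer->base) != base);
        internal_add_timer(base, timer);
    }

    return index;
}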

 4. A whole view of the low-resolution timer system

 The high-resolution timer mechanism is based on clock events, whereas the
 low-resolution timer mechanism relies on periodic events that can either come
 directly from a low-resolution clock or from the high-resolution subsystem.
 Two important tasks for which low-resolution timers assume responsibility
 are:
    1> Handling the global jiffies counter. The value is incremented periodically
        (or at least it looks periodic to most parts of the kernel) and represents
        a particularly simple form of time reference. As we have seen, the dynamic
        timer implementation is based on the jiffies counter value.
    2> Performing per-process accounting. This also includes handling of classical
       low-resolution timers (dynamic timers), which can be associated with
       any process.
       
Overview of periodic low-resolution timer interrupts

Early kernels in the 2.6 series hooked directly into the timer interrupt to
start timer activation and process accounting, but this has become somewhat
more involved since the introduction of the generic clock event framework
(we will talk about this later).


The details differ between architectures, but the principle is the same.
How a particular architecture proceeds is usually set up in time_init(),
which is called at boot time to initialize the fundamental low-resolution
timekeeping. The periodic clock is set up to operate at HZ ticks per second.
IA-32 registers timer_interrupt() as the interrupt handler, whereas AMD64 uses
timer_event_interrupt(). Both functions notify the generic, architecture-independent
time processing layers of the kernel by calling the event handler of the
so-called global clock. The handler sets the ball rolling for periodic
low-resolution timekeeping by calling the following two functions.
    - do_timer() is responsible for system-wide, global tasks: updating the
        jiffies value and handling process accounting. On a multiprocessor system,
        one particular CPU is selected to perform both tasks, and all other
        CPUs are not concerned with them.
    - update_process_times() needs to be performed by every CPU on SMP
    systems. Besides process accounting, it activates and expires all registered
    classical low-resolution timers and provides the scheduler with a sense
    of time. Timer activation and expiration are triggered by calling
    run_local_timers(). That function, in turn, raises the softirq TIMER_SOFTIRQ,
    and the softirq handler is responsible for running the low-resolution timers.
 
Before we inspect the details of the two functions mentioned above,
jiffies_64 and jiffies need to be introduced.

The global variable jiffies_64 (a 64-bit integer on all architectures) is
incremented by one on every timer tick. It specifies the number of timer
interrupts since the system started. Its value increases with constant
regularity when dynamic ticks are disabled. If dynamic ticks are active, more
than one tick period can have passed since the last update. jiffies is a
variable of type unsigned long and is therefore only 4 bytes long on 32-bit
processors. jiffies and jiffies_64 match in their least significant bits and
therefore refer to the same memory location or the same register; the two are
synonymous on 64-bit machines. This means that the jiffies_64++ operation
also increases jiffies by 1.
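
Because jiffies_64 cannot be read atomically on 32-bit machines, code that
needs the full 64-bit value must not read the variable directly; it calls
get_jiffies_64(), which retries the read under the xtime_lock sequence
counter until a consistent value is seen. A typical use:

    #include <linux/jiffies.h>

    u64 now = get_jiffies_64();  /* safe on both 32-bit and 64-bit kernels */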

- do_timer()

    Call tree of do_timer()
    do_timer
            jiffies_64 += ticks
            update_wall_time
            calc_global_load

      
 
/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */
void do_timer(unsigned long ticks)
{
    jiffies_64 += ticks;
        /*
        * Update the wall time, which specifies how long the system has
        * already been up and running. In contrast to the jiffies mechanism,
        * the wall clock uses a human-readable format (nanoseconds) to
        * represent the current time.
        */
    update_wall_time();
        /*
        * Update the system load statistics, which specify how many tasks
        * have on average been waiting on the run queue in a ready-to-run
        * state during the last 1, 5, and 15 minutes.
        */
    calc_global_load(ticks);
}

 - update_process_times()
    
    Call Tree:
    update_process_times
            account_process_tick
            run_local_timers // this is what we are concerned with here
            rcu_check_callbacks
            scheduler_tick()
            run_posix_cpu_timers

 struct task_struct {
 ...
        cputime_t utime, stime; /* @utime: ticks spent in user mode,
                                 * @stime: ticks spent in kernel mode */
 ...
 }
 
/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
    struct task_struct *p = current;
    int cpu = smp_processor_id();

    /* Note: this timer irq context must be accounted for as well. */
        /*
        * We don't want to go into the details of this function. Here you only
        * need to know that it updates the CPU-time accounting values in the
        * task structure.
        */
    account_process_tick(p, user_tick);
        /*
        * Activates and expires the low-resolution timers. Recall that this
        * was discussed in detail above.
        */
    run_local_timers();///**********************
    rcu_check_callbacks(cpu, user_tick);
    printk_tick();
#ifdef CONFIG_IRQ_WORK
    if (in_irq())
        irq_work_run();
#endif
    scheduler_tick();
    run_posix_cpu_timers(p);
}
 


5. Demo of how to use a dynamic timer in your module

    #include <linux/timer.h>

    static struct timer_list my_timer;

    /* Callback of @my_timer; runs in softirq context when the timer expires. */
    static void timer_func(unsigned long func_parameter)
    {
        /* do work to be done periodically */
        ...
        /* re-arm the timer if periodic behaviour is wanted */
        mod_timer(&my_timer, jiffies + sec * HZ);
    }

    init_timer(&my_timer);
    my_timer.expires = jiffies + sec * HZ;  /* @sec is the timeout in seconds */
    my_timer.function = timer_func;         /* callback function of @my_timer */
    my_timer.data = can_be_devid;           /* parameter passed to @timer_func */
    add_timer(&my_timer);                   /* start the timer */
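
Putting it together, a minimal (hypothetical) module that fires the timer
five seconds after load, re-arms it from the callback, and cleans up on
unload might look like this:

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;

static void timer_func(unsigned long data)
{
    pr_info("timer fired, data=%lu\n", data);
    /* re-arm for another 5 seconds to get periodic behaviour */
    mod_timer(&my_timer, jiffies + 5 * HZ);
}

static int __init timer_demo_init(void)
{
    setup_timer(&my_timer, timer_func, 0);
    mod_timer(&my_timer, jiffies + 5 * HZ);   /* first expiry in 5 seconds */
    return 0;
}

static void __exit timer_demo_exit(void)
{
    del_timer_sync(&my_timer);   /* wait for a running handler, if any */
}

module_init(timer_demo_init);
module_exit(timer_demo_exit);
MODULE_LICENSE("GPL");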
   