Linux时间子系统(三) -- clockevent, tick device

转载请标明出处floater的csdn blog,http://blog.csdn.net/flaoter

1 clockevent

clockevent是具有编程和事件产生能力的定时器,在我使用的ARM SOC平台上底层依赖的硬件是ARM Generic Timer中的Timers。当然,在SOC系统中应该还会有其它的timer可以使用,但是本文的介绍只限于arm的timer。

1.1 数据结构

/*
 * Descriptor of a programmable timer that is able to raise events.
 * It bundles the callbacks used to program the hardware, the limits and
 * conversion factors for the programmable delta, and bookkeeping data
 * (name, rating, irq, cpumask, list linkage).
 */
struct clock_event_device {
    void            (*event_handler)(struct clock_event_device *); // callback invoked when the event fires
    int         (*set_next_event)(unsigned long evt,
                          struct clock_event_device *);  // program the next event, expressed in cycles
    int         (*set_next_ktime)(ktime_t expires,
                          struct clock_event_device *);  // program the next event, expressed as ktime
    ktime_t         next_event;
    u64         max_delta_ns;  // maximum programmable delta, in ns
    u64         min_delta_ns;  // minimum programmable delta, in ns
    u32         mult;
    u32         shift;   // mult/shift: conversion between ns and cycles
    enum clock_event_mode   mode;
    unsigned int        features;
    unsigned long       retries;

    void            (*broadcast)(const struct cpumask *mask);
    void            (*set_mode)(enum clock_event_mode mode,
                        struct clock_event_device *);
    void            (*suspend)(struct clock_event_device *);
    void            (*resume)(struct clock_event_device *);
    unsigned long       min_delta_ticks;
    unsigned long       max_delta_ticks;

    const char      *name;
    int         rating;
    int         irq;
    int         bound_on;
    const struct cpumask    *cpumask;
    struct list_head    list;
    struct module       *owner;
} ____cacheline_aligned;

• mode
这个成员是说明clockevent的工作模式,具体的mode设定是由set_mode这个callback函数来完成的。

 /*
  * Operating mode of a clock event device. The mode is stored in
  * clock_event_device.mode and applied through the set_mode callback.
  */
 enum clock_event_mode {
    CLOCK_EVT_MODE_UNUSED = 0,
    CLOCK_EVT_MODE_SHUTDOWN,
    CLOCK_EVT_MODE_PERIODIC,
    CLOCK_EVT_MODE_ONESHOT,
    CLOCK_EVT_MODE_RESUME,
};

• features
说明clockevent设备的特征。CLOCK_EVT_FEAT_PERIODIC说明该硬件timer可以产生周期性的clock event,CLOCK_EVT_FEAT_ONESHOT说明该硬件timer可以产生单触发的clock event。不要将features和mode的使用场景混淆。

/* Capability bits stored in clock_event_device.features */
#define CLOCK_EVT_FEAT_PERIODIC     0x000001  // device can generate periodic events
#define CLOCK_EVT_FEAT_ONESHOT      0x000002  // device can generate one-shot events
#define CLOCK_EVT_FEAT_KTIME        0x000004  // events are programmed in ktime rather than cycles

• list
内核使用如下两个链表来管理系统中的clock_event_device。clockevent_devices list中是当前active的device。

/*
 * clockevent_devices:   devices that are currently active.
 * clockevents_released: devices handed back when they are replaced by
 *                       clockevents_exchange_device(); they are moved back
 *                       to clockevent_devices by clockevents_notify_released().
 */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);

1.2 clockevent的建立过程

这里我先对clockevent的注册函数进行介绍,再对在ARM SOC平台调用它的流程进行说明。

1.2.1 clock_event_device的注册

注册函数如下,

/*
 * Register a clock event device.
 *
 * @dev: the device to register; its mode must still be
 *       CLOCK_EVT_MODE_UNUSED, otherwise BUG_ON() triggers.
 *
 * If the driver did not set a cpumask, the device is bound to the
 * current cpu (with a warning when more than one cpu is possible).
 * The list manipulation and the tick-layer notification run under
 * clockevents_lock with interrupts disabled.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
    unsigned long flags;

    BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
    if (!dev->cpumask) {
        WARN_ON(num_possible_cpus() > 1);
        dev->cpumask = cpumask_of(smp_processor_id());
    }

    raw_spin_lock_irqsave(&clockevents_lock, flags);

    list_add(&dev->list, &clockevent_devices);  // add the device to the clockevent_devices list
    tick_check_new_device(dev);  // let the tick device layer handle it; a replacement of the current clockevent also happens there
    clockevents_notify_released(); // walk the clockevents_released list and add its entries back to clockevent_devices

    raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}

tick_check_new_device是tick device层的函数,在此只列出它调用的clockevents_exchange_device,clockevents_exchange_device函数不再展开了,它实现将curdev从clockevent_devices list中删除,添加到clockevents_released list中。

void tick_check_new_device(struct clock_event_device *newdev)
{
...
//前面有很多情况的判断,在后续章节中会有介绍
    clockevents_exchange_device(curdev, newdev); //将curdev从clockevent_devices list中删除,添加到clockevents_released list中
    tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
...
}
1.2.2 ARM SOC平台上clockevent的注册

在上一节介绍clocksource的注册时提到过在内核启动阶段的time_init函数的clocksource_of_init中,会对段__clksrc_of_table进行解析,armv8_arch_timer的注册函数arch_timer_init会被调用,在本小节会对此函数进行详解。
在解析此函数之前,先看看dts中关于此timer的定义,

timer {
        compatible = "arm,armv8-timer";
        interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8)
                     | IRQ_TYPE_LEVEL_LOW)>,
                 <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8)
                     | IRQ_TYPE_LEVEL_LOW)>,
                 <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8)
                     | IRQ_TYPE_LEVEL_LOW)>,
                 <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8)
                     | IRQ_TYPE_LEVEL_LOW)>;
        clock-frequency = <26000000>;
    };

在时间子系统(一)中曾经对ARM Generic Timer进行过说明,每个processor都有如下四个timer, 并且由于它们都是cpu私有的,所以产生的中断都是PPI类型的。
• A Non-secure EL1 physical timer.
• A Secure EL1 physical timer.
• A Non-secure EL2 physical timer.
• A virtual timer.
关于这几个中断,在内核中有如下枚举描述。

/*
 * Index of each per-cpu timer interrupt (PPI) in arch_timer_ppi[].
 * arch_timer_init() uses the same value as the index into the
 * "interrupts" property of the device-tree timer node, so the enum order
 * has to match the order of the interrupts listed there.
 */
enum ppi_nr {
    PHYS_SECURE_PPI,
    PHYS_NONSECURE_PPI,
    VIRT_PPI,
    HYP_PPI,
    MAX_TIMER_PPI   /* number of entries, not a real interrupt */
};

回顾了这些知识后,再对arch_timer_init进行解析。

/*
 * Init entry for the CP15-accessed architected timer described by
 * device-tree node @np.
 *
 * Maps the timer interrupts from the node, detects the timer rate,
 * decides between the virtual and the physical timer, and then
 * registers the clock event device and runs the common init.
 * Only the first matching node is handled; further nodes are skipped
 * with a warning.
 */
static void __init arch_timer_init(struct device_node *np)
{
    int i;

    if (arch_timers_present & ARCH_CP15_TIMER) {
        pr_warn("arch_timer: multiple nodes in dt, skipping\n");
        return;
    }

    arch_timers_present |= ARCH_CP15_TIMER;  // timer is accessed through CP15
    for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
        arch_timer_ppi[i] = irq_of_parse_and_map(np, i);  // parse the dt node and map the irq number
    arch_timer_detect_rate(NULL, np);  // timer clock frequency

    /*
     * If HYP mode is available, we know that the physical timer
     * has been configured to be accessible from PL1. Use it, so
     * that a guest can use the virtual timer instead.
     *
     * If no interrupt provided for virtual timer, we'll have to
     * stick to the physical timer. It'd better be accessible...
     */
// Use the physical timer when HYP mode is available or when no interrupt
// was provided for the virtual timer. With HYP mode the virtual timer is
// left to the guest OS, so the host sticks to the physical timer.
    if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {  
        arch_timer_use_virtual = false;                     

        if (!arch_timer_ppi[PHYS_SECURE_PPI] ||
            !arch_timer_ppi[PHYS_NONSECURE_PPI]) {
            pr_warn("arch_timer: No interrupt available, giving up\n");
            return;
        }
    }

    arch_timer_c3stop = !of_property_read_bool(np, "always-on");

    arch_timer_register(); // register the arch timer (irq + clock event device)
    arch_timer_common_init(); // covered in the earlier clocksource chapter
}

请注意下面arch_timer_register中使用的是percpu类型的变量,clock_event_device是percpu的资源。

/*
 * Allocate the per-cpu clock_event_device, request the timer PPI(s),
 * hook up the cpu-hotplug notifier and cpu PM support, and finally
 * configure the timer on the boot cpu.
 *
 * Returns 0 on success or a negative errno. On failure everything that
 * was acquired up to that point is released again in reverse order.
 */
static int __init arch_timer_register(void)
{
    int err;
    int ppi;

    arch_timer_evt = alloc_percpu(struct clock_event_device);  // allocate the per-cpu clock_event_device
    if (!arch_timer_evt) {
        err = -ENOMEM;
        goto out;
    }

    if (arch_timer_use_virtual) {
        ppi = arch_timer_ppi[VIRT_PPI];
        err = request_percpu_irq(ppi, arch_timer_handler_virt,
                     "arch_timer", arch_timer_evt);
    } else {  // physical timer: request both the secure and the non-secure physical timer PPI
        ppi = arch_timer_ppi[PHYS_SECURE_PPI];
        err = request_percpu_irq(ppi, arch_timer_handler_phys,
                     "arch_timer", arch_timer_evt);
        if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
            ppi = arch_timer_ppi[PHYS_NONSECURE_PPI];
            err = request_percpu_irq(ppi, arch_timer_handler_phys,
                         "arch_timer", arch_timer_evt);
            /* second request failed: drop the first one again */
            if (err)
                free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
                        arch_timer_evt);
        }
    }

    if (err) {
        pr_err("arch_timer: can't register interrupt %d (%d)\n",
               ppi, err);
        goto out_free;
    }

    err = register_cpu_notifier(&arch_timer_cpu_nb);
    if (err)
        goto out_free_irq;

    err = arch_timer_cpu_pm_init();
    if (err)
        goto out_unreg_notify;

    /* Immediately configure the timer on the boot CPU */
    arch_timer_setup(this_cpu_ptr(arch_timer_evt));   // registers the clock event device

    return 0;

    /* Error unwinding: each label releases one more resource. */
out_unreg_notify:
    unregister_cpu_notifier(&arch_timer_cpu_nb);
out_free_irq:
    if (arch_timer_use_virtual)
        free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt);
    else {
        free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
                arch_timer_evt);
        if (arch_timer_ppi[PHYS_NONSECURE_PPI])
            free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
                    arch_timer_evt);
    }

out_free:
    free_percpu(arch_timer_evt);
out:
    return err;
}

平台的percpu timer硬件寄存器都是通过CP15方式来访问。

/*
 * Set up the timer of the calling cpu: fill in and register @clk as a
 * CP15 timer, enable the per-cpu interrupt(s) that match the chosen
 * timer (virtual or physical), then configure user access to the
 * counter and, if enabled in the config, the event stream.
 * Always returns 0.
 */
static int arch_timer_setup(struct clock_event_device *clk)
{
    __arch_timer_setup(ARCH_CP15_TIMER, clk);  // timer registers are accessed through coprocessor CP15

    if (arch_timer_use_virtual)
        enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
    else {
        enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0);  // enable the interrupt
        if (arch_timer_ppi[PHYS_NONSECURE_PPI])
            enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
    }

    arch_counter_set_user_access();
    if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
        arch_timer_configure_evtstream();

    return 0;
}

/*
 * Fill in @clk for the given timer @type and register it.
 *
 * @type: ARCH_CP15_TIMER selects the per-cpu CP15 timer; any other
 *        value takes the memory-mapped timer branch.
 * @clk:  clock event device to initialise.
 *
 * The device only advertises CLOCK_EVT_FEAT_ONESHOT (plus C3STOP or
 * DYNIRQ depending on the branch); CLOCK_EVT_FEAT_PERIODIC is never set.
 * The timer is put into shutdown mode before it is registered.
 */
static void __arch_timer_setup(unsigned type,
                   struct clock_event_device *clk)
{
// fill in the clock_event_device fields
    clk->features = CLOCK_EVT_FEAT_ONESHOT;

    if (type == ARCH_CP15_TIMER) {   
        if (arch_timer_c3stop)
            clk->features |= CLOCK_EVT_FEAT_C3STOP;
        clk->name = "arch_sys_timer";
        clk->rating = 450;
        clk->cpumask = cpumask_of(smp_processor_id());
        if (arch_timer_use_virtual) {
            clk->irq = arch_timer_ppi[VIRT_PPI];
            clk->set_mode = arch_timer_set_mode_virt;
            clk->set_next_event = arch_timer_set_next_event_virt;
        } else {
            clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
            clk->set_mode = arch_timer_set_mode_phys;
            clk->set_next_event = arch_timer_set_next_event_phys;
        }
    } else {
        clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
        clk->name = "arch_mem_timer";
        clk->rating = 400;
        clk->cpumask = cpu_all_mask;
        if (arch_timer_mem_use_virtual) {
            clk->set_mode = arch_timer_set_mode_virt_mem;
            clk->set_next_event =
                arch_timer_set_next_event_virt_mem;
        } else {
            clk->set_mode = arch_timer_set_mode_phys_mem;
            clk->set_next_event =
                arch_timer_set_next_event_phys_mem;
        }
    }

    clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk);

    clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff);  // register the clock_event_device (min delta 0xf, max delta 0x7fffffff ticks)
}

通过__arch_timer_setup函数可见arch-timer的feature并不支持CLOCK_EVT_FEAT_PERIODIC。

/*
 * Configure and register a clock event device in one step.
 *
 * @dev:       device to register
 * @freq:      frequency passed on to clockevents_config()
 * @min_delta: stored as dev->min_delta_ticks
 * @max_delta: stored as dev->max_delta_ticks
 *
 * The tick limits are stored before clockevents_config() is called,
 * then the device is handed to clockevents_register_device().
 */
void clockevents_config_and_register(struct clock_event_device *dev,
                     u32 freq, unsigned long min_delta,
                     unsigned long max_delta)
{
    dev->min_delta_ticks = min_delta;
    dev->max_delta_ticks = max_delta;
    clockevents_config(dev, freq);
    clockevents_register_device(dev);
}

clockevents_register_device已经在1.2.1节中进行了解析。至此,clockevent的注册过程就完成了。
内核中还为应用层提供了sysfs的接口,实现过程不再描述了,可以通过如下接口查看。

cat /sys/devices/system/clockevents/clockevent0/current_device 
arch_sys_timer
cat /sys/devices/system/clockevents/clockevent1/current_device 
arch_sys_timer

此外,除了cpu core上的clockevent设备外,kernel中还有broadcast的clockevent注册,在此处也不说明了。

2 tick device

2.1 数据结构

struct tick_device只是对struct clock_event_device的一个封装,加入了运行模式变量,支持PERIODIC和ONESHOT两种模式。

/*
 * Thin wrapper around a clock_event_device that adds the mode
 * (periodic or one-shot) the tick is running in.
 */
struct tick_device {  
    struct clock_event_device *evtdev;  
    enum tick_device_mode mode;  
}; 
/*
 * Mode of a tick device. This is separate from the CLOCK_EVT_FEAT_*
 * capability bits of the underlying clock_event_device.
 */
enum tick_device_mode {
    TICKDEV_MODE_PERIODIC,
    TICKDEV_MODE_ONESHOT,
};

请注意此处的TICKDEV_MODE_PERIODIC与clock_event_device的成员feature CLOCK_EVT_FEAT_PERIODIC不要一起理解,即使是CLOCK_EVT_FEAT_ONESHOT的clockevent也可以支持TICKDEV_MODE_PERIODIC模式的tick device。

2.2 tickdevice的建立过程

在clock_event_device的注册过程中会调用tick_check_new_device通知tick device层进行处理,上文中只介绍了更新clockevent,此处对检查处理和tick device设备的创建进行说明。
此函数中的条件判断很多,一些场景在我使用的平台并没有出现,所以我只能按照代码进行理解了。

/*
 * Called when a new clock event device is registered: decide whether
 * @newdev should become the tick device of the current cpu.
 *
 * If the device does not serve this cpu, fails the per-cpu check, or is
 * not preferred over the current device, it is offered to the broadcast
 * layer instead (out_bc). Otherwise it replaces the current device and
 * the tick device is set up on top of it.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
    struct clock_event_device *curdev;
    struct tick_device *td;
    int cpu;

    cpu = smp_processor_id();  // id of the local cpu
    if (!cpumask_test_cpu(cpu, newdev->cpumask))  // does the device serve this cpu at all?
        goto out_bc;

    td = &per_cpu(tick_cpu_device, cpu);  // tick device of the current cpu
    curdev = td->evtdev;

    /* cpu local device ? */
    if (!tick_check_percpu(curdev, newdev, cpu))
        goto out_bc;

    /* Preference decision */
// Decides whether newdev should replace the current device; per the
// accompanying article this is based on one-shot support and rating.
    if (!tick_check_preferred(curdev, newdev))
        goto out_bc;

    if (!try_module_get(newdev->owner))
        return;

    /*
     * Replace the eventually existing device by the new
     * device. If the current device is the broadcast device, do
     * not give it back to the clockevents layer !
     */
    if (tick_is_broadcast_device(curdev)) {
        clockevents_shutdown(curdev);
        curdev = NULL;
    }
    clockevents_exchange_device(curdev, newdev);   // swap the clockevent devices
    tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
    if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
        tick_oneshot_notify();
    return;

out_bc:
    /*
     * Can the new device be used as a broadcast device ?
     */
    tick_install_broadcast_device(newdev);
}

根据tick_device_mode会建立周期性的或单触发的tick_device。如果是第一次setup,只能建立周期模式的tick device。

/*
 * Attach @newdev as the event device of tick device @td on @cpu.
 *
 * On the first setup for this cpu the tick device starts in periodic
 * mode, and the cpu may take over the do_timer duty. On a replacement,
 * the handler and next event of the old device are carried over and the
 * old device's handler is set to a no-op.
 *
 * Note: next_event is only assigned in the replacement branch. On the
 * first setup the mode is periodic, so the tick_setup_oneshot() call
 * that reads it is not reached.
 */
static void tick_setup_device(struct tick_device *td,
                  struct clock_event_device *newdev, int cpu,
                  const struct cpumask *cpumask)
{
    ktime_t next_event;
    void (*handler)(struct clock_event_device *) = NULL;

    /*
     * First device setup ?
     */
    if (!td->evtdev) {   // first tick device registration on this cpu
        /*
         * If no cpu took the do_timer update, assign it to
         * this cpu:
         */
        if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {  // one cpu has to take the global duty of updating jiffies and related time data
            if (!tick_nohz_full_cpu(cpu))
                tick_do_timer_cpu = cpu;
            else
                tick_do_timer_cpu = TICK_DO_TIMER_NONE;
            tick_next_period = ktime_get();
            tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
        }

        /*
         * Startup in periodic mode first.
         */
        td->mode = TICKDEV_MODE_PERIODIC;  // the first tick device setup on a cpu defaults to a periodic tick
    } else {
        handler = td->evtdev->event_handler;
        next_event = td->evtdev->next_event;
        td->evtdev->event_handler = clockevents_handle_noop;
    }

    td->evtdev = newdev;  // the tick device now uses the new clock_event_device

    /*
     * When the device is not per cpu, pin the interrupt to the
     * current cpu:
     */
    if (!cpumask_equal(newdev->cpumask, cpumask))
        irq_set_affinity(newdev->irq, cpumask);

    /*
     * When global broadcasting is active, check if the current
     * device is registered as a placeholder for broadcast mode.
     * This allows us to handle this x86 misfeature in a generic
     * way. This function also returns !=0 when we keep the
     * current active broadcast state for this CPU.
     */
    if (tick_device_uses_broadcast(newdev, cpu))
        return;

    if (td->mode == TICKDEV_MODE_PERIODIC)
        tick_setup_periodic(newdev, 0);
    else
        tick_setup_oneshot(newdev, handler, next_event);
}

配置周期性的tick_device,需要调用tick_setup_periodic。前面说过cpu第一次设置tick_device时默认配置成周期触发,所以启动阶段每个cpu都会调用tick_setup_periodic。

/*
 * Put @dev into service as a periodic tick source.
 *
 * @dev:       clock event device backing the tick
 * @broadcast: passed through to tick_set_periodic_handler()
 *
 * If the hardware supports periodic mode (and broadcast one-shot is not
 * active) the device is simply switched to periodic mode. Otherwise the
 * periodic tick is emulated with one-shot events.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
    tick_set_periodic_handler(dev, broadcast);   //(1)

    /* Broadcast setup ? */
    if (!tick_device_is_functional(dev))
        return;

    if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&   
        !tick_broadcast_oneshot_active()) {     //(2)
        clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
    } else {    //(3)
        unsigned long seq;
        ktime_t next;

        /* read tick_next_period consistently under the jiffies seqlock */
        do {
            seq = read_seqbegin(&jiffies_lock);
            next = tick_next_period;
        } while (read_seqretry(&jiffies_lock, seq));

        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

        /* advance by one period until programming the event succeeds */
        for (;;) {
            if (!clockevents_program_event(dev, next, false))
                return;
            next = ktime_add(next, tick_period);
        }
    }
}

(1)设置event_handler=tick_handle_periodic
(2)clock_event_device支持周期触发,只需要设置clock_event_device的模式为周期触发
(3)clock_event_device不支持周期触发,将clock_event_device设置为单触发模式,并使用clockevents_program_event编程设置下一事件。
在下一次clockevent事件发生时都会调用tick_handle_periodic。
周期性tick的clock event handler的处理函数tick_handle_periodic分析如下,

/*
 * Event handler of a tick device running in periodic mode.
 *
 * Runs the periodic tick work for the current cpu. If @dev is operated
 * in one-shot mode, it also programs the next event one tick_period
 * after the previous one.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
    int cpu = smp_processor_id();
    ktime_t next = dev->next_event;

    tick_periodic(cpu);  //(1)

    if (dev->mode != CLOCK_EVT_MODE_ONESHOT)  // a device not in one-shot mode needs no reprogramming
        return;
    for (;;) {  // a one-shot device must be programmed again for the next event via clockevents_program_event
        /*
         * Setup the next period for devices, which do not have
         * periodic mode:
         */
        next = ktime_add(next, tick_period);

        if (!clockevents_program_event(dev, next, false))
            return;
        /*
         * Have to be careful here. If we're in oneshot mode,
         * before we call tick_periodic() in a loop, we need
         * to be sure we're using a real hardware clocksource.
         * Otherwise we could get trapped in an infinite
         * loop, as the tick_periodic() increments jiffies,
         * which then will increment time, possibly causing
         * the loop to trigger again and again.
         */
        if (timekeeping_valid_for_hres())
            tick_periodic(cpu);
    }
}

tick_periodic会处理全局的时间信息更新任务和本地cpu上的进程时间信息。处理全局时间jiffies时,它需要选用一个全局的tick device来执行。

/*
 * Periodic tick work for @cpu.
 *
 * Only the cpu that owns the do_timer duty (tick_do_timer_cpu) advances
 * tick_next_period and jiffies under jiffies_lock and then updates the
 * wall time. Every cpu updates its process times and runs the profiling
 * hook.
 */
static void tick_periodic(int cpu)
{
    if (tick_do_timer_cpu == cpu) {   // this cpu owns the global tick duty
        write_seqlock(&jiffies_lock);

        /* Keep track of the next tick event */
        tick_next_period = ktime_add(tick_next_period, tick_period);

        do_timer(1);  // update jiffies
        write_sequnlock(&jiffies_lock);
        update_wall_time(); // update the wall time
    }

    update_process_times(user_mode(get_irq_regs()));  // update per-process time accounting; per the article this also invokes the periodic scheduler (scheduler_tick)
    profile_tick(CPU_PROFILING);
}

到此介绍了每个cpu的tick_device的注册,启动阶段tick_device工作在周期触发模式,并且它对应的event_handler为tick_handle_periodic。在高分辨时钟(CONFIG_HIGH_RES_TIMERS)和动态时钟(CONFIG_NO_HZ)特性开启后还会有变化。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值