Linux中断子系统 - softirq

本文基于linux4.6.3内核版本代码来说明softirq机制,代码在kernel/softirq.c中,代码不算多也就近800行。在中断处理中,分上半部和下半部,有一些任务不是特别紧急的,没必要在关闭中断的条件下处理,可以在开中断的情况下,延迟一段时间放在后边处理,这些情况属于后半部(bottom half)处理部分。在内核中有两种机制来做这一部分工作:软中断(softirq)和工作队列,本文主要介绍前者,另外基于softirq的tasklet会在另外一篇文章介绍。

“软中断”这个词涵盖的范围比较广(比如异常有时也被归为软中断),所以下面统一用softirq来精确表示。softirq既然是中断,那么它和硬中断的运作流程应该是相仿的:softirq首先必须被初始化,即为其定义一个可执行的函数;然后进行激活,使得softirq进入挂起状态以便能被调用;中断都可以设置屏蔽状态,softirq同样也可以选择屏蔽;最后会在适当的时机执行softirq。这就是softirq的全部,下面按照这四个方面来具体介绍。

softirq初始化

open_softirq()函数处理softirq的初始化,在内核中用softirq_vec[NR_SOFTIRQS]这个数组来表示softirq,softirq_vec是struct softirq_action类型的,如下所示:

/*
 * Install the handler for one softirq type.
 *
 * nr:     index of the softirq_vec[] slot to fill; it is used as-is,
 *         no range check is performed here.
 * action: function pointer stored as that slot's handler.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
    softirq_vec[nr].action = action;
}

/* Table of softirq descriptors: NR_SOFTIRQS entries, indexed by softirq number. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
/* Descriptor of a single softirq; it carries nothing but the handler pointer. */
struct softirq_action
{
    /* Handler function; it is passed a pointer to a softirq_action. */
    void    (*action)(struct softirq_action *);
};

从代码中可以看出open_softirq()只有2个参数,其中nr指定是哪种softirq,action指定该softirq要执行的函数,下面分别介绍一下。

softirq种类

上面说了nr来指定要初始化哪种softirq,在4.6.3中softirq有NR_SOFTIRQS种,从数组中就可以看出,种类有这么几种:

enum
{
    HI_SOFTIRQ=0,-----------数字越低优先级越高,所以最优先处理,代表高优先级的tasklet
    TIMER_SOFTIRQ,----------定时器相关的softirq,用于处理到期的内核定时器
    NET_TX_SOFTIRQ,---------把数据包传送到网卡
    NET_RX_SOFTIRQ,---------从网卡接收数据包
    BLOCK_SOFTIRQ,
    IRQ_POLL_SOFTIRQ,
    TASKLET_SOFTIRQ,--------常规tasklet
    SCHED_SOFTIRQ,
    HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the numbering. Sigh! */
    RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
    NR_SOFTIRQS
};

有关tasklet的介绍见 linux中断子系统 - softirq/tasklet
TODO:其他种类介绍

softirq激活

raise_softirq用来激活softirq,如下:

/*
 * Raise (mark pending) softirq number `nr`.
 *
 * Wraps raise_softirq_irqoff() in a local_irq_save()/local_irq_restore()
 * pair, so the caller's interrupt state is preserved across the call.
 */
void raise_softirq(unsigned int nr)
{
    unsigned long flags;
    local_irq_save(flags);
    raise_softirq_irqoff(nr);
    local_irq_restore(flags);
}

/*
 * Raise softirq number `nr` and, when not in interrupt context, wake
 * ksoftirqd so the softirq gets processed.
 *
 * NOTE(review): the "_irqoff" suffix and the caller raise_softirq()
 * suggest this expects local interrupts to be disabled already — confirm.
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
    __raise_softirq_irqoff(nr);

    /*
     * If we're in an interrupt or softirq, we're done
     * (this also catches softirq-disabled code). We will
     * actually run the softirq once we return from
     * the irq or softirq.
     *
     * Otherwise we wake up ksoftirqd to make sure we
     * schedule the softirq soon.
     */
    if (!in_interrupt())
        wakeup_softirqd();
}

softirq屏蔽

实现softirq屏蔽的关键数据结构是irq_cpustat_t,里面的字段__softirq_pending是一个32位的位掩码,每一位对应一种softirq是否挂起。为了获取和设置位掩码的值,内核使用宏local_softirq_pending(),它选择本地cpu的软中断位掩码;各个cpu的软中断状态用irq_stat[NR_CPUS]全局数组来表示,相关代码如下:

/* One irq_cpustat_t status record per CPU, indexed by CPU number. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;

/*
 * The __softirq_pending field of the CPU returned by smp_processor_id().
 * Expands to a plain member access.
 */
#define local_softirq_pending() \
    __IRQ_STAT(smp_processor_id(), __softirq_pending)

/* Access field `member` of the irq_stat[] record belonging to `cpu`. */
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)

/* Per-CPU interrupt status record; element type of irq_stat[]. */
typedef struct {
    /* Pending-softirq bitmask read through local_softirq_pending(). */
    unsigned int __softirq_pending;
#ifdef CONFIG_SMP
    /* NOTE(review): presumably one counter per IPI type — confirm. */
    unsigned int ipi_irqs[NR_IPI];
#endif
} ____cacheline_aligned irq_cpustat_t;

softirq执行

softirq执行是在几个点上进行的,有下面这么几种情况会检查活动的softirq并调用执行:

  1. 当内核调用local_bh_enable()函数的时候
  2. 在中断返回调用irq_exit()时或者do_IRQ()时
  3. 内核线程ksoftirqd/n被唤醒时
  4. 在多处理器系统中,当CPU处理完处理器间中断时
  5. 在使用APIC的系统中处理完本地定时器中断时

上面的5种情况中,常见的是前三种。本文会介绍第1种和第3种,第2种会在中断子系统的其他文章中介绍,最后两种没接触过,不做过多介绍。在这之前,先介绍一下函数do_softirq(),因为softirq的执行最终都要通过它(或它内部调用的__do_softirq())来调用各个softirq的处理函数。

do_softirq()

/*
 * Process pending softirqs on the local CPU.
 *
 * Returns immediately when in_interrupt() is true. Otherwise the pending
 * mask is sampled between local_irq_save() and local_irq_restore(), and
 * do_softirq_own_stack() is called only if at least one bit is set.
 */
asmlinkage __visible void do_softirq(void)
{
    __u32 pending;
    unsigned long flags;

    if (in_interrupt())
        return;----------------------------处于中断上下文中或者当前禁用软中断

    local_irq_save(flags);-----------------保存IF状态值并禁用本地中断

    pending = local_softirq_pending();-----检查是否有挂起的softirq

    if (pending)
        do_softirq_own_stack();------------调用__do_softirq()

    local_irq_restore(flags);--------------恢复IF状态值
}
/*
 * Core softirq dispatch loop.
 *
 * Snapshots the local pending mask, then for each pass: clears the
 * per-CPU mask, enables interrupts, and calls the ->action handler of
 * every softirq_vec[] entry whose bit is set in the snapshot, lowest
 * bit first. After a pass, interrupts are disabled and the mask is read
 * again. If softirqs were raised meanwhile, another pass is made as long
 * as jiffies is still before `end`, need_resched() is false and the
 * restart budget (MAX_SOFTIRQ_RESTART) is not used up; otherwise
 * ksoftirqd is woken to handle the remainder.
 *
 * The callers shown in this document (do_softirq() via
 * do_softirq_own_stack(), and run_ksoftirqd()) disable local interrupts
 * before calling; this function returns with them disabled as well.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
    unsigned long old_flags = current->flags;
    int max_restart = MAX_SOFTIRQ_RESTART;
    struct softirq_action *h;
    bool in_hardirq;
    __u32 pending;
    int softirq_bit;

    /*
     * Mask out PF_MEMALLOC as the current task context is borrowed for
     * the softirq. A softirq handler such as network RX might set
     * PF_MEMALLOC again if the socket is related to swap
     */
    current->flags &= ~PF_MEMALLOC;

    pending = local_softirq_pending();
    account_irq_enter_time(current);

    __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
    in_hardirq = lockdep_softirq_start();

restart:
    /* Reset the pending bitmask before enabling irqs */
    set_softirq_pending(0);

    local_irq_enable();

    /* Each pass starts scanning from the first table entry. */
    h = softirq_vec;

    while ((softirq_bit = ffs(pending))) {----------循环检查每个softirq的pending状态
        unsigned int vec_nr;
        int prev_count;

        /* ffs() is 1-based: step h forward to the entry of the lowest set bit. */
        h += softirq_bit - 1;

        vec_nr = h - softirq_vec;
        prev_count = preempt_count();

        kstat_incr_softirqs_this_cpu(vec_nr);

        trace_softirq_entry(vec_nr);
        h->action(h);--------------调用相关种类的softirq函数执行
        trace_softirq_exit(vec_nr);
        /* A handler must leave preempt_count as it found it; report and repair if not. */
        if (unlikely(prev_count != preempt_count())) {
            pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
                   vec_nr, softirq_to_name[vec_nr], h->action,
                   prev_count, preempt_count());
            preempt_count_set(prev_count);
        }
        /*
         * Step past the bit just handled, in both the table pointer and
         * the mask, so the next ffs() result is relative to h again.
         */
        h++;
        pending >>= softirq_bit;
    }

    rcu_bh_qs();
    local_irq_disable();

    /* Pick up softirqs that were raised while this pass was running. */
    pending = local_softirq_pending();
    if (pending) {
        if (time_before(jiffies, end) && !need_resched() &&
            --max_restart)
            goto restart;

        wakeup_softirqd();------------唤醒内核线程ksoftirqd/n
    }

    lockdep_softirq_end(in_hardirq);
    account_irq_exit_time(current);
    __local_bh_enable(SOFTIRQ_OFFSET);
    WARN_ON_ONCE(in_interrupt());
    /* Put back the PF_MEMALLOC bit that was cleared on entry. */
    tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}

local_bh_enable()

/*
 * Re-enable bottom halves: forwards to __local_bh_enable_ip() with
 * _THIS_IP_ as the ip argument and SOFTIRQ_DISABLE_OFFSET as the count.
 */
static inline void local_bh_enable(void)
{
    __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}
/*
 * Drop `cnt` from the preempt count and run pending softirqs.
 *
 * The count is lowered in two steps: first by cnt - 1, leaving one
 * unit in place while do_softirq() may run, then by the final
 * preempt_count_dec(). Pending softirqs are processed only when
 * in_interrupt() is false after the first step.
 *
 * ip:  passed to trace_softirqs_on() when softirqs are about to be
 *      turned back on.
 * cnt: amount to remove from the preempt count.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
    WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
    local_irq_disable();
#endif
    /*
     * Are softirqs going to be turned on now:
     */
    if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
        trace_softirqs_on(ip);
    /*
     * Keep preemption disabled until we are done with
     * softirq processing:
     */
    preempt_count_sub(cnt - 1);

    if (unlikely(!in_interrupt() && local_softirq_pending())) {
        /*
         * Run softirq if any pending. And do it in its own stack
         * as we may be calling this deep in a task call stack already.
         */
        do_softirq();----------------执行softirq
    }

    /* Release the last unit held back by the "cnt - 1" above. */
    preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
    local_irq_enable();
#endif
    preempt_check_resched();
}

内核线程ksoftirqd/n

ksoftirqd内核线程为重要而难以平衡的问题提供了解决方案,对于softirq的连续高流量可能会产生问题,开发者可以做出两种选择,第一,忽略do_softirq运行时新出现的softirq,第二,不断地重新检查挂起的softirq,这两种都不能有效解决问题。ksoftirqd内核线程就可以解决这种问题,在机器空闲时,ksoftirqd就可以运行来执行挂起的softirq。

计算机中有几个cpu core,那么就有几个ksoftirqd,如下所示是我的计算机中的ksoftirqd线程数。

lhj@lhj-pc:~/works$ ps aux | grep ksoftirq
root         3  0.0  0.0      0     0 ?        S    Sep08   0:19 [ksoftirqd/0]
root        10  0.0  0.0      0     0 ?        S    Sep08   0:13 [ksoftirqd/1]
root        14  0.0  0.0      0     0 ?        S    Sep08   0:13 [ksoftirqd/2]
root        18  0.0  0.0      0     0 ?        S    Sep08   0:13 [ksoftirqd/3]

下面我们按照这样的顺序来分析ksoftirqd:它是如何表示的,它在系统中是何时、怎样创建的,以及它主要做什么工作。

1.ksoftirqd的定义

下面这个结构体用来表示ksoftirqd

/*
 * Descriptor of the ksoftirqd threads; spawn_ksoftirqd() hands it to
 * smpboot_register_percpu_thread().
 */
static struct smp_hotplug_thread softirq_threads = {
    .store          = &ksoftirqd,
    .thread_should_run  = ksoftirqd_should_run,-----判断是否可以运行
    .thread_fn      = run_ksoftirqd,----------------ksoftirqd具体工作
    .thread_comm        = "ksoftirqd/%u",-----------名字,如上计算机中所示
};

2.ksoftirqd的创建

内核在初始化的时候会调用spawn_ksoftirqd来创建内核线程

/*
 * Init-time setup for ksoftirqd: registers the cpu_nfb CPU notifier and
 * the softirq_threads descriptor. A non-zero result from the thread
 * registration triggers BUG_ON(). Always returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
    register_cpu_notifier(&cpu_nfb);
    BUG_ON(smpboot_register_percpu_thread(&softirq_threads));--调用到smpboot_register_percpu_thread_cpumask
    return 0;
}
/*
 * Register a per-CPU hotplug thread descriptor and create its threads.
 *
 * plug_thread: descriptor to register; its ->cpumask is allocated here
 *              and filled with a copy of `cpumask`.
 * cpumask:     CPUs whose thread is unparked right after creation.
 *
 * A thread is created for every online CPU, whether or not it is in
 * `cpumask`. On success the descriptor is added to the hotplug_threads
 * list.
 *
 * Returns 0 on success, -ENOMEM if the cpumask cannot be allocated, or
 * the error from __smpboot_create_thread(). In the last case the threads
 * are destroyed and the cpumask is freed before returning.
 */
int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
                       const struct cpumask *cpumask)
{
    unsigned int cpu;
    int ret = 0;

    if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
        return -ENOMEM;
    cpumask_copy(plug_thread->cpumask, cpumask);

    /* Both are released at the `out` label, on success and on failure. */
    get_online_cpus();
    mutex_lock(&smpboot_threads_lock);
    for_each_online_cpu(cpu) {
        ret = __smpboot_create_thread(plug_thread, cpu);-------为每个cpu创建内核线程
        if (ret) {
            /* Undo everything done so far and skip the list_add() below. */
            smpboot_destroy_threads(plug_thread);
            free_cpumask_var(plug_thread->cpumask);
            goto out;
        }
        if (cpumask_test_cpu(cpu, cpumask))
            smpboot_unpark_thread(plug_thread, cpu);
    }
    list_add(&plug_thread->list, &hotplug_threads);
out:
    mutex_unlock(&smpboot_threads_lock);
    put_online_cpus();
    return ret;
}

3.ksoftirqd的主要工作

ksoftirqd的主要工作在run_ksoftirqd函数中

/*
 * Thread function of ksoftirqd (installed as .thread_fn of
 * softirq_threads).
 *
 * With local interrupts disabled, checks the pending mask. If any
 * softirq is pending, calls __do_softirq() directly, re-enables
 * interrupts and calls cond_resched_rcu_qs(). Otherwise it only
 * re-enables interrupts.
 *
 * cpu: not used in this body.
 */
static void run_ksoftirqd(unsigned int cpu)
{
    local_irq_disable();
    if (local_softirq_pending()) {----------检查pending,若有就往下走
        /*
         * We can safely run softirq on inline stack, as we are not deep
         * in the task stack here.
         */
        __do_softirq();---------------------执行softirq
        local_irq_enable();
        cond_resched_rcu_qs();
        return;
    }
    local_irq_enable();
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值