本文基于Linux 4.6.3内核版本的代码来说明softirq机制,代码在kernel/softirq.c中,代码量不算多,也就近800行。中断处理分为上半部和下半部:有一些任务不是特别紧急,没必要在关闭中断的条件下处理,可以在开中断的情况下延迟一段时间再处理,这些任务属于下半部(bottom half)。在内核中有两种机制来做这一部分工作:软中断(softirq)和工作队列,本文主要介绍前者,另外基于softirq的tasklet会在另外一篇文章介绍。
“软中断”这个词的含义比较宽泛(比如有时异常也被称作软中断),所以下文统一用softirq来精确表示。softirq既然是中断,那么它和硬中断的运作流程应该是相仿的。softirq首先必须被初始化,即为它定义一个可执行的函数;然后进行激活,使得softirq进入挂起状态以便能被调用;中断都可以设置屏蔽状态,softirq同样也可以选择屏蔽;最后会在适当的时机执行softirq。这就是softirq的全部,下面按初始化、激活、屏蔽、执行这四个方面来具体介绍。
softirq初始化
open_softirq()函数处理softirq的初始化,在内核中用softirq_vec[NR_SOFTIRQS]这个数组来表示softirq,softirq_vec是struct softirq_action类型的,如下所示:
/*
 * open_softirq - install the handler for one softirq type.
 * @nr:     index into softirq_vec[] selecting the softirq type
 * @action: handler stored in that entry
 *
 * Only stores the function pointer; @nr is not range-checked here.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
}
/* Handler table: one entry per softirq type, NR_SOFTIRQS entries in total. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
/*
 * One entry of softirq_vec[]. It holds only the handler, which takes a
 * pointer to a struct softirq_action as its sole argument.
 */
struct softirq_action
{
void (*action)(struct softirq_action *);
};
从代码中可以看出open_softirq()只有2个参数,其中nr指定哪种softirq,action指定要执行的函数,下面分别介绍一下。
softirq种类
上面说了nr来指定要初始化哪种softirq,在4.6.3中softirq有NR_SOFTIRQS种,从数组中就可以看出,种类有这么几种:
enum
{
HI_SOFTIRQ=0,-----------数字越低优先级越高,所以最优先处理,代表高优先级的tasklet
TIMER_SOFTIRQ,----------定时器softirq,处理到期的内核定时器(不是tasklet)
NET_TX_SOFTIRQ,---------把数据包传送到网卡
NET_RX_SOFTIRQ,---------从网卡接收数据包
BLOCK_SOFTIRQ,
IRQ_POLL_SOFTIRQ,
TASKLET_SOFTIRQ,--------常规tasklet
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the numbering. Sigh! */
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
};
有关tasklet的介绍见 linux中断子系统 - softirq/tasklet
TODO:其他种类介绍
softirq激活
raise_softirq用来激活softirq,如下:
/*
 * raise_softirq - raise softirq @nr.
 * @nr: softirq number, passed through to raise_softirq_irqoff()
 *
 * Wraps raise_softirq_irqoff() in local_irq_save()/local_irq_restore(), so
 * the call runs with local interrupts disabled and the caller's interrupt
 * state is put back afterwards.
 */
void raise_softirq(unsigned int nr)
{
unsigned long flags;
local_irq_save(flags);
raise_softirq_irqoff(nr);
local_irq_restore(flags);
}
/*
 * raise_softirq_irqoff - raise softirq @nr and, if needed, wake ksoftirqd.
 * @nr: softirq number, passed through to __raise_softirq_irqoff()
 *
 * raise_softirq() calls this between local_irq_save() and
 * local_irq_restore(), i.e. with local interrupts disabled.
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
__raise_softirq_irqoff(nr);
/*
* If we're in an interrupt or softirq, we're done
* (this also catches softirq-disabled code). We will
* actually run the softirq once we return from
* the irq or softirq.
*
* Otherwise we wake up ksoftirqd to make sure we
* schedule the softirq soon.
*/
if (!in_interrupt())
wakeup_softirqd();
}
softirq屏蔽
实现softirq屏蔽的关键数据结构是irq_cpustat_t,其中的字段__softirq_pending是一个32位的位掩码,每一位对应一种softirq是否处于挂起状态。为了获取和设置位掩码的值,内核使用宏local_softirq_pending(),它选择本地cpu的软中断位掩码;各cpu的软中断状态用irq_stat[NR_CPUS]全局数组来表示,相关代码如下:
/* Interrupt state array with NR_CPUS entries, indexed by CPU number. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
/* The __softirq_pending word of the CPU returned by smp_processor_id(). */
#define local_softirq_pending() \
__IRQ_STAT(smp_processor_id(), __softirq_pending)
/* Access field @member of the irq_stat[] entry belonging to @cpu. */
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
/* Layout of one irq_stat[] entry. */
typedef struct {
unsigned int __softirq_pending; /* pending softirqs, read via local_softirq_pending() */
#ifdef CONFIG_SMP
unsigned int ipi_irqs[NR_IPI]; /* CONFIG_SMP only; presumably per-IPI counters - TODO confirm */
#endif
} ____cacheline_aligned irq_cpustat_t;
softirq执行
softirq执行是在几个点上进行的,有下面这么几种情况会检查活动的softirq并调用执行:
- 当内核调用local_bh_enable()函数的时候
- 在中断返回调用irq_exit()时或者do_IRQ()时
- 内核线程ksoftirqd/n被唤醒时
- 在多处理器系统中,当CPU处理完处理器间中断时
- 在使用APIC的系统中处理完本地定时器中断时
上面的5种情况,常见的是前三种,本文会介绍第1和3种,第二种会在中断子系统其他文章中介绍,最后两种没接触过不做过多介绍。在这之前,先介绍一下函数do_softirq(),因为softirq的执行最终都是要调用此函数来处理各个softirq的函数。
do_softirq()
/*
 * do_softirq - run pending softirqs unless in_interrupt() is true.
 *
 * Returns immediately when in_interrupt() is true. Otherwise it saves and
 * disables local interrupts, reads the pending mask, calls
 * do_softirq_own_stack() if any bit is set, and finally restores the saved
 * interrupt state.
 */
asmlinkage __visible void do_softirq(void)
{
__u32 pending;
unsigned long flags;
if (in_interrupt())
return;----------------------------处于中断上下文中或者当前禁用软中断
local_irq_save(flags);-----------------保存IF状态值并禁用本地中断
pending = local_softirq_pending();-----检查是否有挂起的softirq
if (pending)
do_softirq_own_stack();------------调用__do_softirq()
local_irq_restore(flags);--------------恢复IF状态值
}
/*
 * __do_softirq - run the handlers of all softirqs pending on this CPU.
 *
 * Takes a snapshot of the pending mask, clears the mask, enables local
 * interrupts and calls the handler of every set bit from the lowest bit
 * upwards. Afterwards, with interrupts disabled again, the mask is re-read:
 * if new softirqs were raised meanwhile, the pass is restarted as long as
 * the time limit (MAX_SOFTIRQ_TIME), the restart limit (MAX_SOFTIRQ_RESTART)
 * and !need_resched() all allow it; otherwise ksoftirqd is woken to deal
 * with the remainder.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
bool in_hardirq;
__u32 pending;
int softirq_bit;
/*
* Mask out PF_MEMALLOC as current task context is borrowed for the
* softirq. A softirq handler such as network RX might set PF_MEMALLOC
* again if the socket is related to swap
*/
current->flags &= ~PF_MEMALLOC;
pending = local_softirq_pending();
account_irq_enter_time(current);
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
in_hardirq = lockdep_softirq_start();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
/* Handlers below run with local interrupts enabled. */
local_irq_enable();
h = softirq_vec;
while ((softirq_bit = ffs(pending))) {----------循环检查每个softirq的pending状态
unsigned int vec_nr;
int prev_count;
/* ffs() is 1-based: move h to the vector of the lowest set bit. */
h += softirq_bit - 1;
vec_nr = h - softirq_vec;
/* Remember preempt_count so a handler that leaks it can be detected. */
prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h);--------------调用相关种类的softirq函数执行
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
vec_nr, softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
/* Repair the count so the imbalance does not propagate. */
preempt_count_set(prev_count);
}
/* Step past the handled vector and shift out its bit and the lower ones. */
h++;
pending >>= softirq_bit;
}
rcu_bh_qs();
local_irq_disable();
/* Pick up softirqs raised while the handlers were running. */
pending = local_softirq_pending();
if (pending) {
if (time_before(jiffies, end) && !need_resched() &&
--max_restart)
goto restart;
wakeup_softirqd();------------唤醒内核线程ksoftirqd/n
}
lockdep_softirq_end(in_hardirq);
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET);
WARN_ON_ONCE(in_interrupt());
/* Put back the PF_MEMALLOC bit as it was saved in old_flags on entry. */
tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
local_bh_enable()
/*
 * local_bh_enable - thin wrapper around __local_bh_enable_ip(), passing the
 * current code address (_THIS_IP_) and SOFTIRQ_DISABLE_OFFSET as the count.
 */
static inline void local_bh_enable(void)
{
__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}
/*
 * __local_bh_enable_ip - drop @cnt from preempt_count and run pending softirqs.
 * @ip:  code address handed to trace_softirqs_on()
 * @cnt: amount removed from preempt_count in total
 *
 * @cnt is removed in two steps (cnt - 1, then 1) so that the count stays
 * raised while do_softirq() runs. Warns once if called when in_irq() is true
 * or with interrupts disabled.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
local_irq_disable();
#endif
/*
* Are softirqs going to be turned on now:
*/
if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
trace_softirqs_on(ip);
/*
* Keep preemption disabled until we are done with
* softirq processing:
*/
preempt_count_sub(cnt - 1);
if (unlikely(!in_interrupt() && local_softirq_pending())) {
/*
* Run softirq if any pending. And do it in its own stack
* as we may be calling this deep in a task call stack already.
*/
do_softirq();----------------执行softirq
}
/* Drop the last unit of @cnt that was held back above. */
preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
local_irq_enable();
#endif
preempt_check_resched();
}
内核线程ksoftirqd/n
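ksoftirqd内核线程为一个重要而难以平衡的问题提供了解决方案:当softirq持续高流量出现时(例如在do_softirq运行期间又不断有新的softirq被激活),开发者可以做出两种选择:第一,忽略do_softirq运行时新出现的softirq,留到下一次再处理,但这会增加softirq的处理延迟;第二,不断地重新检查并处理挂起的softirq,但这可能让其他进程长时间得不到运行。这两种做法都不能有效解决问题。ksoftirqd内核线程就可以解决这种问题:__do_softirq()在重复处理超过一定次数或时间之后不再继续,而是唤醒ksoftirqd;在机器空闲时,ksoftirqd就可以运行来执行挂起的softirq。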
计算机中有几个cpu core,那么就有几个ksoftirqd,如下所示是我的计算机中的ksoftirqd线程数。
lhj@lhj-pc:~/works$ ps aux | grep ksoftirq
root 3 0.0 0.0 0 0 ? S Sep08 0:19 [ksoftirqd/0]
root 10 0.0 0.0 0 0 ? S Sep08 0:13 [ksoftirqd/1]
root 14 0.0 0.0 0 0 ? S Sep08 0:13 [ksoftirqd/2]
root 18 0.0 0.0 0 0 ? S Sep08 0:13 [ksoftirqd/3]
下面我们按照这个逻辑来分析ksoftirqd:它是如何表示的,它在系统中是何时、怎样创建的,以及它主要做什么工作。
1.ksoftirqd的定义
下面这个结构体用来表示ksoftirqd
/*
 * Descriptor of the ksoftirqd threads. spawn_ksoftirqd() passes it to
 * smpboot_register_percpu_thread().
 */
static struct smp_hotplug_thread softirq_threads = {
.store = &ksoftirqd,
.thread_should_run = ksoftirqd_should_run,-----判断是否可以运行
.thread_fn = run_ksoftirqd,----------------ksoftirqd具体工作
.thread_comm = "ksoftirqd/%u",-----------名字,如上计算机中所示
};
2.ksoftirqd的创建
内核在初始化的时候会调用spawn_ksoftirqd来创建内核线程
/*
 * spawn_ksoftirqd - init-time setup of the ksoftirqd threads.
 *
 * Registers the cpu_nfb CPU notifier, then registers softirq_threads as a
 * per-CPU thread; BUG_ON() triggers if that registration returns non-zero.
 *
 * Return: always 0.
 */
static __init int spawn_ksoftirqd(void)
{
register_cpu_notifier(&cpu_nfb);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));--调用到smpboot_register_percpu_thread_cpumask
return 0;
}
/*
 * smpboot_register_percpu_thread_cpumask - create the threads described by
 * @plug_thread on every online CPU.
 * @plug_thread: thread descriptor; its ->cpumask is allocated here and set
 *               to a copy of @cpumask
 * @cpumask:     CPUs on which the new thread is also unparked
 *
 * A thread is created for each online CPU, but only unparked on CPUs that
 * are set in @cpumask. If creation fails on any CPU, the threads of
 * @plug_thread are destroyed and the cpumask copy is freed. On success
 * @plug_thread is added to the hotplug_threads list.
 *
 * Return: 0 on success, -ENOMEM if the cpumask cannot be allocated,
 * otherwise the error returned by __smpboot_create_thread().
 */
int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
const struct cpumask *cpumask)
{
unsigned int cpu;
int ret = 0;
if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(plug_thread->cpumask, cpumask);
get_online_cpus();
mutex_lock(&smpboot_threads_lock);
for_each_online_cpu(cpu) {
ret = __smpboot_create_thread(plug_thread, cpu);-------为每个cpu创建内核线程
if (ret) {
/* Undo everything done so far; the out label only unlocks. */
smpboot_destroy_threads(plug_thread);
free_cpumask_var(plug_thread->cpumask);
goto out;
}
if (cpumask_test_cpu(cpu, cpumask))
smpboot_unpark_thread(plug_thread, cpu);
}
list_add(&plug_thread->list, &hotplug_threads);
out:
mutex_unlock(&smpboot_threads_lock);
put_online_cpus();
return ret;
}
3.ksoftirqd的主要工作
ksoftirqd的主要工作在run_ksoftirqd函数中
/*
 * run_ksoftirqd - thread function of ksoftirqd (.thread_fn of softirq_threads).
 * @cpu: not used in this function body
 *
 * With local interrupts disabled, checks for pending softirqs. If there are
 * any, runs __do_softirq(), re-enables interrupts and calls
 * cond_resched_rcu_qs(); otherwise it only re-enables interrupts.
 */
static void run_ksoftirqd(unsigned int cpu)
{
local_irq_disable();
if (local_softirq_pending()) {----------检查pending,若有就往下走
/*
* We can safely run softirq on inline stack, as we are not deep
* in the task stack here.
*/
__do_softirq();---------------------执行softirq
local_irq_enable();
cond_resched_rcu_qs();
return;
}
local_irq_enable();
}