Interrupt initialization
void __init early_trap_init(void)
{
	// CONFIG_VECTORS_BASE is set at make menuconfig time
	// (0xffff0000 here: the address the vector page is mapped at)
	unsigned long vectors = CONFIG_VECTORS_BASE;
	extern char __stubs_start[], __stubs_end[];
	extern char __vectors_start[], __vectors_end[];
	extern char __kuser_helper_start[], __kuser_helper_end[];
	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
	/*
	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
	 * into the vector page, mapped at 0xffff0000, and ensure these
	 * are visible to the instruction stream.
	 */
	// copy the exception vector table
	memcpy((void *)vectors, __vectors_start,
	       __vectors_end - __vectors_start);
	// copy the irq-vectors (dispatch stubs)
	memcpy((void *)vectors + 0x200, __stubs_start,
	       __stubs_end - __stubs_start);
	// copy the kuser helpers
	memcpy((void *)vectors + 0x1000 - kuser_sz,
	       __kuser_helper_start, kuser_sz);
	/*
	 * Copy signal return handlers into the vector page, and
	 * set sigreturn to be a pointer to these.
	 */
	memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
	       sizeof(sigreturn_codes));
	memcpy((void *)KERN_RESTART_CODE, syscall_restart_code,
	       sizeof(syscall_restart_code));
	flush_icache_range(vectors, vectors + PAGE_SIZE);
	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
}
irq-vectors @ kernel/arch/arm/kernel/entry-armv.S:
__stubs_start:
/*
* Interrupt dispatcher
*/
// vector_stub is a macro, described below
vector_stub irq, IRQ_MODE, 4
.long __irq_usr @ 0 (USR_26 / USR_32)
.long __irq_invalid @ 1 (FIQ_26 / FIQ_32)
.long __irq_invalid @ 2 (IRQ_26 / IRQ_32)
.long __irq_svc @ 3 (SVC_26 / SVC_32)
.long __irq_invalid @ 4
.long __irq_invalid @ 5
.long __irq_invalid @ 6
……
.word vector_swi
.globl __stubs_end
__stubs_end:
Exception vector table @ kernel/arch/arm/kernel/entry-armv.S:
__vectors_start:
ARM( swi SYS_ERROR0 )
THUMB( svc #0 )
THUMB( nop )
W(b) vector_und + stubs_offset
W(ldr) pc, .LCvswi + stubs_offset
W(b) vector_pabt + stubs_offset
W(b) vector_dabt + stubs_offset
W(b) vector_addrexcptn + stubs_offset
W(b) vector_irq + stubs_offset
W(b) vector_fiq + stubs_offset
.globl __vectors_end
__vectors_end:
@kernel/arch/arm/configs/msm7630_joe_defconfig
CONFIG_VECTORS_BASE=0xffff0000
@kernel/arch/arm/kernel/signal.h
#define KERN_SIGRETURN_CODE	(CONFIG_VECTORS_BASE + 0x00000500)
#define KERN_RESTART_CODE	(KERN_SIGRETURN_CODE + \
				 sizeof(sigreturn_codes))
@kernel/arch/arm/kernel/entry-armv.S:1231:
.equ	stubs_offset, __vectors_start + 0x200 - __stubs_start
vector_stub is a macro; for example:
vector_stub irq, IRQ_MODE, 4
expands to
vector_irq:
.if 4
sub lr, lr, #4
.endif
……
As can be seen, vector_stub is really a function template used to stamp out a number of dispatch stubs; these stubs are copied to the region starting at 0xffff0200.
The macro is defined as:
@kernel/arch/arm/kernel/entry-armv.S
.macro	vector_stub, name, mode, correction=0
.align 5
vector_\name:
.if \correction
sub lr, lr, #\correction
.endif
@
@ Save r0, lr_<exception> (parent PC) and spsr_<exception>
@ (parent CPSR)
@
stmia	sp, {r0, lr}		@ save r0, lr
mrs	lr, spsr
str	lr, [sp, #8]		@ save spsr
@
@ Prepare for SVC32 mode. IRQs remain disabled.
@
mrs r0, cpsr
eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE)
msr spsr_cxsf, r0
@
@ the branch table must immediately follow this code
@ i.e. this stub dispatches through the branch table right after it
and lr, lr, #0x0f
THUMB( adr r0, 1f )
THUMB( ldr lr, [r0, lr, lsl #2] )
mov r0, sp
ARM( ldr lr, [pc, lr, lsl #2] )
@ branch to handler in SVC mode
movs pc, lr
ENDPROC(vector_\name)
.align 2
Because the vector table is copied to 0xffff0000..0xffff0200 and the stubs to the region starting at 0xffff0200, when
W(b) vector_irq + stubs_offset
executes, pc equals 0xffff0000 plus the entry's offset inside the vector table. b is a PC-relative branch: the assembler encodes the displacement target - pc using link-time addresses. At run time the branch sits at 0xffff0000 + (entry - __vectors_start) and must reach the stub's copy at 0xffff0200 + (vector_irq - __stubs_start); adding stubs_offset = __vectors_start + 0x200 - __stubs_start to the link-time target makes the encoded displacement equal exactly this run-time displacement.
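To sanity-check that arithmetic, here is a small userspace C sketch with made-up link-time addresses; only the relationships matter, and ARM's pc+8 pipeline bias cancels on both sides, so it is omitted:

#include <assert.h>

int main(void)
{
	/* link-time addresses (made up for illustration) */
	unsigned long vectors_start = 0xc0020000;	/* __vectors_start */
	unsigned long stubs_start   = 0xc0020240;	/* __stubs_start */
	unsigned long vector_irq    = stubs_start + 0x80;   /* one stub */
	unsigned long entry         = vectors_start + 0x18; /* the W(b) slot */
	unsigned long stubs_offset  = vectors_start + 0x200 - stubs_start;

	/* displacement the assembler encodes: (target + stubs_offset) - source */
	long encoded = (long)(vector_irq + stubs_offset) - (long)entry;

	/* displacement needed at run time, after the memcpy()s in early_trap_init() */
	unsigned long rt_entry = 0xffff0000UL + (entry - vectors_start);
	unsigned long rt_stub  = 0xffff0200UL + (vector_irq - stubs_start);
	long needed = (long)(rt_stub - rt_entry);

	assert(encoded == needed);	/* stubs_offset makes them match */
	return 0;
}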
Once vector_\name has run (e.g. the stub generated by "vector_stub irq, IRQ_MODE, 4"), control jumps to one of the handlers in the branch table that immediately follows it, e.g. __irq_usr.
__stubs_start:
/*
* Interrupt dispatcher
*/
vector_stub irq, IRQ_MODE, 4
.long __irq_usr @ 0 (USR_26 / USR_32)
.long __irq_invalid @ 1 (FIQ_26 / FIQ_32)
.long __irq_invalid @ 2 (IRQ_26 / IRQ_32)
.long __irq_svc @ 3 (SVC_26 / SVC_32)
.long __irq_invalid @ 4
.long __irq_invalid @ 5
.long __irq_invalid @ 6
……
vector_stub dabt, ABT_MODE, 8
.long __dabt_usr @ 0 (USR_26 / USR_32)
.long __dabt_invalid @ 1 (FIQ_26 / FIQ_32)
.long __dabt_invalid @ 2 (IRQ_26 / IRQ_32)
.long __dabt_svc @ 3 (SVC_26 / SVC_32)
.long __dabt_invalid @ 4
……
……
.globl __stubs_end
__stubs_end:
__irq_usr in turn expands the irq_handler macro:
.align 5
__irq_usr:
usr_entry
kuser_cmpxchg_check
get_thread_info tsk
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT]	@ get preempt count
add r7, r8, #1 @ increment it
str r7, [tsk, #TI_PREEMPT]
#endif
irq_handler
#ifdef CONFIG_PREEMPT
ldr r0, [tsk, #TI_PREEMPT]
str r8, [tsk, #TI_PREEMPT]
teq r0, r7
ARM( strne r0, [r0, -r0] )
THUMB( movne r0, #0 )
THUMB( strne r0, [r0] )
#endif
mov why, #0
b ret_to_user
UNWIND(.fnend )
ENDPROC(__irq_usr)
The irq_handler macro in turn calls asm_do_IRQ:
.macro irq_handler
get_irqnr_preamble r5, lr
1: get_irqnr_and_base r0, r6, r5, lr	@ get the IRQ number into r0
movne r1, sp
@
@ routine called with r0 = irq number, r1 = struct pt_regs *
@
adrne lr, BSYM(1b)
bne asm_do_IRQ
#ifdef CONFIG_SMP
/*
* XXX
*
* this macro assumes that irqstat (r6) and base (r5) are
* preserved from get_irqnr_and_base above
*/
test_for_ipi r0, r6, r5, lr
movne r0, sp
adrne lr, BSYM(1b)
bne do_IPI
#ifdef CONFIG_LOCAL_TIMERS
test_for_ltirq r0, r6, r5, lr
movne r0, sp
adrne lr, BSYM(1b)
bne do_local_timer
#endif
#endif
.endm
+------------------------------+ 0xffff0000 <-- __vectors_start
| exception vector table       |
|                              |
+------------------------------+ 0xffff0200 <-- __stubs_start
| vector stubs (dispatchers)   |
|                              |
+------------------------------+
|                              |
|                              |
+------------------------------+ 0xffff1000 - kuser_sz <-- __kuser_helper_start
| kuser helpers                |
+------------------------------+ 0xffff1000
softirq
//kernel/arch/arm/kernel/irq.c
asmlinkage void __exception
asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
perf_mon_interrupt_in();
irq_enter();
/*
 * Some hardware gives randomly wrong interrupts. Rather
 * than crashing, do something sensible.
 */
if (unlikely(irq >= NR_IRQS)) {
if (printk_ratelimit())
printk(KERN_WARNING "Bad IRQ%u\n", irq);
ack_bad_irq(irq);
} else {
generic_handle_irq(irq);
}
/* AT91 specific workaround */
irq_finish(irq);
irq_exit();
set_irq_regs(old_regs);
perf_mon_interrupt_out();
}
asm_do_IRQ(irq, regs)
+--irq_enter()
+--ack_bad_irq()
+--generic_handle_irq()
+--irq_exit()
irq_enter
// enter hardirq context: preempt_count is bumped by HARDIRQ_OFFSET
// Enter an interrupt context.
void irq_enter(void)
{
int cpu = smp_processor_id();
rcu_irq_enter();
if (idle_cpu(cpu) && !in_interrupt()) {
__irq_enter();
tick_check_idle(cpu);
} else
__irq_enter();
}
/*
 * It is safe to do non-atomic ops on ->hardirq_context,
 * because NMI handlers may not preempt and the ops are
 * always balanced, so the interrupted value of ->hardirq_context
 * will always be restored.
 */
#define __irq_enter()					\
	do {						\
		account_system_vtime(current);	/* a no-op here */	\
		add_preempt_count(HARDIRQ_OFFSET);	\
		trace_hardirq_enter();			\
	} while (0)
// bump the preempt counter by val
#define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
#define preempt_count()	(current_thread_info()->preempt_count)
// locate the thread_info structure from the stack pointer
static inline struct thread_info *
current_thread_info(void)
{
	register unsigned long sp asm ("sp");	// the stack pointer
	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
ack_bad_irq
kernel/arch/arm/kernel/irq.c:
unsigned long irq_err_count;
kernel/arch/arm/include/asm/hw_irq.h:
static inline void
ack_bad_irq(int irq)
{
extern unsigned long irq_err_count;
irq_err_count++;
}
generic_handle_irq
static inline void
generic_handle_irq(unsigned int irq)
{
generic_handle_irq_desc(irq, irq_to_desc(irq));
}
static inline void
generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
{
#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
desc->handle_irq(irq, desc);
#else
// When a driver registers an interrupt with request_irq(), the handler
// address goes into the irq descriptor's action list
// (desc->action->handler), not into desc->handle_irq, so interrupts
// registered by drivers normally take the __do_IRQ() path;
// __do_IRQ() reaches the handler through the desc for that irq:
if (likely(desc->handle_irq))
desc->handle_irq(irq, desc);
else
__do_IRQ(irq);
#endif
}
__do_IRQ(irq)
+--handle_IRQ_event(irq, desc->action);
+-- action->handler(irq, action->dev_id)
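For reference, the driver side that fills desc->action looks roughly like this — a hedged sketch, all names hypothetical:

#include <linux/interrupt.h>

/* hypothetical ISR: acknowledge the hardware, do the minimum, defer the rest */
static irqreturn_t mydev_isr(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int mydev_setup_irq(unsigned int irq, void *dev)
{
	/* the IRQF_TRIGGER_* flag also reaches the chip's .set_type hook
	 * (msm_irq_set_type() below), which picks the flow handler */
	return request_irq(irq, mydev_isr, IRQF_TRIGGER_RISING,
			   "mydev", dev);
}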
For example, the MSM interrupt controller selects the flow handler for an irq according to its trigger type:
static int
msm_irq_set_type(unsigned int irq, unsigned int flow_type)
{
	void __iomem *treg = VIC_INT_TYPE0 + ((irq & 32) ? 4 : 0);
	void __iomem *preg = VIC_INT_POLARITY0 + ((irq & 32) ? 4 : 0);
	unsigned index = (irq >> 5) & 1;
	int b = 1 << (irq & 31);
uint32_t polarity;
uint32_t type;
polarity = msm_irq_shadow_reg[index].int_polarity;
if (flow_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
polarity |= b;
if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_HIGH))
polarity &= ~b;
writel(polarity, preg);
msm_irq_shadow_reg[index].int_polarity = polarity;
type = msm_irq_shadow_reg[index].int_type;
	if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
		type |= b;
		irq_desc[irq].handle_irq = handle_edge_irq;
	}
	if (flow_type & (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW)) {
		type &= ~b;
		irq_desc[irq].handle_irq = handle_level_irq;
	}
writel(type, treg);
msm_irq_shadow_reg[index].int_type = type;
return 0;
}
static struct irq_chip msm_irq_chip = {
	.name		= "msm",
	.disable	= msm_irq_mask,
	.ack		= msm_irq_ack,
	.mask		= msm_irq_mask,
	.unmask		= msm_irq_unmask,
	.set_wake	= msm_irq_set_wake,
	.set_type	= msm_irq_set_type,
};
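A driver can also select the trigger (and therefore which flow handler msm_irq_set_type() installs) explicitly rather than through request_irq() flags — a one-line sketch with a hypothetical irq number:

	/* ends up in desc->chip->set_type(), i.e. msm_irq_set_type() here */
	set_irq_type(irq, IRQ_TYPE_EDGE_RISING);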
irq_exit
irq_exit()
+-- sub_preempt_count(IRQ_EXIT_OFFSET)
+-- invoke_softirq()
+-- __do_softirq()
+-- local_irq_enable()
+-- h = softirq_vec;
+-- h->action(h);
+-- local_irq_disable();
+-- wakeup_softirqd();
void irq_exit(void)
{
account_system_vtime(current);
trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
// if we are no longer in interrupt context and softirqs are pending, handle them
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
rcu_irq_exit();
if (idle_cpu(smp_processor_id()) && !in_interrupt()&&
!need_resched())
tick_nohz_stop_sched_tick(0);
#endif
preempt_enable_no_resched();
}
void __kprobes
sub_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
return;
/*
* Is the spinlock portion underflowing?
*/
if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
!(preempt_count() & PREEMPT_MASK)))
return;
#endif
if (preempt_count() == val)
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
preempt_count() -= val;
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
#define invoke_softirq() __do_softirq()
#else
#define invoke_softirq() do_softirq()
#endif
asmlinkage void
__do_softirq(void)
{
struct softirq_action *h;
__u32 pending;
//#define MAX_SOFTIRQ_RESTART 10
int max_restart = MAX_SOFTIRQ_RESTART;
int cpu;
// fetch the mask of pending softirqs, i.e.
// irq_stat[smp_processor_id()].__softirq_pending
pending = local_softirq_pending();
account_system_vtime(current);
// bump the softirq-disable depth in preempt_count
__local_bh_disable((unsigned long)
		   __builtin_return_address(0));
lockdep_softirq_enter();
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
// enable local interrupts
local_irq_enable();
// walk softirq_vec[] from the start; for each pending softirq
// ((pending & 1) == 1), run h->action(h)
h = softirq_vec;
do {
if (pending & 1) {
int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(h - softirq_vec);
trace_softirq_entry(h, softirq_vec);
h->action(h);
trace_softirq_exit(h, softirq_vec);
if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %td %s%p"
"withpreempt_count %08x,"
" exited with %08x?\n", h -softirq_vec,
softirq_to_name[h - softirq_vec],
h->action, prev_count, preempt_count());
preempt_count() = prev_count;
}
rcu_bh_qs(cpu);
}
h++;
pending >>= 1;
} while (pending);
// disable local interrupts
local_irq_disable();
// if softirqs are still pending after this pass, walk softirq_vec[]
// again, at most 10 times in total
pending = local_softirq_pending();
if (pending && --max_restart)
	goto restart;
// if softirqs are still pending after 10 passes, wake the ksoftirqd daemon
if (pending)
	wakeup_softirqd();
lockdep_softirq_exit();
account_system_vtime(current);
_local_bh_enable();
}
/* arch independent irq_stat fields */
#define local_softirq_pending() \
	__IRQ_STAT(smp_processor_id(), __softirq_pending)
#define __IRQ_STAT(cpu, member)	(irq_stat[cpu].member)
kernel/kernel/softirq.c:
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
kernel/arch/arm/include/asm/hardirq.h:
typedef struct {
	unsigned int __softirq_pending;
	unsigned int local_timer_irqs;
} ____cacheline_aligned irq_cpustat_t;
So local_softirq_pending() is effectively
irq_stat[smp_processor_id()].__softirq_pending
static inline void
__local_bh_disable(unsigned long ip)
{
add_preempt_count(SOFTIRQ_OFFSET);
barrier();
}
kernel/kernel/softirq.c:
static struct softirq_action
softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
struct softirq_action
{
void (*action)(struct softirq_action *);
};
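For completeness, this is how a softirq_vec[] slot gets filled in and raised — a hedged sketch; HELLO_SOFTIRQ is a made-up slot (real softirq numbers come from the fixed enum in interrupt.h: HI_SOFTIRQ, TIMER_SOFTIRQ, TASKLET_SOFTIRQ, ...), and on this kernel generation open_softirq() takes just the number and the action:

#include <linux/interrupt.h>

/* hypothetical action: runs in softirq context with interrupts enabled */
static void hello_softirq_action(struct softirq_action *h)
{
	printk(KERN_INFO "softirq ran on cpu %d\n", smp_processor_id());
}

static int __init hello_softirq_init(void)
{
	/* fills softirq_vec[HELLO_SOFTIRQ].action */
	open_softirq(HELLO_SOFTIRQ, hello_softirq_action);
	/* sets the pending bit; __do_softirq() will invoke the action */
	raise_softirq(HELLO_SOFTIRQ);
	return 0;
}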
static inline void
kstat_incr_softirqs_this_cpu(unsigned int irq)
{
kstat_this_cpu.softirqs[irq]++;
}
/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so lets the scheduler to balance
 * the softirq load for us.
*/
void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
if (tsk && tsk->state != TASK_RUNNING)
wake_up_process(tsk);
}
#ifdef CONFIG_SMP
#define __get_cpu_var(var) \
	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
#else
#define __get_cpu_var(var)	per_cpu_var(var)
#endif
per_cpu_var() is simple:
#define per_cpu_var(var)	per_cpu__##var
so per_cpu_var(ksoftirqd) is per_cpu__ksoftirqd,
which comes from:
kernel/kernel/softirq.c:
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
#define DEFINE_PER_CPU(type, name) \
	DEFINE_PER_CPU_SECTION(type, name, "")
#define DEFINE_PER_CPU_SECTION(type, name, sec) \
	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \
	__typeof__(type) per_cpu__##name
#define __PCPU_ATTRS(sec) \
	__attribute__((section(PER_CPU_BASE_SECTION sec))) \
	PER_CPU_ATTRIBUTES
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION	".data.percpu"
#else
#define PER_CPU_BASE_SECTION	".data"
#endif
So
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
expands to:
__attribute__((section(".data.percpu" "")))
__typeof__(struct task_struct *) per_cpu__ksoftirqd;
First, what is my_cpu_offset?
#define my_cpu_offset __my_cpu_offset
#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
#define raw_smp_processor_id()	(current_thread_info()->cpu)
#define per_cpu_offset(x) (__per_cpu_offset[x])
So my_cpu_offset is
(__per_cpu_offset[(current_thread_info()->cpu)])
unsigned long __per_cpu_offset[NR_CPUS]__read_mostly;
void __init
setup_per_cpu_areas(void)
{
unsigned long delta;
unsigned int cpu;
int rc;
	/*
	 * Always reserve area for module percpu variables. That's
	 * what the legacy allocator did.
	 */
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
if (rc < 0)
panic("Failed to initialized percpuareas.");
	delta = (unsigned long)pcpu_base_addr -
		(unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu)
__per_cpu_offset[cpu]= delta + pcpu_unit_offsets[cpu];
}
Next, look at SHIFT_PERCPU_PTR:
#define SHIFT_PERCPU_PTR(__p, __offset) \
	RELOC_HIDE((__p), (__offset))
#define RELOC_HIDE(ptr, off) \
	({ unsigned long __ptr; \
	   __ptr = (unsigned long)(ptr); \
	   (typeof(ptr))(__ptr + (off)); })
Now look at where the ksoftirqd daemon comes from:
//kernel/kernel/softirq.c
static int ksoftirqd(void *__bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
preempt_disable();
// no pending softirqs: yield the CPU
if (!local_softirq_pending()) {
preempt_enable_no_resched();
schedule();
preempt_disable();
}
// we have been woken up: set the task state to TASK_RUNNING
__set_current_state(TASK_RUNNING);
while (local_softirq_pending()) {
/* Preempt disable stops cpu going offline.
   If already offline, we'll be on wrong CPU:
   don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
do_softirq();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_sched_qs((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
}// while (!kthread_should_stop())
__set_current_state(TASK_RUNNING);
return 0;
wait_to_die:
preempt_enable();
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
asmlinkage void
do_softirq(void)
{
__u32 pending;
unsigned long flags;
// if we are already in interrupt context, return immediately
if (in_interrupt())
	return;
// disable local interrupts
local_irq_save(flags);
// check for pending softirqs
pending = local_softirq_pending();
// if any are pending, handle them
if (pending)
	__do_softirq();
// restore local interrupts
local_irq_restore(flags);
}
List of external interrupt sources:
kernel/arch/arm/mach-msm/include/mach/irqs.h
#ifndef __ASM_ARCH_MSM_IRQS_H
#define __ASM_ARCH_MSM_IRQS_H
#define MSM_IRQ_BIT(irq)	(1 << ((irq) & 31))
#if defined(CONFIG_ARCH_MSM7X30)
#include "irqs-7x30.h"
#elif defined(CONFIG_ARCH_QSD8X50)
#include "irqs-8x50.h"
#include "sirc.h"
#elif defined(CONFIG_ARCH_MSM8X60)
#include "irqs-8x60.h"
#elif defined(CONFIG_ARCH_MSM_ARM11)
#include "irqs-7xxx.h"
#else
#error "Unknown architecture specification"
#endif
#define NR_IRQS		(NR_MSM_IRQS + NR_GPIO_IRQS + NR_BOARD_IRQS)
#define MSM_GPIO_TO_INT(n)	(NR_MSM_IRQS + (n))
#define FIRST_GPIO_IRQ		MSM_GPIO_TO_INT(0)
#define MSM_INT_TO_REG(base, irq)	(base + irq / 32)
#endif
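A hedged usage sketch of the MSM_GPIO_TO_INT() macro above (GPIO number 42 is made up):

	unsigned int gpio_irq = MSM_GPIO_TO_INT(42);	/* == NR_MSM_IRQS + 42 */
	/* gpio_irq can then be passed to request_irq() as usual */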
This is the file that actually defines the external interrupt sources:
kernel/arch/arm/mach-msm/include/mach/irqs-8x60.h
#define GIC_PPI_START	16
#define GIC_SPI_START	32
#define INT_DEBUG_TIMER_EXP	(GIC_PPI_START + 0)
#define INT_GP_TIMER_EXP	(GIC_PPI_START + 1)
#define INT_GP_TIMER2_EXP	(GIC_PPI_START + 2)
……
tasklet
Tasklets are built on top of softirqs.
#define DECLARE_TASKLET(name, func, data) \
	struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }
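A minimal hedged usage sketch (all names hypothetical): the ISR does only the urgent half and defers the rest to the tasklet, which later runs from TASKLET_SOFTIRQ context:

#include <linux/interrupt.h>

/* deferred half: runs later in softirq (TASKLET_SOFTIRQ) context */
static void mydev_tasklet_fn(unsigned long data)
{
}

static DECLARE_TASKLET(mydev_tasklet, mydev_tasklet_fn, 0);

static irqreturn_t mydev_isr(int irq, void *dev_id)
{
	tasklet_schedule(&mydev_tasklet);	/* marks TASKLET_SOFTIRQ pending */
	return IRQ_HANDLED;
}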
tasklet_schedule(t)
+-- __tasklet_schedule(t)
    +-- raise_softirq_irqoff(TASKLET_SOFTIRQ);
        +-- wakeup_softirqd()
static inline void tasklet_schedule(struct tasklet_struct *t)
{
	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		__tasklet_schedule(t);
}
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;
	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);
inline void raise_softirq_irqoff(unsigned int nr)
{
	// mark this softirq as pending
	__raise_softirq_irqoff(nr);
	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	// in interrupt/softirq context the softirq will run on return;
	// otherwise wake ksoftirqd
if (!in_interrupt())
wakeup_softirqd();
}
#define __raise_softirq_irqoff(nr) \
	do { or_softirq_pending(1UL << (nr)); } while (0)
#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
So what kind of kernel thread is ksoftirqd?
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
static int run_ksoftirqd(void *__bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
preempt_enable_no_resched();
schedule();
preempt_disable();
}
__set_current_state(TASK_RUNNING);
		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
do_softirq();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_note_context_switch((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
wait_to_die:
preempt_enable();
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
struct task_struct *p;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(run_ksoftirqd, hcpu,
				   "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return notifier_from_errno(PTR_ERR(p));
}
kthread_bind(p, hotcpu);
per_cpu(ksoftirqd, hotcpu) = p;
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
wake_up_process(per_cpu(ksoftirqd, hotcpu));
break;
}
return NOTIFY_OK;
}
static __init int spawn_ksoftirqd(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err = cpu_callback(&cpu_nfb,CPU_UP_PREPARE, cpu);
BUG_ON(err != NOTIFY_OK);
cpu_callback(&cpu_nfb, CPU_ONLINE,cpu);
register_cpu_notifier(&cpu_nfb);
return 0;
}
early_initcall(spawn_ksoftirqd);
workqueue
worker_thread
cleanup_workqueue_thread
create_workqueue_thread
start_workqueue_thread
is_wq_single_threaded
Initialization
rest_init()
..>> do_basic_setup()
..>> init_workqueues()
void __init init_workqueues(void)
{
	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);
	cpumask_copy(cpu_populated_map, cpu_online_mask);
	singlethread_cpu = cpumask_first(cpu_possible_mask);
	cpu_singlethread_map = cpumask_of(singlethread_cpu);
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}
static struct workqueue_struct *keventd_wq __read_mostly;
At initialization the kernel creates a workqueue named "events" and keeps a workqueue_struct * pointer to it, keventd_wq.
With schedule_work()/schedule_work_on()/schedule_delayed_work(), a work_struct can be inserted into the "events" workqueue, and the function embedded in the work_struct then gets executed.
int schedule_work(struct work_struct *work)
{
return queue_work(keventd_wq, work);
}
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
int schedule_delayed_work(struct delayed_work *dwork,
			  unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
keventd_wq is the workqueue the kernel creates for itself. Work items on one workqueue are executed serially, so hanging too many work_structs on the same queue can introduce noticeable latency; it is usually better to create a dedicated workqueue and queue your own work_structs on that, as in the sketch below.
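A hedged sketch of both options, all names hypothetical:

#include <linux/workqueue.h>

static void mydev_work_fn(struct work_struct *work)
{
	/* runs in process context in a worker thread ("events/N" or "mydev/N") */
}
static DECLARE_WORK(mydev_work, mydev_work_fn);

static struct workqueue_struct *mydev_wq;

static int __init mydev_init(void)
{
	/* option 1: share the kernel's "events" queue */
	schedule_work(&mydev_work);

	/* option 2: a dedicated queue with its own worker threads;
	 * work is then queued with queue_work(mydev_wq, ...) */
	mydev_wq = create_workqueue("mydev");
	if (!mydev_wq)
		return -ENOMEM;
	return 0;
}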
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
>>
So hotcpu_notifier(workqueue_cpu_callback, 0); expands to
do { (void)(workqueue_cpu_callback); } while (0);
There is no call operator after (fn), so this is not a function call: with CPU hotplug disabled, the macro merely references fn so the compiler does not warn about workqueue_cpu_callback being unused.
create_workqueue
#define create_workqueue(name)	__create_workqueue((name), 0, 0, 0)
>>
#define __create_workqueue(name, singlethread, freezeable, rt) \
	__create_workqueue_key((name), (singlethread), (freezeable), \
			       (rt), NULL, NULL)
>>
So keventd_wq = create_workqueue("events"); is
__create_workqueue_key(("events"), 0, 0, 0, NULL, NULL);
struct workqueue_struct *
__create_workqueue_key(const char *name, int singlethread,
		       int freezeable, int rt, struct lock_class_key *key,
		       const char *lock_name)
{
	struct workqueue_struct *wq;
	struct cpu_workqueue_struct *cwq;
	int err = 0, cpu;
	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		return NULL;
	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
	if (!wq->cpu_wq) {
		kfree(wq);
		return NULL;
	}
	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	wq->singlethread = singlethread;
	wq->freezeable = freezeable;
	wq->rt = rt;
	INIT_LIST_HEAD(&wq->list);
	if (singlethread) {
		cwq = init_cpu_workqueue(wq, singlethread_cpu);
		err = create_workqueue_thread(cwq, singlethread_cpu);
		start_workqueue_thread(cwq, -1);
	} else {
		cpu_maps_update_begin();
		/*
		 * We must place this wq on list even if the code below fails.
		 * cpu_down(cpu) can remove cpu from cpu_populated_map before
		 * destroy_workqueue() takes the lock, in that case we leak
		 * cwq[cpu]->thread.
		 */
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		/*
		 * We must initialize cwqs for each possible cpu even if we
		 * are going to call destroy_workqueue() finally. Otherwise
		 * cpu_up() can hit the uninitialized cwq once we drop the
		 * lock.
		 */
		for_each_possible_cpu(cpu) {
			cwq = init_cpu_workqueue(wq, cpu);
			if (err || !cpu_online(cpu))
				continue;
			err = create_workqueue_thread(cwq, cpu);
			start_workqueue_thread(cwq, cpu);
		}
		cpu_maps_update_done();
	}
	if (err) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
}
struct cpu_workqueue_struct *cwq;
Each node on the cwq->worklist list is a work_struct instance.
static int
create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	struct workqueue_struct *wq = cwq->wq;
	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
	struct task_struct *p;
	p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (cwq->wq->rt)
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
	cwq->thread = p;
	trace_workqueue_creation(cwq->thread, cpu);
	return 0;
}
// bind the cwq->thread task to the processor given by @cpu and wake it up
static void
start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct task_struct *p = cwq->thread;
	if (p != NULL) {
		if (cpu >= 0)
			kthread_bind(p, cpu);
		wake_up_process(p);
	}
}
worker_thread sleeps on the cwq->more_work wait queue. Whenever a work_struct is inserted into the cwq->worklist list, the worker thread is woken up; it then keeps checking whether cwq->worklist still holds work_struct instances, taking each one off the list, unlinking it and running its embedded function, until cwq->worklist is empty. After that, worker_thread goes back to sleep on cwq->more_work.
static int
worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DEFINE_WAIT(wait);
	if (cwq->wq->freezeable)
		set_freezable();
	for (;;) {
		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
		// if cwq->worklist is empty, sleep on the cwq->more_work wait queue
		if (!freezing(current) &&
		    !kthread_should_stop() &&
		    list_empty(&cwq->worklist))
			schedule();
		// woken up again once work is queued on cwq->worklist
		// (see wake_up(&cwq->more_work) in insert_work())
		finish_wait(&cwq->more_work, &wait);
		try_to_freeze();
		if (kthread_should_stop())
			break;
		// keep taking work_structs off cwq->worklist, unlinking each one
		// and running its embedded function, until cwq->worklist is empty
		run_workqueue(cwq);
	}
	return 0;
}
This function takes each work_struct off the cwq->worklist list, removes it from the list, and runs the function embedded in it.
static void
run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	while (!list_empty(&cwq->worklist)) {
		// take the work_struct at the head of the cwq->worklist list
		struct work_struct *work = list_entry(cwq->worklist.next,
						      struct work_struct, entry);
		work_func_t f = work->func;
		trace_workqueue_execution(cwq->thread, work);
		cwq->current_work = work;
		// unlink the head entry from cwq->worklist and reinitialize
		// its list pointers
		list_del_init(cwq->worklist.next);
		spin_unlock_irq(&cwq->lock);
		BUG_ON(get_wq_data(work) != cwq);
		// clear the pending flag of the work_struct just taken off
		// cwq->worklist
		work_clear_pending(work);
		// call the function embedded in the work_struct taken from
		// cwq->worklist -- the action we actually want to run
		lock_map_acquire(&cwq->wq->lockdep_map);
		lock_map_acquire(&lockdep_map);
		f(work);
		lock_map_release(&lockdep_map);
		lock_map_release(&cwq->wq->lockdep_map);
		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
			       "%s/0x%08x/%d\n",
			       current->comm, preempt_count(),
			       task_pid_nr(current));
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}
		spin_lock_irq(&cwq->lock);
		cwq->current_work = NULL;
	}
	spin_unlock_irq(&cwq->lock);
}
queue_work
Adds a work_struct to a workqueue and wakes the worker_thread to process it.
queue_work() ..>>
..>> queue_work_on() ..>>
..>> __queue_work() ..>>
..>> insert_work() ..>>
..>> list_add_tail(&work->entry,head);
..>> wake_up(&cwq->more_work);
int
queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
int ret;
ret = queue_work_on(get_cpu(), wq, work);
put_cpu();
return ret;
}
int
queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
int ret = 0;
if (!test_and_set_bit(WORK_STRUCT_PENDING,
work_data_bits(work)))
{
BUG_ON(!list_empty(&work->entry));
__queue_work(wq_per_cpu(wq, cpu), work);
ret = 1;
}
return ret;
}
static struct cpu_workqueue_struct *
wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
	if (unlikely(is_wq_single_threaded(wq)))
		cpu = singlethread_cpu;
	return per_cpu_ptr(wq->cpu_wq, cpu);
}
// insert the work_struct at the tail of cwq->worklist and wake the
// worker_thread blocked on the cwq->more_work wait queue
static void
__queue_work(struct cpu_workqueue_struct *cwq,
	     struct work_struct *work)
{
	unsigned long flags;
	spin_lock_irqsave(&cwq->lock, flags);
	insert_work(cwq, work, &cwq->worklist);
	spin_unlock_irqrestore(&cwq->lock, flags);
}
static void
insert_work(struct cpu_workqueue_struct *cwq,
	    struct work_struct *work, struct list_head *head)
{
	trace_workqueue_insertion(cwq->thread, work);
	set_wq_data(work, cwq);
	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();
	// add the work_struct to the cwq->worklist list
	list_add_tail(&work->entry, head);
	// wake the worker_thread blocked on cwq->more_work
	wake_up(&cwq->more_work);
}
#define per_cpu_ptr(ptr, cpu) \
	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
#define per_cpu_offset(x)	(__per_cpu_offset[x])
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
#define SHIFT_PERCPU_PTR(__p, __offset) \
	RELOC_HIDE((__p), (__offset))
#define RELOC_HIDE(ptr, off) \
	({ unsigned long __ptr; \
	   __ptr = (unsigned long)(ptr); \
	   (typeof(ptr))(__ptr + (off)); })
So per_cpu_ptr(wq->cpu_wq, cpu); is:
({ unsigned long __ptr; \
   __ptr = (unsigned long)(wq->cpu_wq); \
   (typeof(wq->cpu_wq))(__ptr + __per_cpu_offset[cpu]); })
which is effectively
(struct cpu_workqueue_struct *)
	((unsigned long)(wq->cpu_wq) + __per_cpu_offset[cpu])
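The same arithmetic as a standalone userspace illustration, with made-up offsets:

#include <stdio.h>

#define NR_CPUS 2
static unsigned long fake_per_cpu_offset[NR_CPUS] = { 0x0, 0x4000 };

/* mirrors SHIFT_PERCPU_PTR/RELOC_HIDE: base pointer plus this cpu's offset */
#define fake_per_cpu_ptr(ptr, cpu) \
	((__typeof__(ptr))((unsigned long)(ptr) + fake_per_cpu_offset[cpu]))

int main(void)
{
	int *base = (int *)0x100000;	/* hypothetical percpu base address */
	printf("cpu0: %p  cpu1: %p\n",	/* prints 0x100000 and 0x104000 */
	       (void *)fake_per_cpu_ptr(base, 0),
	       (void *)fake_per_cpu_ptr(base, 1));
	return 0;
}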