X86架构:
arch/x86/include/asm/current.h
一:定义当前任务的全局变量current_task
percpu基本原理:静态的percpu变量使用DEFINE_PER_CPU()宏来定义、使用DECLARE_PER_CPU()宏来声明,目的就是把这种类型的变量都放到同一个section中(CONFIG_SMP时为".data..percpu",否则为".data")。下面以DECLARE_PER_CPU()为例:
#define DECLARE_PER_CPU(type, name) \
DECLARE_PER_CPU_SECTION(type, name, "")
#define DECLARE_PER_CPU_SECTION(type, name, sec) \
extern __PCPU_ATTRS(sec) __typeof__(type) name
#define __PCPU_ATTRS(sec) \
__percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \
PER_CPU_ATTRIBUTES
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data..percpu"
#else
#define PER_CPU_BASE_SECTION ".data"
#endif
#define PER_CPU_ATTRIBUTES
例:
DECLARE_PER_CPU(struct task_struct *, current_task);
extern __attribute__((section(".data..percpu" ""))) __typeof__(struct task_struct *) current_task
/*
 * Return the value of the per-CPU variable current_task as seen by the
 * executing CPU, read through this_cpu_read_stable().
 */
static __always_inline struct task_struct *get_current(void)
{
	struct task_struct *task = this_cpu_read_stable(current_task);

	return task;
}
#define current get_current()
二:x86获取当前任务宏this_cpu_read_stable
arch/x86/include/asm/percpu.h
#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
include/linux/percpu-defs.h
根据变量(此处为current_task指针)的sizeof结果,选择对应宽度(1/2/4/8字节)的读取宏,并返回读到的值:
/*
 * Dispatch on sizeof(variable).
 *   stem     - macro-name prefix; the size digit 1, 2, 4 or 8 is pasted onto it
 *   variable - the per-CPU variable handed to the selected macro
 * __verify_pcpu_ptr() is applied to &(variable) first. The selected macro's
 * result is stored in pscr_ret__, which is the value of the whole statement
 * expression. Any other size calls __bad_size_call_parameter().
 */
#define __pcpu_size_call_return(stem, variable) \
({ \
typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
case 2: pscr_ret__ = stem##2(variable); break; \
case 4: pscr_ret__ = stem##4(variable); break; \
case 8: pscr_ret__ = stem##8(variable); break; \
default: \
__bad_size_call_parameter(); break; \
} \
pscr_ret__; \
})
arch/x86/include/asm/percpu.h
根据变量的size生成对应宽度的mov指令(带段寄存器前缀),读取percpu变量在当前CPU上的值:
/*
 * Emit a single asm statement that loads a per-CPU variable.
 *   size - operand size in bytes; selects __pcpu_type_<size>,
 *          __pcpu_op2_<size> and __pcpu_reg_<size>
 *   op   - mnemonic string without size suffix
 *   _var - the per-CPU variable; its address is the input operand [var]
 * The loaded value lands in pfo_val__ (output operand [val]) and is cast
 * through unsigned long to typeof(_var); that cast is the value of the
 * statement expression. Note the asm is not marked volatile.
 */
#define percpu_stable_op(size, op, _var) \
({ \
__pcpu_type_##size pfo_val__; \
asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
: [val] __pcpu_reg_##size("=", pfo_val__) \
: [var] "p" (&(_var))); \
(typeof(_var))(unsigned long) pfo_val__; \
})
以X86_64架构为例展开:64bit指针size为8Byte
this_cpu_read_stable(current_task):
-->__pcpu_size_call_return
-->this_cpu_read_stable_8
初步展开:
-->percpu_stable_op(8, "mov", current_task)
({
__pcpu_type_8 pfo_val__;
asm(__pcpu_op2_8("mov", __percpu_arg(P[var]), "%[val]")
: [val] __pcpu_reg_8("=", pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
})
进一步展开:
#define __pcpu_type_1 u8
#define __pcpu_type_2 u16
#define __pcpu_type_4 u32
#define __pcpu_type_8 u64
#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
#define __pcpu_reg_1(mod, x) mod "q" (x)
#define __pcpu_reg_2(mod, x) mod "r" (x)
#define __pcpu_reg_4(mod, x) mod "r" (x)
#define __pcpu_reg_8(mod, x) mod "r" (x)
#define __percpu_arg(x) __percpu_prefix "%" #x
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
__percpu_seg定义如下:
/* Segment register name used in __percpu_prefix: gs when CONFIG_X86_64 is defined, fs otherwise. */
#ifdef CONFIG_X86_64 // CONFIG_SMP=y
#define __percpu_seg gs
#else
#define __percpu_seg fs
#endif
-->percpu_stable_op(8, "mov", current_task)
({
u64 pfo_val__;
asm("movq %%gs:%P[var], %[val]"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
})
X86 PerCPU变量基址(gs寄存器)的原理:
X86下有一个快捷方法,只需一条简单的汇编指令mov %gs:var就能取出某个percpu变量在当前cpu的值,非常高效。
/*
 * Load one unsigned long from GS-relative memory.
 *
 * addr: offset that is applied on top of the GS segment base.
 * Returns the word read from %gs:(addr).
 *
 * The load is done in one asm statement with explicit input and output
 * operands, so the compiler picks the registers itself. Nothing is handed
 * from one asm statement to the next through a fixed register such as
 * %rax, which the compiler would be free to overwrite in between.
 */
unsigned long get_memory_value(unsigned long addr)
{
	unsigned long value;

	__asm__ __volatile__ ("mov %%gs:(%[addr]), %[value]"
			      : [value] "=r" (value)
			      : [addr] "r" (addr)
			      : "memory"); /* the asm reads memory the compiler cannot see */
	return value;
}
/*
 * Look up the address of "global_symbol" with kallsyms_lookup_name() and
 * return the word that get_memory_value() reads for that address.
 * Logs a message and returns 0 when the lookup yields 0.
 */
unsigned long get_ksymbol_var(void)
{
	const unsigned long sym_addr = kallsyms_lookup_name("global_symbol");

	if (sym_addr != 0)
		return get_memory_value(sym_addr);

	dbg("Can't found global_symbol symbols! \r\n");
	return 0;
}
/*
 * Hand-expanded form of this_cpu_read_stable(current_task): a statement
 * expression that switches on sizeof(current_task) and performs one
 * GS-prefixed mov of the matching width. Returns the loaded value cast to
 * typeof(current_task).
 */
static long *get_current(void)
{
return ({
typeof(current_task) pscr_ret__;
/* Type check only: the initializer casts a null pointer to the type of &current_task; the result is discarded. */
do {
const void *__vpp_verify = (typeof((&(current_task)) + 0))((void *)0);
(void)__vpp_verify;
} while (0);
switch(sizeof(current_task))
{
/* 1-byte load; the output operand uses the "q" constraint. */
case 1: pscr_ret__ = ({
u8 pfo_val__;
asm("movb %%gs:%P[var], %[val]\n\t"
: [val] "=q" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 2-byte load. */
case 2: pscr_ret__ = ({
u16 pfo_val__;
asm("movw %%gs:%P[var], %[val]\n\t"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 4-byte load. */
case 4: pscr_ret__ = ({
u32 pfo_val__;
asm("movl %%gs:%P[var], %[val]\n\t"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 8-byte load. */
case 8: pscr_ret__ = ({
u64 pfo_val__;
asm("movq %%gs:%P[var], %[val]"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__; });
break;
/* NOTE(review): no assignment on this path, so pscr_ret__ would be yielded uninitialized for any other size. */
default:
break;
}
/* Value of the statement expression. */
pscr_ret__;
});
}
宏替换后展开的代码:
#include <stdio.h>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
#define __pcpu_type_1 u8
#define __pcpu_type_2 u16
#define __pcpu_type_4 u32
#define __pcpu_type_8 u64
#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
#define __pcpu_reg_1(mod, x) mod "q" (x)
#define __pcpu_reg_2(mod, x) mod "r" (x)
#define __pcpu_reg_4(mod, x) mod "r" (x)
#define __pcpu_reg_8(mod, x) mod "r" (x)
#define __percpu_seg gs
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
#define __percpu_arg(x) __percpu_prefix "%" #x
#define percpu_stable_op(size, op, _var) \
({ \
__pcpu_type_##size pfo_val__; \
asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
: [val] __pcpu_reg_##size("=", pfo_val__) \
: [var] "p" (&(_var))); \
(typeof(_var))(unsigned long) pfo_val__; \
})
#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
#define __verify_pcpu_ptr(ptr) \
do { \
const void *__vpp_verify = (typeof((ptr) + 0))NULL; \
(void)__vpp_verify; \
} while (0)
#define __pcpu_size_call_return(stem, variable) \
({ \
typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable); break; \
case 2: pscr_ret__ = stem##2(variable); break; \
case 4: pscr_ret__ = stem##4(variable); break; \
case 8: pscr_ret__ = stem##8(variable); break; \
default: \
break; \
} \
pscr_ret__; \
})
#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
/* Demo object that current_task points at; initialised with a recognisable bit pattern. */
long current_task_var = 0xAA55AA55BBCCDDEE;
/* Demo counterpart of the current_task pointer: holds the address of current_task_var. */
long *current_task = &current_task_var;
#if 0
/*
 * Macro-based variant: prints a marker line, then reads current_task through
 * this_cpu_read_stable() and returns the value.
 */
static long *get_current(void)
{
	long *task;

	printf("get_current==>宏替换\n");
	task = this_cpu_read_stable(current_task);
	return task;
}
#else
/*
 * Inline-asm variant: the hand-expanded form of
 * this_cpu_read_stable(current_task). Prints a marker line, then switches on
 * sizeof(current_task) and performs one GS-prefixed mov of the matching
 * width. Returns the loaded value cast to typeof(current_task).
 */
static long *get_current(void)
{
printf("get_current==>内联汇编\n");
return ({
typeof(current_task) pscr_ret__;
/* Type check only: the initializer casts a null pointer to the type of &current_task; the result is discarded. */
do {
const void *__vpp_verify = (typeof((&(current_task)) + 0))((void *)0);
(void)__vpp_verify;
} while (0);
switch(sizeof(current_task))
{
/* 1-byte load; the output operand uses the "q" constraint. */
case 1: pscr_ret__ = ({
u8 pfo_val__;
asm("movb %%gs:%P[var], %[val]\n\t"
: [val] "=q" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 2-byte load. */
case 2: pscr_ret__ = ({
u16 pfo_val__;
asm("movw %%gs:%P[var], %[val]\n\t"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 4-byte load. */
case 4: pscr_ret__ = ({
u32 pfo_val__;
asm("movl %%gs:%P[var], %[val]\n\t"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__;
});
break;
/* 8-byte load. */
case 8: pscr_ret__ = ({
u64 pfo_val__;
asm("movq %%gs:%P[var], %[val]\n\t"
: [val] "=r" (pfo_val__)
: [var] "p" (&(current_task)));
(typeof(current_task))(unsigned long) pfo_val__; });
break;
/* NOTE(review): no assignment on this path, so pscr_ret__ would be yielded uninitialized for any other size. */
default:
break;
}
/* Value of the statement expression. */
pscr_ret__;
});
}
#endif
#define current get_current()
/*
 * Load one unsigned long from GS-relative memory.
 *
 * addr: offset that is applied on top of the GS segment base.
 * Returns the word read from %gs:(addr).
 *
 * The load is done in one asm statement with explicit input and output
 * operands, so the compiler picks the registers itself. Nothing is handed
 * from one asm statement to the next through a fixed register such as
 * %rax, which the compiler would be free to overwrite in between.
 */
unsigned long get_mem_value(unsigned long addr)
{
	unsigned long value;

	__asm__ __volatile__ ("mov %%gs:(%[addr]), %[value]"
			      : [value] "=r" (value)
			      : [addr] "r" (addr)
			      : "memory"); /* the asm reads memory the compiler cannot see */
	return value;
}
/*
 * Return the word that get_mem_value() reads for `addr`.
 * When addr is 0, prints a message and returns 0 instead.
 */
unsigned long get_ksyms_var(unsigned long addr)
{
	if (addr != 0)
		return get_mem_value(addr);

	printf("Can't found ksyms_var symbols! \r\n");
	return 0;
}
int main(int argc, char *argv[])
{
printf("current_task: %p\n", current_task);
printf("current: %p\n", current);
printf("get_mem_value: 0x%lx\n", get_mem_value((unsigned long)¤t_task_var));
return 0;
}