多核中percpu

最新推荐文章于 2024-04-26 19:57:34 发布

bingqingsuimeng

最新推荐文章于 2024-04-26 19:57:34 发布

阅读量5.1k

点赞数

分类专栏： linux内核中驱动相关机制文章标签： struct attributes 数据结构 linker x86 module

linux内核中驱动相关机制专栏收录该内容

13 篇文章 1 订阅

订阅专栏

What is percpu data?
percpu data 是内核为smp系统中不同CPU之间的数据保护方式，系统为每个CPU维护一段私有的空间，在这段空间中的数据只有这个CPU能访问。但是这种方式不提供对异步函数访问的保护，因此在同一个CPU上还要另外的同步原语的协作。

arch/x86/kernel/vmlinux.lds中有:
.....
/* will be freed after init
   * Following ALIGN() is required to make sure no other data falls on the
   * same page where __smp_alt_end is pointing as that page might be freed
   * after boot. Always make sure that ALIGN() directive is present after
   * the section which contains __smp_alt_end.
   */
. = ALIGN(PAGE_SIZE);

/* will be freed after init */
. = ALIGN(PAGE_SIZE);       /* Init code and data */
....
....省略若干行
....
. = ALIGN(PAGE_SIZE);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
    __per_cpu_start = .;
    *(.data.percpu.page_aligned)
    *(.data.percpu)
    *(.data.percpu.shared_aligned)
    __per_cpu_end = .;
}
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */

        这说明__per_cpu_start和__per_cpu_end分别标识.data.percpu这个section的开头和结尾。并且，整个.data.percpu这个section都在__init_begin和__init_end之间，也就是说，该section所占内存会在系统启动完成后被释放(free)掉。

<include/linux/percpu.h>

#define DEFINE_PER_CPU(type, name)                    \
    __attribute__((__section__(".data.percpu")))            \
    PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

在x86和ARM中，PER_CPU_ATTRIBUTES定义为空宏，所以
static DEFINE_PER_CPU(struct runqueue, runqueues);
会扩展成
static __attribute__((__section__(".data.percpu"))) __typeof__(struct runqueue) per_cpu__runqueues;
也就是在.data.percpu这个section中定义了一个变量per_cpu__runqueues，其类型是struct runqueue。
事实上，符号per_cpu__runqueues的链接地址位于.data.percpu这个section之内，它相对于__per_cpu_start的偏移量，就是该变量在每个CPU私有副本中的偏移量；真正访问时还要再加上对应CPU的偏移(__per_cpu_offset)。（在x86中是段＋偏移的寻址方式，在ARM中如何？）

初始化函数
在start_kernel()函数中会调用setup_per_cpu_areas()

#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;

EXPORT_SYMBOL(__per_cpu_offset);一个全局变量

/*
 * setup_per_cpu_areas - give every possible CPU its own copy of the
 * per-cpu data section.
 *
 * Allocates one boot-memory region containing one slot per possible CPU
 * (slot size is PERCPU_ENOUGH_ROOM rounded up to a multiple of PAGE_SIZE),
 * copies the bytes between __per_cpu_start and __per_cpu_end into each slot,
 * and records in __per_cpu_offset[cpu] the distance from __per_cpu_start to
 * that CPU's slot.
 *
 * NOTE(review): the result of alloc_bootmem_pages() is not checked here;
 * presumably the allocator does not return on failure -- confirm.
 */
static void __init setup_per_cpu_areas(void)
{
    unsigned long size, i;
    char *ptr;
    unsigned long nr_possible_cpus = num_possible_cpus();

    /* Copy section for each CPU (we discard the original) */
    size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
    ptr = alloc_bootmem_pages(size * nr_possible_cpus);

    for_each_possible_cpu(i) {
        /*
         * This is an offset, not an address: adding it to the linked
         * address of a per-cpu symbol yields CPU i's copy of that symbol.
         */
        __per_cpu_offset[i] = ptr - __per_cpu_start;
        memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
        /* Step to the next CPU's slot. */
        ptr += size;
    }
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

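        在该函数中，为每个CPU分配一段内存，并将.data.percpu中的数据拷贝到其中，每个CPU各有一份。注意__per_cpu_offset[n]保存的并不是首地址，而是CPU n的专有数据区首地址与__per_cpu_start之差（即代码中的ptr - __per_cpu_start），因此CPU n专有数据区的首地址为__per_cpu_start + __per_cpu_offset[n]。这样，前述相对于__per_cpu_start的偏移量per_cpu__runqueues，加上__per_cpu_offset[n]之后就得到该变量在CPU n上的副本地址，原来的.data.percpu这个section在系统初始化后就可以释放了。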

在percpu.h中
#define PERCPU_ENOUGH_ROOM                        \
    (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)

#define PERCPU_MODULE_RESERVE    8192 保留空间

数据结构

<arch/x86/include/asm/pda.h>
/*
 * Per processor datastructure. %gs points to it while the kernel runs.
 *
 * The leading number in a field comment below is that field's byte offset
 * within the structure. The stack canary comment requires offset 40 exactly,
 * so the fields ahead of stack_canary must not be reordered or resized.
 * data_offset is the member that the __per_cpu_offset(cpu) macro reads via
 * cpu_pda(cpu) when locating a CPU's per-cpu data.
 */
struct x8664_pda {
    struct task_struct *pcurrent;    /* 0 Current process */
    unsigned long data_offset;    /* 8 Per cpu data offset from linker
                       address */
    unsigned long kernelstack;    /* 16 top of kernel stack for current */
    unsigned long oldrsp;        /* 24 user rsp for system call */
    int irqcount;            /* 32 Irq nesting counter. Starts -1 */
    unsigned int cpunumber;        /* 36 Logical CPU number */
#ifdef CONFIG_CC_STACKPROTECTOR
    unsigned long stack_canary;    /* 40 stack canary value */
                    /* gcc-ABI: this canary MUST be at
                       offset 40!!! */
#endif
    char *irqstackptr;
    short nodenumber;        /* number of current node (32k max) */
    short in_bootmem;        /* pda lives in bootmem */
    unsigned int __softirq_pending;
    unsigned int __nmi_count;    /* number of NMI on this CPUs */
    short mmu_state;
    short isidle;
    struct mm_struct *active_mm;
    /* Per-CPU interrupt counters, judging by the names -- TODO confirm. */
    unsigned apic_timer_irqs;
    unsigned irq0_irqs;
    unsigned irq_resched_count;
    unsigned irq_call_count;
    unsigned irq_tlb_count;
    unsigned irq_thermal_count;
    unsigned irq_threshold_count;
    unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;

extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);

#define cpu_pda(i) (_cpu_pda[i])

操作函数、宏

<include/asm-generic/percpu.h>
#define per_cpu(var, cpu) \
(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))

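所以在x86_64上（per_cpu_offset(cpu)即cpu_pda(cpu)->data_offset）这个宏展开为：
#define per_cpu(var, cpu) \
(*RELOC_HIDE(&per_cpu__##var, cpu_pda(cpu)->data_offset))
即：
*(typeof(&per_cpu__var))((unsigned long)&per_cpu__var + cpu_pda(cpu)->data_offset)

因此，per_cpu这个宏的功能是：在指定cpu的私有数据区中，
取得变量per_cpu__var的那一份副本（这里的偏移量以字节为单位，而不是数组下标）。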

<include/asm-generic/percpu.h>
/*
* Add a offset to a pointer but keep the pointer as is.
*
* Only S390 provides its own means of moving the pointer.
*/
#ifndef SHIFT_PERCPU_PTR
#define SHIFT_PERCPU_PTR(__p, __offset)    RELOC_HIDE((__p), (__offset))
#endif
=========================================================
<include/linux/compiler-gcc.h>
/*
 * This macro obfuscates arithmetic on a variable address so that gcc
 * shouldn't recognize the original var, and make assumptions about it.
 *
 * ptr is passed through an empty asm statement into an unsigned long, off is
 * added to that integer (so off is a byte offset, not an element count), and
 * the sum is cast back to typeof(ptr) as the value of the statement
 * expression.
 */

#define RELOC_HIDE(ptr, off)                    \
({ unsigned long __ptr;                      \
    __asm__ ("" : "=r"(__ptr) : "0"(ptr));        \
    (typeof(ptr)) (__ptr + (off)); })
这个宏返回一个ptr型的指针，指向ptr+off
=========================================================
<include/asm-generic/percpu.h>
#define per_cpu_var(var) per_cpu__##var
///这个宏并不定义变量，只是通过##把参数var拼接成变量名per_cpu__var，
如per_cpu_var(runqueues)展开为per_cpu__runqueues
==========================================================

#define per_cpu_offset(x) (__per_cpu_offset(x))
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
data_offset-----Per cpu data offset from linker address.即该CPU的私有数据区
相对于链接地址（.data.percpu原始位置）的偏移量；每CPU变量的链接地址加上它，就得到该变量在这个CPU上的副本地址
extern struct x8664_pda **_cpu_pda;
#define cpu_pda(i) (_cpu_pda[i])全局变量
struct x8664_pda 是一个Per processor datastructure，每个CPU各有一个，用来保存该CPU的私有信息（其中data_offset记录该CPU的percpu数据区偏移）。
这个数据结构的描述见“数据结构”一节。
注：这个结构只是在x86体系结构下面的，在ARM下如何？？？

get_cpu_var(var) 和 __get_cpu_var(var)

#define get_cpu_var(var) (*({                \
    extern int simple_identifier_##var(void);    \
    preempt_disable();    /* 禁止内核抢占 */    \
    &__get_cpu_var(var); }))
==================================================
#define __get_cpu_var(var) \
    (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
相当于：*(typeof(&per_cpu__var))((unsigned long)&per_cpu__var + my_cpu_offset)，即当前CPU上的那一份副本

put_cpu_var(var)

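#define put_cpu_var(var) preempt_enable()
仅仅是重新允许内核抢占！！！什么意思呢？它与get_cpu_var()中的preempt_disable()配对：在get_cpu_var()和put_cpu_var()之间禁止抢占，可以防止当前任务在访问每CPU变量的过程中被调度到别的CPU上。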

alloc_percpu(type) 动态分配type类型的每CPU数组，返回其地址。

#define alloc_percpu(type)        (type *)__alloc_percpu(sizeof(type))
#define __alloc_percpu(size)        percpu_alloc_mask((size), GFP_KERNEL, cpu_possible_map)
#define percpu_alloc_mask(size, gfp, mask)        __percpu_alloc_mask((size), (gfp), &(mask))
在mm/allocpercpu.c中定义：
/**
 * __percpu_alloc_mask - initial setup of per-cpu data
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @mask: populate per-data for cpu's selected through mask bits
 *
 * Populating per-cpu data for all online cpu's would be a typical use case,
 * which is simplified by the percpu_alloc() wrapper.
 * Per-cpu objects are populated with zeroed buffers.
 *
 * Returns the __percpu_disguise()d pointer to a zeroed table of
 * nr_cpu_ids pointers, or NULL if the table allocation fails or
 * __percpu_populate_mask() returns non-zero.
 */
void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
{
    /*
     * We allocate whole cache lines to avoid false sharing
     */
    size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
    void *pdata = kzalloc(sz, gfp);
    /* Computed before the NULL check; only pdata is tested and freed. */
    void *__pdata = __percpu_disguise(pdata);

    if (unlikely(!pdata))
        return NULL;
    /* A zero return from the populate step is treated as success. */
    if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
        return __pdata;
    /* Populate failed: release the pointer table allocated above. */
    kfree(pdata);
    return NULL;
}
EXPORT_SYMBOL_GPL(__percpu_alloc_mask);

bingqingsuimeng

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
多核中percpu

What is percpu data? percpu data 是内核为smp系统中不同CPU之间的数据保护方式，系统为每个CPU维护一段私有的空间，在这段空间中的数据只有这个CPU能访问。但是这种方式不提供对异步函数访问的保护，因此在同一个CPU上还要另外的同步原语的协作。arch/x86/kernel/vmlinux.lds中有:.....
复制链接

扫一扫

专栏目录