Linux 内核学习之内存管理(一) 总体描述

Linux 内存映射

下面引用两张网上找到的图片,总结了Linux kernel的内存管理机制。
这里写图片描述

这里写图片描述
总体来说,32 位 x86 上 Linux 把物理内存划分为以下几个管理区 (zone):

  • 0-16MB DMA
  • 16-896MB Normal
  • 896-4096MB 高端内存 Highmem
    其中: 内核线性地址空间(3GB-4GB)中偏移 896-1024MB 的最后 128MB 用来实现固定映射、永久映射和高端内存的临时映射,高端内存必须经由这些映射才能被内核访问。

页描述符
struct page 用来记录每个页框状态之类的。

mem_map
数组,所有页描述符都存在这里。

/*
 * Page descriptor: one instance records the state of a single page frame.
 *
 * Most members are overlaid in unions, so the meaning of a given word
 * depends on what the page frame is currently used for (page cache or
 * anonymous memory, SLAB/SLUB/SLOB, compound pages, page-table locks);
 * see the per-member comments below.
 */
struct page {
    /* First double word block */
    unsigned long flags;        /* Atomic flags, some possibly
                                 * updated asynchronously */
    union {
        struct address_space *mapping;  /* If low bit clear, points to
                                         * inode address_space, or NULL.
                                         * If page mapped as anonymous
                                         * memory, low bit is set, and
                                         * it points to anon_vma object:
                                         * see PAGE_MAPPING_ANON below.
                                         */
        void *s_mem;            /* slab first object */
    };

    /* Second double word */
    struct {
        union {
            pgoff_t index;      /* Our offset within mapping. */
            void *freelist;     /* sl[aou]b first free object */
            bool pfmemalloc;    /* If set by the page allocator,
                                 * ALLOC_NO_WATERMARKS was set
                                 * and the low watermark was not
                                 * met implying that the system
                                 * is under some pressure. The
                                 * caller should try ensure
                                 * this page is only used to
                                 * free other pages.
                                 */
        };

        union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
            /* Used for cmpxchg_double in slub */
            unsigned long counters;
#else
            /*
             * Keep _count separate from slub cmpxchg_double data.
             * As the rest of the double word is protected by
             * slab_lock but _count is not.
             */
            unsigned counters;
#endif

            struct {

                union {
                    /*
                     * Count of ptes mapped in
                     * mms, to show when page is
                     * mapped & limit reverse map
                     * searches.
                     *
                     * Used also for tail pages
                     * refcounting instead of
                     * _count. Tail pages cannot
                     * be mapped and keeping the
                     * tail page _count zero at
                     * all times guarantees
                     * get_page_unless_zero() will
                     * never succeed on tail
                     * pages.
                     */
                    atomic_t _mapcount;

                    struct { /* SLUB */
                        unsigned inuse:16;
                        unsigned objects:15;
                        unsigned frozen:1;
                    };
                    int units;  /* SLOB */
                };
                atomic_t _count;        /* Usage count, see below. */
            };
            unsigned int active;    /* SLAB */
        };
    };

    /* Third double word block */
    union {
        struct list_head lru;   /* Pageout list, eg. active_list
                                 * protected by zone->lru_lock !
                                 * Can be used as a generic list
                                 * by the page owner.
                                 */
        struct {        /* slub per cpu partial pages */
            struct page *next;  /* Next partial slab */
#ifdef CONFIG_64BIT
            int pages;  /* Nr of partial slabs left */
            int pobjects;   /* Approximate # of objects */
#else
            short int pages;
            short int pobjects;
#endif
        };

        struct slab *slab_page; /* slab fields */
        struct rcu_head rcu_head;   /* Used by SLAB
                                     * when destroying via RCU
                                     */
        /* First tail page of compound page */
        struct {
            compound_page_dtor *compound_dtor;
            unsigned long compound_order;
        };

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte; /* protected by page->ptl */
#endif
    };

    /* Remainder is not double word aligned */
    union {
        unsigned long private;      /* Mapping-private opaque data:
                                     * usually used for buffer_heads
                                     * if PagePrivate set; used for
                                     * swp_entry_t if PageSwapCache;
                                     * indicates order in the buddy
                                     * system if PG_buddy is set.
                                     */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
        spinlock_t *ptl;
#else
        spinlock_t ptl;
#endif
#endif
        struct kmem_cache *slab_cache;  /* SL[AU]B: Pointer to slab */
        struct page *first_page;    /* Compound tail pages */
    };

#ifdef CONFIG_MEMCG
    struct mem_cgroup *mem_cgroup;
#endif

    /*
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     *
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
     */
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;          /* Kernel virtual address (NULL if
                               not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

#ifdef CONFIG_KMEMCHECK
    /*
     * kmemcheck wants to track the status of each byte in a page; this
     * is a pointer to such a status block. NULL if not tracked.
     */
    void *shadow;
#endif

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
    int _last_cpupid;
#endif
};
重点关注下 
_count 字段
   页的引用计数器。>=1 表示该页已分配给一个或多个进程,或用于存放内核数据结构。(早期 2.6 内核中该字段以 -1 表示空闲;在本文所列的这一版 struct page 中,_count 为 0 表示页框空闲。)
flags
   页的状态描述标志。     

高端内存映射用到了

  • 永久映射
  • 固定映射
  • 临时映射

先理解下pgd pud pmd pte
32 位 Linux 内核不开启 PAE 时只用到了 pgd 和 pte(两级页表)

/*
 * Index of the page global directory entry that covers a linear address:
 * discard the low PGDIR_SHIFT bits, then mask the result down to one of the
 * PTRS_PER_PGD directory slots (the mask form relies on PTRS_PER_PGD being
 * a power of two).
 */
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
swapper_pg_dir + pgd_idx -> 页全局目录项,其中保存着页表的物理地址;再用线性地址中间的 10 位在该页表中索引,得到 pte
*pte & 0xfffff000 就是页框的物理地址 //4kb对齐

反过来
页框物理地址 | flags -> *pte
页表物理地址 | flags -> *(swapper_pg_dir + pgd_idx)
这样就建立了映射

enum fixed_addresses 用来为固定映射预留线性地址空间:每个枚举值是一个固定映射索引,通过该索引可以直接算出对应的线性地址,进而找到对应的页表项。FIX_KMAP_BEGIN 到 FIX_KMAP_END 这一段就是为临时映射预留的。

/*
 * Indices of the fixed-mapping slots. Each enumerator names one page-sized
 * slot; the set of enumerators that exist depends on the CONFIG_* options
 * tested below, so the numeric values are configuration dependent.
 */
enum fixed_addresses {
#ifdef CONFIG_X86_32
    FIX_HOLE,
#else
#ifdef CONFIG_X86_VSYSCALL_EMULATION
    VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
    PVCLOCK_FIXMAP_BEGIN,
    PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
#endif
#endif
    FIX_DBGP_BASE,
    FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
    FIX_OHCI1394_BASE,
#endif
#ifdef CONFIG_X86_LOCAL_APIC
    FIX_APIC_BASE,  /* local (CPU) APIC -- required for SMP or not */
#endif
#ifdef CONFIG_X86_IO_APIC
    FIX_IO_APIC_BASE_0,
    FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
    FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
    /* KM_TYPE_NR slots per CPU, NR_CPUS CPUs: the temporary-mapping window */
    FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */  // temporary mappings (slots used by kmap_atomic_prot)
    FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#ifdef CONFIG_PCI_MMCONFIG
    FIX_PCIE_MCFG,
#endif
#endif
#ifdef CONFIG_PARAVIRT
    FIX_PARAVIRT_BOOTMAP,
#endif
    FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
    FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
#ifdef  CONFIG_X86_INTEL_MID
    FIX_LNW_VRTC,
#endif
    /* Everything above is a permanent slot; boot-time slots follow. */
    __end_of_permanent_fixed_addresses,

    /*
     * 512 temporary boot-time mappings, used by early_ioremap(),
     * before ioremap() is functional.
     *
     * If necessary we round it up to the next 512 pages boundary so
     * that we can have a single pgd entry and a single pte table:
     */
#define NR_FIX_BTMAPS       64
#define FIX_BTMAPS_SLOTS    8
#define TOTAL_FIX_BTMAPS    (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
    /*
     * The XOR/mask test is non-zero when the first and last boot-time
     * index differ in a bit at or above PTRS_PER_PTE; in that case the
     * start is rounded up to a multiple of TOTAL_FIX_BTMAPS, otherwise
     * the boot-time range starts right after the permanent slots.
     */
    FIX_BTMAP_END =
     (__end_of_permanent_fixed_addresses ^
      (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
     -PTRS_PER_PTE
     ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
       (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
     : __end_of_permanent_fixed_addresses,
    /* BEGIN is the numerically larger index of the boot-time range. */
    FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
#ifdef CONFIG_X86_32
    FIX_WP_TEST,
#endif
#ifdef CONFIG_INTEL_TXT
    FIX_TBOOT_BASE,
#endif
    __end_of_fixed_addresses
};

通过这两个宏来实现线性地址与索引之间的转换
/*
 * Conversion between a fixed-mapping index and its linear address.
 * Slots are laid out downwards from FIXADDR_TOP, one page per index, so a
 * larger index yields a lower address. __virt_to_fix() ignores the offset
 * within the page.
 */
#define __fix_to_virt(idx)      (FIXADDR_TOP - ((idx) << PAGE_SHIFT))
#define __virt_to_fix(vaddr)    ((FIXADDR_TOP - ((vaddr) & PAGE_MASK)) >> PAGE_SHIFT)

永久映射

/*
 * Return a kernel virtual address for @page.
 *
 * Calls might_sleep() first. A page that is not highmem is resolved with
 * page_address(); a highmem page is handed to kmap_high().
 */
void *kmap(struct page *page)
{
    might_sleep();

    return PageHighMem(page) ? kmap_high(page) : page_address(page);
}

/*
 * Map a highmem @page and return its kernel virtual address.
 *
 * The whole lookup-or-create sequence runs between lock_kmap() and
 * unlock_kmap(). An existing address from page_address() is reused;
 * otherwise map_new_virtual() supplies one. The pkmap_count entry for
 * the address is incremented on every call.
 */
void *kmap_high(struct page *page)
{
    unsigned long addr;

    /* The stored virtual address is only trustworthy under the kmap lock. */
    lock_kmap();

    addr = (unsigned long)page_address(page);
    if (addr == 0)
        addr = map_new_virtual(page);

    pkmap_count[PKMAP_NR(addr)] += 1;
    /* After the increment the count must be at least 2. */
    BUG_ON(pkmap_count[PKMAP_NR(addr)] < 2);

    unlock_kmap();

    return (void *)addr;
}

固定映射

临时映射

/*
 * Set up the state used by the temporary-mapping code: cache the pte that
 * backs the first kmap fixmap slot (FIX_KMAP_BEGIN) in kmap_pte and set
 * the default protection kmap_prot to PAGE_KERNEL.
 */
static void __init kmap_init(void)
{
    /* Linear address of slot FIX_KMAP_BEGIN -> its page table entry. */
    kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
    kmap_prot = PAGE_KERNEL;
}
/*
 * Temporarily map @page using the default protection kmap_prot.
 * Thin wrapper around kmap_atomic_prot(); returns whatever it returns.
 */
void *kmap_atomic(struct page *page)
{
    void *vaddr = kmap_atomic_prot(page, kmap_prot);

    return vaddr;
}
/*
 * Temporarily map @page with protection @prot and return its address.
 *
 * Preemption and page faults are disabled on entry and are still disabled
 * when the function returns, on both return paths. A page that is not
 * highmem is resolved with page_address() and no fixmap slot is used.
 */
void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
    unsigned long addr;
    int slot, depth;

    preempt_disable();
    pagefault_disable();

    if (!PageHighMem(page))
        return page_address(page);

    /* Per-CPU window of KM_TYPE_NR slots, offset by the pushed index. */
    depth = kmap_atomic_idx_push();
    slot = depth + KM_TYPE_NR * smp_processor_id();
    addr = __fix_to_virt(FIX_KMAP_BEGIN + slot);

    /*
     * The pte for this slot is addressed as kmap_pte - slot, i.e. counting
     * backwards from kmap_pte. The slot must currently be empty.
     */
    BUG_ON(!pte_none(*(kmap_pte - slot)));
    set_pte(kmap_pte - slot, mk_pte(page, prot));
    arch_flush_lazy_mmu_mode();

    return (void *)addr;
}

内核用到的机制

  • 非一致内存访问 numa
  • 伙伴系统
  • slab系统
  • 每CPU页框高速缓存 (per-CPU page frame cache)
  • TLB
  • 保留的页框池
  • 内存池
  • 非连续内存管理
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值