memory management slab allocator (3)

When are the checks performed?

1] When an object is allocated

static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
    unsigned long save_flags;
    void *objp;

    /* abridged: only the debug-check hooks are shown */
    cache_alloc_debugcheck_before(cachep, flags);
    local_irq_save(save_flags);
    objp = __do_cache_alloc(cachep, flags);
    local_irq_restore(save_flags);
    objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);

    return objp;
}
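
For context, the exported kmem_cache_alloc() is only a thin wrapper that passes its own return address down as caller; a sketch of the mm/slab.c wrapper for this kernel generation, quoted from memory (the exact trace arguments are an assumption):

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

    trace_kmem_cache_alloc(_RET_IP_, ret,
                           obj_size(cachep), cachep->buffer_size, flags);

    return ret;
}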

2] When a slab is destroyed (all of its objects are released back to the system)

/**
 * slab_destroy - destroy and release all objects in a slab
 * @cachep: cache pointer being destroyed
 * @slabp: slab pointer being destroyed
 *
 * Destroy all the objs in a slab, and release the mem back to the system.
 * Before calling the slab must have been unlinked from the cache.  The
 * cache-lock is not held/needed.
 */
static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
{
    void *addr = slabp->s_mem - slabp->colouroff;

    slab_destroy_debugcheck(cachep, slabp);
    if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
        struct slab_rcu *slab_rcu;

        slab_rcu = (struct slab_rcu *)slabp;
        slab_rcu->cachep = cachep;
        slab_rcu->addr = addr;
        call_rcu(&slab_rcu->head, kmem_rcu_free);
    } else {
        kmem_freepages(cachep, addr);
        if (OFF_SLAB(cachep))
            kmem_cache_free(cachep->slabp_cache, slabp);
    }
}
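
The cast of slabp to struct slab_rcu * works because the RCU bookkeeping is sized to fit over the slab descriptor; for reference, the structure in mm/slab.c of this kernel generation looks roughly like this (quoted from memory):

struct slab_rcu {
    struct rcu_head head;       /* queued via call_rcu() above */
    struct kmem_cache *cachep;  /* cache the slab belongs to */
    void *addr;                 /* start of the slab's pages, later handed to kmem_freepages() */
};

After a grace period, kmem_rcu_free() runs and performs the same kmem_freepages()/kmem_cache_free() pair as the non-RCU branch.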

3] When an object is deallocated back to its cache

/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
    unsigned long flags;

    local_irq_save(flags);
    debug_check_no_locks_freed(objp, obj_size(cachep));
    if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
        debug_check_no_obj_freed(objp, obj_size(cachep));
    __cache_free(cachep, objp, __builtin_return_address(0));
    local_irq_restore(flags);

    trace_kmem_cache_free(_RET_IP_, objp);
}


/*
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released.  Called with disabled ints.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
    void *caller)
{
    struct array_cache *ac = cpu_cache_get(cachep);

    check_irq_off();
    kmemleak_free_recursive(objp, cachep->flags);
    objp = cache_free_debugcheck(cachep, objp, caller);

    kmemcheck_slab_free(cachep, objp, obj_size(cachep));

    /*
     * Skip calling cache_free_alien() when the platform is not numa.
     * This will avoid cache misses that happen while accessing slabp (which
     * is per page memory  reference) to get nodeid. Instead use a global
     * variable to skip the call, which is mostly likely to be present in
     * the cache.
     */
    if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
        return;

    if (likely(ac->avail < ac->limit)) {
        STATS_INC_FREEHIT(cachep);
    } else {
        STATS_INC_FREEMISS(cachep);
        cache_flusharray(cachep, ac);
    }

    ac->entry[ac->avail++] = objp;

}
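
The ac->avail / ac->limit / ac->entry[] fields used above belong to the per-CPU array cache; its layout in this kernel generation is roughly the following (quoted from memory):

struct array_cache {
    unsigned int avail;       /* number of object pointers currently cached */
    unsigned int limit;       /* freeing beyond this triggers cache_flusharray() */
    unsigned int batchcount;  /* how many objects move per refill/flush */
    unsigned int touched;
    spinlock_t lock;
    void *entry[];            /* the cached object pointers themselves */
};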

What do the checks actually verify?

When allocating an object -> cache_alloc_debugcheck_before ; cache_alloc_debugcheck_after

cache_alloc_debugcheck_before

/* Only the allocation (gfp) flags are checked here */
static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
                        gfp_t flags)
{
    might_sleep_if(flags & __GFP_WAIT);
#if DEBUG
    kmem_flagcheck(cachep, flags);
#endif
}
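
With DEBUG enabled, kmem_flagcheck() only verifies that GFP_DMA requests go to DMA-capable caches and non-DMA requests do not; a sketch from memory (the gfpflags field name is assumed for this kernel generation):

static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
{
    if (CONFIG_ZONE_DMA_FLAG) {
        if (flags & GFP_DMA)
            BUG_ON(!(cachep->gfpflags & GFP_DMA));
        else
            BUG_ON(cachep->gfpflags & GFP_DMA);
    }
}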

cache_alloc_debugcheck_after

static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                gfp_t flags, void *objp, void *caller)
{
    if (!objp)
        return objp;
    /*
     1. check_poison_obj: the object should still be poisoned as free (0x6b)
     2. poison_obj: re-poison it as in use (0x5a)
     */
    if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
        if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
            kernel_map_pages(virt_to_page(objp),
                     cachep->buffer_size / PAGE_SIZE, 1);
        else
            check_poison_obj(cachep, objp);
#else
        check_poison_obj(cachep, objp);
#endif
        poison_obj(cachep, objp, POISON_INUSE);
    }
    /* Record the caller in the user word (who requested this object)
     */
    if (cachep->flags & SLAB_STORE_USER)
        *dbg_userword(cachep, objp) = caller;
    /* 1. The red zones should currently be RED_INACTIVE
     * 2. Set them to RED_ACTIVE
     */
    if (cachep->flags & SLAB_RED_ZONE) {
        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
                *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
            slab_error(cachep, "double free, or memory outside"
                        " object was overwritten");
            printk(KERN_ERR
                "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
                objp, *dbg_redzone1(cachep, objp),
                *dbg_redzone2(cachep, objp));
        }
        *dbg_redzone1(cachep, objp) = RED_ACTIVE;
        *dbg_redzone2(cachep, objp) = RED_ACTIVE;
    }
#ifdef CONFIG_DEBUG_SLAB_LEAK
    {
        struct slab *slabp;
        unsigned objnr;

        slabp = page_get_slab(virt_to_head_page(objp));
        objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
        slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
    }
#endif
    objp += obj_offset(cachep);
    if (cachep->ctor && cachep->flags & SLAB_POISON)
        cachep->ctor(objp);
    if (ARCH_SLAB_MINALIGN &&
        ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
        printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
               objp, (int)ARCH_SLAB_MINALIGN);
    }
    return objp;
}
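
The magic values used by the poison and red-zone checks come from include/linux/poison.h (values quoted from memory):

#define POISON_INUSE    0x5a    /* for use-uninitialised poisoning */
#define POISON_FREE     0x6b    /* for use-after-free poisoning */
#define POISON_END      0xa5    /* end-byte of poisoning, distinct from POISON_FREE */

#define RED_INACTIVE    0x09F911029D74E35BULL   /* when obj is inactive */
#define RED_ACTIVE      0xD84156C5635688C0ULL   /* when obj is active */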

cache_free_debugcheck

static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
                   void *caller)
{
    struct page *page;
    unsigned int objnr;
    struct slab *slabp;
    /* Get the kmem_cache back from the object:
     1. kernel address -> page -> page->lru.next
    static inline struct kmem_cache *virt_to_cache(const void *obj)
    {
        struct page *page = virt_to_head_page(obj);
        return page_get_cache(page);
    }
    static inline struct kmem_cache *page_get_cache(struct page *page)
    {
        page = compound_head(page);
        BUG_ON(!PageSlab(page));
        return (struct kmem_cache *)page->lru.next;
    }
     **/
    BUG_ON(virt_to_cache(objp) != cachep);
    /*
     * memory layout of objects:
     * 0        : objp
     * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
     *         the end of an object is aligned with the end of the real
     *         allocation. Catches writes behind the end of the allocation.
     * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
     *         redzone word.
     * cachep->obj_offset: The real object.
     * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
     * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
     *                    [BYTES_PER_WORD long]
     *

    static int obj_offset(struct kmem_cache *cachep)
    {
        return cachep->obj_offset;
    }
     */
    objp -= obj_offset(cachep);

    /* kfree_debugcheck validates that objp is a valid kernel address */
    kfree_debugcheck(objp);

    /* object -> page -> page->lru.prev: from the object we can also recover its slab (and, as shown above, its cache)
    static inline struct slab *page_get_slab(struct page *page)
    {
        BUG_ON(!PageSlab(page));
        return (struct slab *)page->lru.prev;
    }
    */
    page = virt_to_head_page(objp);
    slabp = page_get_slab(page);

    /* Check that the red zones are currently RED_ACTIVE; if not, report the
     * error. Then set them back to RED_INACTIVE.
     **/
    if (cachep->flags & SLAB_RED_ZONE) {
        verify_redzone_free(cachep, objp);
        *dbg_redzone1(cachep, objp) = RED_INACTIVE;
        *dbg_redzone2(cachep, objp) = RED_INACTIVE;
    }
    /* Record the caller of this free */
    if (cachep->flags & SLAB_STORE_USER)
        *dbg_userword(cachep, objp) = caller;
    /* Validate the object's index within the slab
     * We want to avoid an expensive divide : (offset / cache->buffer_size)
     *   Using the fact that buffer_size is a constant for a particular cache,
     *   we can replace (offset / cache->buffer_size) by
     *   reciprocal_divide(offset, cache->reciprocal_buffer_size)

    static inline unsigned int obj_to_index(const struct kmem_cache *cache,
                        const struct slab *slab, void *obj)
    {
        u32 offset = (obj - slab->s_mem);
        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
    }
    static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
                     unsigned int idx)
    {
        return slab->s_mem + cache->buffer_size * idx;
    }
    **/
    objnr = obj_to_index(cachep, slabp, objp);
    BUG_ON(objnr >= cachep->num);
    BUG_ON(objp != index_to_obj(cachep, slabp, objnr));

#ifdef CONFIG_DEBUG_SLAB_LEAK
    slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
#endif
    /* 1] If buffer_size is a whole multiple of PAGE_SIZE and the slab
     management data is kept off-slab, store_stackinfo() saves the caller's
     stack into the object and the object's pages are then unmapped:
    static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
                    unsigned long caller)
    {
        int size = obj_size(cachep);

        addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

        if (size < 5 * sizeof(unsigned long))
            return;

        *addr++ = 0x12345678;
        *addr++ = caller;
        *addr++ = smp_processor_id();
        size -= 3 * sizeof(unsigned long);
        {
            unsigned long *sptr = &caller;
            unsigned long svalue;

            while (!kstack_end(sptr)) {
                svalue = *sptr++;
                if (kernel_text_address(svalue)) {
                    *addr++ = svalue;
                    size -= sizeof(unsigned long);
                    if (size <= sizeof(unsigned long))
                        break;
                }
            }

        }
        *addr++ = 0x87654321;
    }
    2] For any other buffer_size, poison_obj() simply fills the object with
    POISON_FREE.
    **/

    if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
        if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
            store_stackinfo(cachep, objp, (unsigned long)caller);
            kernel_map_pages(virt_to_page(objp),
                     cachep->buffer_size / PAGE_SIZE, 0);
        } else {
            poison_obj(cachep, objp, POISON_FREE);
        }
#else
        poison_obj(cachep, objp, POISON_FREE);
#endif
    }
    return objp;
}
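
verify_redzone_free(), called above but not quoted, is the mirror image of the red-zone check in cache_alloc_debugcheck_after(): on a free the red zones must still be RED_ACTIVE. Roughly (quoted from memory):

static void verify_redzone_free(struct kmem_cache *cache, void *obj)
{
    unsigned long long redzone1, redzone2;

    redzone1 = *dbg_redzone1(cache, obj);
    redzone2 = *dbg_redzone2(cache, obj);

    if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
        return;                         /* normal case: the object was live */

    if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
        slab_error(cache, "double free detected");
    else
        slab_error(cache, "memory outside object was overwritten");

    printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
           obj, redzone1, redzone2);
}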


slab_destroy_debugcheck

/* Walk every object in the slab (index_to_obj: slab + index -> object) and
 * run the checks on each one.
 **/
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
    int i;
    for (i = 0; i < cachep->num; i++) {
        void *objp = index_to_obj(cachep, slabp, i);

        if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
            if (cachep->buffer_size % PAGE_SIZE == 0 &&
                    OFF_SLAB(cachep))
                kernel_map_pages(virt_to_page(objp),
                    cachep->buffer_size / PAGE_SIZE, 1);
            else
                check_poison_obj(cachep, objp);
#else
            check_poison_obj(cachep, objp);
#endif
        }
        if (cachep->flags & SLAB_RED_ZONE) {
            if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
                slab_error(cachep, "start of a freed object "
                       "was overwritten");
            if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
                slab_error(cachep, "end of a freed object "
                       "was overwritten");
        }
    }
}

kfree_debugcheck:

static void kfree_debugcheck(const void *objp)
{
    if (!virt_addr_valid(objp)) {
        printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
               (unsigned long)objp);
        BUG();
    }
}
#define virt_addr_valid(kaddr)    ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)


How is an error handled once a check fails?

static void check_poison_obj(struct kmem_cache *cachep, void *objp)
{
    char *realobj;
    int size, i;
    int lines = 0;

    realobj = (char *)objp + obj_offset(cachep);
    size = obj_size(cachep);

    for (i = 0; i < size; i++) {
        char exp = POISON_FREE;
        if (i == size - 1)
            exp = POISON_END;
        if (realobj[i] != exp) {
            int limit;
            /* Mismatch ! */
            /* Print header */
            if (lines == 0) {
                printk(KERN_ERR
                    "Slab corruption (%s): %s start=%p, len=%d\n",
                    print_tainted(), cachep->name, realobj, size);
                print_objinfo(cachep, objp, 0);
            }
            /* Hexdump the affected line */
            i = (i / 16) * 16;
            limit = 16;
            if (i + limit > size)
                limit = size - i;
            dump_line(realobj, i, limit);
            i += 16;
            lines++;
            /* Limit to 5 lines */
            if (lines > 5)
                break;
        }
    }
    if (lines != 0) {
        /* Print some data about the neighboring objects, if they
         * exist:
         */
        struct slab *slabp = virt_to_slab(objp);
        unsigned int objnr;
        /*print the previous object*/
        objnr = obj_to_index(cachep, slabp, objp);
        if (objnr) {
            objp = index_to_obj(cachep, slabp, objnr - 1);
            realobj = (char *)objp + obj_offset(cachep);
            printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
                   realobj, size);
            print_objinfo(cachep, objp, 2);
        }
        /*print the next object*/
        if (objnr + 1 < cachep->num) {
            objp = index_to_obj(cachep, slabp, objnr + 1);
            realobj = (char *)objp + obj_offset(cachep);
            printk(KERN_ERR "Next obj: start=%p, len=%d\n",
                   realobj, size);
            print_objinfo(cachep, objp, 2);
        }
        BUG_ON(1);
    }
}


static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
{
    int i, size;
    char *realobj;
    /* print the red zones */
    if (cachep->flags & SLAB_RED_ZONE) {
        printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
            *dbg_redzone1(cachep, objp),
            *dbg_redzone2(cachep, objp));
    }
    /*print the user*/
    if (cachep->flags & SLAB_STORE_USER) {
        printk(KERN_ERR "Last user: [<%p>]",
            *dbg_userword(cachep, objp));
        print_symbol("(%s)",
                (unsigned long)*dbg_userword(cachep, objp));
        printk("\n");
    }
    realobj = (char *)objp + obj_offset(cachep);
    size = obj_size(cachep);
    for (i = 0; i < size && lines; i += 16, lines--) {
        int limit;
        limit = 16;
        if (i + limit > size)
            limit = size - i;
        dump_line(realobj, i, limit);
    }
}

/*print the object element*/
static void dump_line(char *data, int offset, int limit)
{
    int i;
    unsigned char error = 0;
    int bad_count = 0;

    printk(KERN_ERR "%03x: ", offset);
    for (i = 0; i < limit; i++) {
        if (data[offset + i] != POISON_FREE) {
            error = data[offset + i];
            bad_count++;
        }
    }
    print_hex_dump(KERN_CONT, "", 0, 16, 1,
            &data[offset], limit, 1);

    if (bad_count == 1) {
        error ^= POISON_FREE;
        if (!(error & (error - 1))) {
            printk(KERN_ERR "Single bit error detected. Probably "
                    "bad RAM.\n");
        }
    }
}

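The power-of-two test above is what turns a poison mismatch into a RAM hint: XORing the bad byte with POISON_FREE leaves only the flipped bits set, and (error & (error - 1)) == 0 holds exactly when a single bit is set. A worked example with hypothetical byte values:

/* expected POISON_FREE = 0x6b, but the byte reads back as 0x6f */
error = 0x6f ^ 0x6b;    /* = 0x04: only bit 2 differs                  */
                        /* 0x04 & 0x03 == 0 -> single-bit flip,        */
                        /* so "Single bit error ... Probably bad RAM"  */

/* a two-bit corruption, e.g. the byte reads back as 0x68 */
error = 0x68 ^ 0x6b;    /* = 0x03                                      */
                        /* 0x03 & 0x02 != 0 -> not reported as bad RAM */
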
Why do some caches never print the user caller?

struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
    unsigned long flags, void (*ctor)(void *))
    
    -->

Setting the user-tracking flags

    /*
     * Enable redzoning and last user accounting, except for caches with
     * large objects, if the increased size would increase the object size
     * above the next power of two: caches with object sizes just above a
     * power of two have a significant amount of internal fragmentation.
     */

    if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
                        2 * sizeof(unsigned long long)))
        flags |= SLAB_RED_ZONE | SLAB_STORE_USER;

    /* disable debug if necessary */
    if (ralign > __alignof__(unsigned long long))
        flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

    if (flags & CFLGS_OFF_SLAB) {
        /* really off slab. No need for manual alignment */
        slab_size = cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
    #ifdef CONFIG_PAGE_POISONING
        /* If we're going to use the generic kernel_map_pages()
         * poisoning, then it's going to smash the contents of
         * the redzone and userword anyhow, so switch them off.
         */
        if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
            flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
    #endif
    }
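
A quick worked example of the first test (assuming REDZONE_ALIGN == 8 and sizeof(unsigned long long) == 8): for size == 4096, fls(4095) == 12 while fls(4095 + 8 + 16) == fls(4119) == 13, so the condition fails and neither SLAB_RED_ZONE nor SLAB_STORE_USER is set; for size == 5000, fls(4999) == fls(5023) == 13, so both flags stay on. Caches that fail this test, have their alignment bumped above that of unsigned long long, or hit the off-slab CONFIG_PAGE_POISONING case are exactly the ones whose objects carry no user word, which is why "Last user" is missing from some corruption reports.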

How the object's backing pages are handled

cache_free_debugcheck
    if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
        if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
            store_stackinfo(cachep, objp, (unsigned long)caller);
            kernel_map_pages(virt_to_page(objp),
                     cachep->buffer_size / PAGE_SIZE, 0);
        } else {
            poison_obj(cachep, objp, POISON_FREE);
        }
#else
        poison_obj(cachep, objp, POISON_FREE);
#endif
    }

static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
                unsigned long caller)
{
    int size = obj_size(cachep);

    addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

    if (size < 5 * sizeof(unsigned long))
        return;

    *addr++ = 0x12345678;
    *addr++ = caller;
    *addr++ = smp_processor_id();
    size -= 3 * sizeof(unsigned long);
    {
        unsigned long *sptr = &caller;
        unsigned long svalue;

        while (!kstack_end(sptr)) {
            svalue = *sptr++;
            if (kernel_text_address(svalue)) {
                *addr++ = svalue;
                size -= sizeof(unsigned long);
                if (size <= sizeof(unsigned long))
                    break;
            }
        }

    }
    *addr++ = 0x87654321;
}

void kernel_map_pages(struct page *page, int numpages, int enable)
{
    if (enable)
        unpoison_pages(page, numpages);
    else
        poison_pages(page, numpages);
}
static void poison_pages(struct page *page, int n)
{
    int i;

    for (i = 0; i < n; i++)
        poison_page(page + i);
}
static void poison_page(struct page *page)
{
    void *addr = kmap_atomic(page);

    set_page_poison(page);
    memset(addr, PAGE_POISON, PAGE_SIZE);/*0xaa*/
    kunmap_atomic(addr);
}

