利用save_stack_trace()、print_stack_trace()排查Linux内核的内存泄漏

在之前公司搞了5年的linux内核和驱动,打算翻翻笔记拿点东西出来写。

查内存泄露时自己想的一个办法,经过多次考验是有效的,在32位,64位的arm上都试过,检查出来过原生内核的内存泄露bug,后来在mips系统也用过一次。

简单的原理是:在内存分配的入口函数__alloc_pages_nodemask()中调用save_stack_trace(),把每次内存分配的调用堆栈保存下来;然后每隔一段时间(比如两分钟),统计一下当前已分配出去的页分别是通过怎样的堆栈分配出去的。

比如以下例子,如果统计出来非常多的页是通过下面堆栈分配的,那就说明videobuf_alloc函数存在内存分配但不释放的情况。

[ 869.964000] [<800baf34>] slob_new_pages.clone.19+0x18/0x38
[ 869.968000] [<800bb138>] slob_alloc.clone.20+0x1e4/0x310
[ 869.976000] [<800bb3b4>] __kmalloc_node+0xbc/0xd8
[ 869.980000] [<802d7034>] videobuf_alloc+0x34/0x74
[ 869.984000] [<802d7160>] __videobuf_mmap_setup+0xec/0x154
[ 869.992000] [<802d79e4>] videobuf_reqbufs+0x190/0x22c
[ 869.996000] [<802d95fc>] gl5201vout_vidioc_reqbufs+0x94/0x114
[ 870.000000] [<802cc8c4>] __video_do_ioctl+0x1420/0x4114
[ 870.008000] [<802cf794>] video_ioctl2+0x1dc/0x558
[ 870.012000] [<800cf2dc>] vfs_ioctl+0xd0/0xec
[ 870.016000] [<800cfbf4>] do_vfs_ioctl+0x90/0x6b8
[ 870.020000] [<800d0260>] sys_ioctl+0x44/0x94
[ 870.024000] [<80018524>] stack_done+0x20/0x3c

下面是代码示例,首先是在一个初始化函数分配足够的内存用于保存堆栈,每一个page都要一块内存保存堆栈,512M内存含131072个页,需要131072*sizeof(struct my_save_condense)内存空间,

然后搞一个定期统计的work。

 #include <linux/errno.h>
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>

/*
 * One record of a saved call stack.
 *
 *   trace   - stack_trace descriptor for the record
 *   entries - room for 16 saved addresses
 *   valid   - integer tag kept alongside the trace
 */
struct my_save_condense {
    struct stack_trace trace;
    long entries[16];
    int valid;
};

/* Table of saved stacks; this file only allocates it, the definition lives elsewhere. */
extern struct my_save_condense *my_stack_save2;

/* Number of records allocated for my_stack_save2. */
#define MAX_PAGE_COUNT 131072

/* Periodic reporting work: forward declaration of the handler and the work item. */
static void my_work_proc(struct work_struct *ignored);
static DECLARE_DELAYED_WORK(my_work, my_work_proc);

/*
 * Declared with a real prototype: an empty parameter list "()" is an
 * old-style declaration that disables argument checking.
 */
extern void show_mem(void);

/* Set to 1 by asoc_pm_init(); read by other files through an extern declaration. */
unsigned int pm_inited = 0;

/*
 * Delayed-work handler: print the memory report through show_mem() and
 * queue my_work again with a delay of 120 * HZ jiffies.
 *
 * The work_struct argument is not used.
 */
static void my_work_proc(struct work_struct *ignored)
{
    const unsigned long report_period = 120 * HZ;

    show_mem();
    schedule_delayed_work(&my_work, report_period);
}

/*
 * Allocate the stack table, enable recording and start the periodic report.
 *
 * The table holds MAX_PAGE_COUNT records. Each record contains at least
 * 16 longs, so the table is 8 MiB or more: it is taken from vmalloc space
 * because a physically contiguous kmalloc() of that size is not realistic.
 *
 * Returns 0 on success or -ENOMEM when the table cannot be allocated; in
 * that case recording stays disabled and no report work is queued.
 */
static int __init asoc_pm_init(void)
{
    size_t table_bytes = MAX_PAGE_COUNT * sizeof(*my_stack_save2);

    pr_info("[PM] ASOC Power Management\n");

    my_stack_save2 = vmalloc(table_bytes);
    if (!my_stack_save2) {
        pr_err("[PM] cannot allocate %zu bytes for the stack table\n",
               table_bytes);
        return -ENOMEM;
    }
    /* Start from an all-zero table so no record looks valid by accident. */
    memset(my_stack_save2, 0, table_bytes);

    /* Publish the flag only after the table exists. */
    pm_inited = 1;

    /* First report after 20 seconds; the handler re-arms itself. */
    schedule_delayed_work(&my_work, HZ * 20);

    return 0;
}

late_initcall(asoc_pm_init);

下面是在内存分配的地方,即page_alloc.c的__alloc_pages_nodemask()中添加保存堆栈的代码:新增的部分是文件开头的结构体和全局变量定义,以及函数末尾trace_mm_page_alloc()之后、return page之前的那一段(原文中以灰色高亮显示)。

page_alloc.c

 #include <linux/stacktrace.h>

/*
 * Saved allocation call stack for one page frame.
 *
 *   trace   - stack_trace descriptor; __alloc_pages_nodemask() points its
 *             entries member at the entries[] array below
 *   entries - storage for up to 16 saved addresses
 *   valid   - set to 0x12345678 when a stack is recorded for the page
 */
struct my_save_condense {
    struct stack_trace trace;
    long entries[16];
    int valid;
};

/* Table of records, indexed by page frame number in __alloc_pages_nodemask(). */
struct my_save_condense *my_stack_save2;

/* Defined in another file; recording happens only while this equals 1. */
extern unsigned int pm_inited;

/*
 * This is the 'heart' of the zoned buddy allocator.
 *
 * Debug addition: after a successful allocation, and while pm_inited is 1,
 * the caller's stack is stored in my_stack_save2[] under the pfn of the
 * first page of the allocation.
 *
 * Returns the allocated page, or NULL on failure. Nothing is recorded for
 * a failed allocation.
 *
 * NOTE(review): the table is indexed by the raw pfn with no upper bound
 * check here; confirm the table covers the highest valid pfn on the target
 * (for example when RAM does not start at physical address 0).
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zone *preferred_zone;
    struct page *page;
    unsigned long pfn;
    int migratetype = allocflags_to_migratetype(gfp_mask);

    gfp_mask &= gfp_allowed_mask;

    lockdep_trace_alloc(gfp_mask);

    might_sleep_if(gfp_mask & __GFP_WAIT);

    if (should_fail_alloc_page(gfp_mask, order))
        return NULL;

    /*
     * Check the zones suitable for the gfp_mask contain at least one
     * valid zone. It's possible to have an empty zonelist as a result
     * of GFP_THISNODE and a memoryless node
     */
    if (unlikely(!zonelist->_zonerefs->zone))
        return NULL;

    get_mems_allowed();
    /* The preferred zone is used for statistics later */
    first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
    if (!preferred_zone) {
        put_mems_allowed();
        return NULL;
    }

    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
            preferred_zone, migratetype);
    if (unlikely(!page))
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
    put_mems_allowed();

    trace_mm_page_alloc(page, order, gfp_mask, migratetype);

    /*
     * Record the allocation stack. This stays inline in this function on
     * purpose: trace.skip counts frames, so moving the call into a helper
     * could shift which frames end up in the saved trace.
     *
     * The slow path can still fail, so page must be checked before it is
     * converted to a pfn.
     */
    if (page && pm_inited == 1) {
        pfn = page_to_pfn(page);
        if (pfn_valid(pfn)) {
            struct my_save_condense *rec = &my_stack_save2[pfn];

            rec->valid = 0x12345678;
            /* Clear the whole array; its byte size depends on sizeof(long). */
            memset(rec->entries, 0, sizeof(rec->entries));
            rec->trace.entries = (unsigned long *)rec->entries;
            rec->trace.max_entries =
                sizeof(rec->entries) / sizeof(rec->entries[0]);
            rec->trace.skip = 2;
            rec->trace.nr_entries = 0;

            save_stack_trace(&rec->trace);
        }
    }

    return page;
}

定期统计的函数单独放在show_mem.c,在内核show_mem()函数进行修改。

遍历每个已分配出去的页,即page_count(page)>0的页,这里有一个compare_stack_my()函数,用来比较堆栈是否相同,相同的堆栈,页数累加,

这里的全局变量struct my_save_condense save_it_begin[250],假定不同内存分配堆栈数量不超过250个,

最后打印有多少个堆栈,每个堆栈分配了多少个page。

show_mem.c

#include <linux/mm.h>

#include <linux/nmi.h>

#include <linux/quicklist.h>

#include <linux/stacktrace.h>

#include <linux/kallsyms.h>

/*
 * Saved call stack plus a counter.
 *
 *   trace   - stack_trace descriptor (nr_entries is read when comparing
 *             and printing)
 *   entries - storage for up to 16 saved addresses
 *   valid   - in the per-page table: the "record is set" tag;
 *             in the summary tables: the number of pages that share
 *             this stack
 */
struct my_save_condense {
    struct stack_trace trace;
    long entries[16];
    int valid;
};

/*
 * Summary table filled by show_mem() for non-slab, non-anonymous pages
 * that are unmapped and have no address_space; now_pos_begin is the number
 * of slots in use.
 */
struct my_save_condense save_it_begin[250];
int now_pos_begin = 0;

/*
 * Summary table filled by show_mem() for slab pages; now_pos_end is the
 * number of slots in use. show_mem() references both names, so they must
 * be defined here for the file to build.
 */
struct my_save_condense save_it_end[250];
int now_pos_end = 0;

/* Per-pfn table of recorded allocation stacks, defined in page_alloc.c. */
extern struct my_save_condense *my_stack_save2;

void compare_stack_my(struct my_save_condense *p_save,struct my_save_condense *save_array,int array_count, int *now_pos,int count)

{

    int i,j,diff,k,index=0;

        diff=0;

        for (i=0;i<*now_pos;i++){

            diff=0;

            for(j=0;j<p_save->trace.nr_entries&&j<8;j++){

                if(save_array[i].entries[j]!=p_save->entries[j]) {

                    diff=1;

                    break;

                }

            }

            if(diff==0){

                save_array[i].valid+=count;

                index=i;

                break;

            }

        }

        if (diff==1 || *now_pos==0){

            if(*now_pos>=array_count){

                printk(KERN_ALERT"overflow\n");

            } else {

                *now_pos = *now_pos + 1;

                

                memcpy(&save_array[*now_pos-1],p_save,sizeof(struct my_save_condense));

                save_array[*now_pos-1].trace.entries = save_array[*now_pos-1].entries;

                save_array[*now_pos-1].valid=count;

                index=*now_pos-1;

            }

        }

}

void show_mem(void)

{

    pg_data_t *pgdat;

    unsigned long total = 0, reserved = 0, shared = 0,

        nonshared = 0, highmem = 0,mapcount_shared=0,mapcount_nonshared=0,anon_not_mapped=0,file_not_mapped=0,my_vmalloc=0,

        mapcount_nonshared_file=0,mapcount_nonshared_anon=0,

        mapcount_shared_anon=0,mapcount_shared_file=0,

        mapcount_shares_file=0,mapcount_shares_anon=0,my_private=0,my_slabnotfree=0,my_other=0,slab_count=0;

    

    struct anon_vma *anon_vma;

    struct anon_vma_chain *avc;

    pgoff_t pgoff;

    int j,k,l;

    struct mm_struct *mm;

    struct vm_area_struct *vma;

    struct task_struct *t,*p;

    char *q;

    struct dentry *alias;

    struct inode *ii;

    pte_t *pte;

    spinlock_t *ptl;

    int c=1;

    printk("Mem-Info:\n");

    show_free_areas();

    memset(save_it_begin,0,250*sizeof(struct my_save_condense));

    now_pos_begin=0;

    memset(save_it_end,0,250*sizeof(struct my_save_condense));

    now_pos_end=0;

    for_each_online_pgdat(pgdat) {

        unsigned long i, flags;

        pgdat_resize_lock(pgdat, &flags);

        for (i = 0; i < pgdat->node_spanned_pages; i+=c) {

            struct page *page;

            unsigned long pfn = pgdat->node_start_pfn + i;

            if (unlikely(!(i % MAX_ORDER_NR_PAGES)))

                touch_nmi_watchdog();

            if (!pfn_valid(pfn))

                continue;

            page = pfn_to_page(pfn);

            if (PageHighMem(page))

                highmem++;

            if (PageReserved(page)){

                reserved++;

                c=1;

            }

            else if (page_count(page) == 1){

                if(PageCompound(page)){

                    if(PageTail(page)){

                        //destroy_compound_page先__ClearPageHead,再__ClearPageTail,错误统计几次,无所谓

                        printk(KERN_ALERT"pfn %ld PageTail first_page pfn %ld\n",pfn,

                            page_to_pfn(page->first_page));

                        c=1;

                    } else {

                        struct page *pg=page+1;;

                        c=1<<((int)(page[1].lru.prev));

                        if(c>2048){

                            printk(KERN_ALERT"possible?compound page over 8M\n");

                        }

                        //printk(KERN_DEBUG"Compound pfn %ld count %d\n",pfn,c);

                    }

                } else {

                    c=1;

                }

                nonshared+=c;

                if(PageAnon(page)){

                    if(page_mapcount(page)==0){

                        anon_not_mapped+=c;

                        

                    }else if (page_mapcount(page)>1)

                    {

                        pr_alert(".possible? anon page_count 1 mapcount %d pfn %d\n",page_mapcount(page),pfn);

                    }

                }else if (PageSlab(page)){

                    slab_count++;

                    if (my_stack_save2[pfn].valid==0x12345678 ){

                            compare_stack_my(&my_stack_save2[pfn],

                                save_it_end,

                                250,

                                &now_pos_end,

                                c);

                    }

                }else{

                    if(page_mapcount(page)==0){

                        struct address_space *mapping1 = page->mapping;

                        

                        if(mapping1 == NULL){

                            if (my_stack_save2[pfn].valid==0x12345678 ){

                                    compare_stack_my(&my_stack_save2[pfn],

                                        save_it_begin,

                                        250,

                                        &now_pos_begin,

                                        c);

                            }

                            if(PageHead(page)&&!(page->flags &  (1 << PG_slab))){

                                my_slabnotfree+=c;

                            }  else if (page->flags & (1 << PG_private)){

                                my_private+=c;

                            }  else {

                                my_other+=c;//main thread_info,pte

                            }

                        } else {

                            file_not_mapped+=c;

                        }

                        

                        

                    }else if (page_mapcount(page)>1)

                    {

                        pr_alert(".possible? file page_count 1 mapcount %d pfn %d\n",page_mapcount(page),pfn);

                    }

                }

            }

            

            else if (page_count(page) > 1){

                if(PageCompound(page)){

                    if(PageTail(page)){

                        printk(KERN_ALERT"pfn %ld PageTail first_page pfn %ld\n",pfn,

                            page_to_pfn(page->first_page));

                        c=1;

                    } else {

                        struct page *pg=page+1;;

                        c=1<<((int)(page[1].lru.prev));

                        if(c>2048){

                            printk(KERN_ALERT"possible?compound page over 8M\n");

                        }

                        //printk(KERN_DEBUG"Compound pfn %ld count %d %d\n",pfn,c);

                    }

                } else {

                    c=1;

                }

                shared+=c;

                if(PageAnon(page)){

                    if(page_mapcount(page)==0){

                        pr_alert("possible? anon page_count %d mapcount 0 pfn %d\n",page_count(page),pfn);

                    }

                }else if(PageSlab(page)){

                    slab_count++;

                    if (my_stack_save2[pfn].valid==0x12345678 ){

                            compare_stack_my(&my_stack_save2[pfn],

                                save_it_end,

                                250,

                                &now_pos_end,

                                c);

                    }

                }else{

                    if(page_mapcount(page)==0){

                        struct address_space *mapping1 = page->mapping;

                        

                        if(mapping1 == NULL){

                            if (my_stack_save2[pfn].valid==0x12345678 ){

                                    compare_stack_my(&my_stack_save2[pfn],

                                        save_it_begin,

                                        250,

                                        &now_pos_begin,

                                        c);

                            }

                            if(PageHead(page)&&!(page->flags &  (1 << PG_slab))){

                                my_slabnotfree+=c;

                            } else if (page->flags & (1 << PG_private)){

                                my_private+=c;

    

                            }else {

                                my_other+=c;//main thread_info pte

                            }

    

                        } else {

    

                            file_not_mapped+=c;

                        }

                    }

                }

            } else {

                c=1;

            }

            

            total+=c;

        }

        pgdat_resize_unlock(pgdat, &flags);

    }

    printk("%lu pages RAM\n", total);

#ifdef CONFIG_HIGHMEM

    printk("%lu pages HighMem\n", highmem);

#endif

    printk("%lu pages reserved\n", reserved);

    printk("%lu pages shared\n", shared);

    printk("%lu pages non-shared\n", nonshared);

#ifdef CONFIG_QUICKLIST

    printk("%lu pages in pagetable cache\n",

        quicklist_total_size());

#endif

    printk(KERN_DEBUG"anon_not_mapped %lu file_not_mapped %lu private %lu slabnotfree %lu other %lu slab %d\n",

        anon_not_mapped,file_not_mapped,my_private,my_slabnotfree,my_other,slab_count);

    printk("now_pos_begin %d\n",now_pos_begin);

    for(j=0;j<now_pos_begin;j++){

        if(save_it_begin[j].valid>50){

            printk(KERN_DEBUG"[%d] nr_entries %d valid %d\n",j,

                save_it_begin[j].trace.nr_entries,save_it_begin[j].valid);

            print_stack_trace(&(save_it_begin[j].trace),0);

        }

    }

}

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值