9. malloc

malloc()函数是C语言中内存分配函数,学习c语言的初学者经常会有如下的困扰。

    假设系统中有进程A和进程B,分别用testA和testB函数分配内存:

/* Process A */

/*
 * Allocate 100 bytes with malloc() and then write to the buffer.
 *
 * The write through bufA is the first access to the buffer; per the
 * discussion later in this article, that first access (page fault) is when
 * a physical page is actually allocated for it.
 */
void testA(void)
{
    char *bufA = malloc(100);

    if (!bufA)
        return;         /* allocation failed: there is nothing to touch */

    /* ... */

    *bufA = 100;        /* originally "*buf", which is not declared anywhere */
    free(bufA);
}
/* Process B */

/*
 * Allocate 100 bytes with malloc() and pin the buffer with mlock() before
 * it is ever read or written.
 *
 * Both calls can fail (mlock() for instance when the RLIMIT_MEMLOCK limit
 * would be exceeded), so their results are checked instead of ignored.
 */
void testB(void)
{
    char *bufB = malloc(100);

    if (!bufB)
        return;

    if (mlock(bufB, 100) != 0) {
        /* The buffer could not be locked; release it and give up. */
        free(bufB);
        return;
    }

    /* ... */

    munlock(bufB, 100);
    free(bufB);
}

问题:

  • malloc()函数返回的内存是否马上就分配物理内存?testA和testB分别在何时分配物理内存?

答:

  • 假设不考虑libc的因素,malloc分配100Byte,那么实际上内核是为其分配100Byte吗?

答:

  • 假设使用printf打印指针bufA和bufB指向的地址是一样的,那么在内核中这两块虚拟内存是否"打架"了呢?

答:

  • vm_normal_page()函数返回的是什么样页面的struct page数据结构?为什么内存管理代码需要这个函数?

答:

  • 请简述get_user_pages()函数的作用和实现流程。

答:

  • 请简述follow_page()函数的作用和实现流程。

答:

    malloc()函数是C函数库封装的一个核心函数,C函数库做一些处理后会调用Linux内核的brk系统调用。大家之所以对brk系统调用并不熟悉,原因在于很少有人会直接使用系统调用brk向系统申请内存,而总是通过malloc()之类的C函数库API函数。如果把malloc()想象成零售,那么brk就是代理商。malloc函数的实现为用户进程维护一个本地小仓库,当进程需要使用更多的内存时就向这个小仓库要货,小仓库存量不足时就通过代理商brk向内核批发。

brk实现:

    在32位linux内核中,每个用户进程拥有3GB的虚拟空间。内核如何为用户空间来划分这3GB的虚拟空间呢?用户进程的可执行文件由代码段和数据段组成,数据段包括所有的静态分配的数据空间,例如全局变量和静态局部变量等。这些空间在可执行文件装载时,内核就为其分配好这些空间,包括虚拟地址和物理页面,并建立好二者的映射关系。如下图,用户进程的用户栈从3GB虚拟空间的顶部开始,由顶向下延伸,而brk分配的空间是从数据段的顶部end_data到用户栈的底部。所以动态分配空间是从进程的end_data开始,每次分配一块空间,就把这个边界往上推进一段,同时内核和进程都会记录当前的边界的位置。 

brk系统调用主要实现在mm/mmap.c文件中。

/*
 * brk system call: set the end of the calling process's dynamically
 * allocated data area (mm->brk) to the address @brk.
 *
 * mm->mmap_sem is held for writing during the whole update and is released
 * before returning (and before mm_populate() is called).
 *
 * Returns @brk when the break was moved (or needed no mapping change);
 * on any failure the break is left untouched and the current mm->brk is
 * returned instead.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
    unsigned long retval;
    unsigned long newbrk, oldbrk;
    struct mm_struct *mm = current->mm;
    unsigned long min_brk;
    bool populate;

    down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
    /*
     * CONFIG_COMPAT_BRK can still be overridden by setting
     * randomize_va_space to 2, which will still cause mm->start_brk
     * to be arbitrarily shifted
     */
    if (current->brk_randomized)
        min_brk = mm->start_brk;
    else
        /*
         * mm_struct keeps the end address of the data segment in
         * end_data; here it is the lowest break that may be requested.
         */
        min_brk = mm->end_data;
#else

    min_brk = mm->start_brk;
#endif
    /* A request below the lower bound is invalid: leave the break alone. */
    if (brk < min_brk)
        goto out;

    /*
     * Check against rlimit here. If this check is done later after the test
     * of oldbrk with newbrk then it can escape the test and let the data
     * segment grow beyond its set limit the in case where the limit is
     * not page aligned -Ram Gupta
     */
    if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
                  mm->end_data, mm->start_data))
        goto out;

    /* newbrk is the requested break, page-aligned. */
    newbrk = PAGE_ALIGN(brk);
    /*
     * mm->brk is the current break and oldbrk its page-aligned value.
     * When both aligned values are equal no mapping has to change; only
     * mm->brk itself is updated.
     */
    oldbrk = PAGE_ALIGN(mm->brk);
    if (oldbrk == newbrk)
        goto set_brk;

    /* Always allow shrinking brk.*/
    /*
     * The requested break is not above the current one, so memory is being
     * given back: the range starting at newbrk, oldbrk - newbrk bytes long,
     * is released with do_munmap(). A zero return from do_munmap() lets the
     * break be updated; anything else fails the call.
     */
    if (brk <= mm->brk) {
        if (!do_munmap(mm, newbrk, oldbrk-newbrk))
            goto set_brk;
        goto out;
    }

    /* Check against existing mmap mappings. */
    /*
     * find_vma_intersection() is asked whether an existing VMA overlaps the
     * range from oldbrk to newbrk + PAGE_SIZE. If the address space right
     * after the old break is already in use, the area cannot grow and the
     * call fails.
     */
    if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
        goto out;

    /* Ok, looks good - let it rip. */
    /*
     * do_brk() is the core of brk: it sets up the mapping for the
     * newbrk - oldbrk bytes starting at oldbrk and returns that start
     * address on success. Any other return value is treated as failure.
     */
    if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
        goto out;

set_brk:
    /* Record the exact (not page-aligned) break that was requested. */
    mm->brk = brk;
    /*
     * mm_populate() is only called when the area grew and the mm's default
     * flags contain VM_LOCKED. It runs after mmap_sem has been dropped.
     * NOTE(review): mm_populate() is not shown here; per the article it
     * allocates the physical pages right away.
     */
    populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
    up_write(&mm->mmap_sem);
    if (populate)
        mm_populate(oldbrk, newbrk - oldbrk);
    return brk;

out:
    /* Failure: report the unchanged break back to the caller. */
    retval = mm->brk;
    up_write(&mm->mmap_sem);
    return retval;
}

do_brk()函数实现:

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 *
 *  @addr: start address of the range to map
 *  @len:  length of the range in bytes (page-aligned below)
 *
 *  Returns @addr on success (also when @len aligns to zero), otherwise an
 *  error value such as -ENOMEM. The body only creates or extends a VMA and
 *  updates the mm counters; no call that allocates physical pages for the
 *  range is made here.
 */
static unsigned long do_brk(unsigned long addr, unsigned long len)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma, *prev;
    unsigned long flags;
    struct rb_node **rb_link, *rb_parent;
    pgoff_t pgoff = addr >> PAGE_SHIFT;
    int error;
    
    /* The requested size is aligned to the page size; zero is a no-op. */
    len = PAGE_ALIGN(len);
    if (!len)
        return addr;

    flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

    /*
     * get_unmapped_area() checks whether the virtual address space has room
     * for the request and returns the start of a usable range; it ends up
     * in an architecture-specific implementation. MAP_FIXED is passed, so
     * the range at addr itself is being validated. A result with bits set
     * outside PAGE_MASK is an error code and is returned as is.
     */
    error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
    if (error & ~PAGE_MASK)
        return error;

    /*
     * mlock_future_check() only matters when mm->def_flags contains
     * VM_LOCKED (normally set through the mlock family of system calls):
     * it then verifies that locking len more bytes stays within
     * RLIMIT_MEMLOCK, and a non-zero result aborts the request.
     *
     * User programs rarely set VM_LOCKED. Without it brk does not allocate
     * physical pages right away: that work is deferred until the process
     * touches the virtual page and the resulting page fault allocates the
     * physical memory and maps it. With VM_LOCKED the brk system call
     * invokes mm_populate() after do_brk() returns.
     */
    error = mlock_future_check(mm, mm->def_flags, len);
    if (error)
        return error;

    /*
     * mm->mmap_sem is required to protect against another thread
     * changing the mappings in case we sleep.
     */
    verify_mm_writelocked(mm);

    /*
     * Clear old maps.  this also does some error checking for us
     */
 munmap_back:
    /*
     * find_vma_links() (covered in an earlier article) walks the VMAs in
     * the process's red-black tree and looks for the best place to insert
     * the range starting at addr; on success rb_link points at the
     * rb_left or rb_right pointer of the chosen parent node. It returns 0
     * when an insertion point was found. A non-zero return (-ENOMEM per the
     * original note) means the range overlaps existing VMAs: the overlap is
     * released with do_munmap() and the lookup is retried.
     */
    if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
        if (do_munmap(mm, addr, len))
            return -ENOMEM;
        goto munmap_back;
    }

    /* Check against address space limits *after* clearing old maps... */
    if (!may_expand_vm(mm, len >> PAGE_SHIFT))
        return -ENOMEM;

    if (mm->map_count > sysctl_max_map_count)
        return -ENOMEM;

    if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
        return -ENOMEM;

    /* Can we just expand an old private anonymous mapping? */
    /*
     * vma_merge() tries to merge the range with a VMA next to addr. If that
     * is not possible a new VMA has to be created, with vm_start = addr and
     * vm_end = addr + len.
     */
    vma = vma_merge(mm, prev, addr, addr + len, flags,
                    NULL, NULL, pgoff, NULL);
    if (vma)
        goto out;

    /*
     * create a vma struct for an anonymous mapping
     */
    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
    if (!vma) {
        /*
         * Presumably reverses the charge made by
         * security_vm_enough_memory_mm() above (same page count).
         */
        vm_unacct_memory(len >> PAGE_SHIFT);
        return -ENOMEM;
    }

    INIT_LIST_HEAD(&vma->anon_vma_chain);
    vma->vm_mm = mm;
    vma->vm_start = addr;
    vma->vm_end = addr + len;
    vma->vm_pgoff = pgoff;
    vma->vm_flags = flags;
    vma->vm_page_prot = vm_get_page_prot(flags);

    /*
     * The new VMA has to be added to the mm->mmap list and to the
     * red-black tree; vma_link() does that.
     */
    vma_link(mm, vma, prev, rb_link, rb_parent);
out:
    /* Reached for both a merged and a newly created VMA. */
    perf_event_mmap(vma);
    mm->total_vm += len >> PAGE_SHIFT;
    if (flags & VM_LOCKED)
        mm->locked_vm += (len >> PAGE_SHIFT);
    vma->vm_flags |= VM_SOFTDIRTY;
    return addr;
}
返回到brk()函数

get_unmapped_area()函数实现:判断虚拟地址空间是否充足,返回一段没有映射过的虚拟地址空间

[do_brk()->get_unmapped_area()]

/*
 * Find a usable, currently unmapped virtual address range of @len bytes.
 *
 * The search itself is delegated: to the file's own get_unmapped_area
 * operation when @file provides one, otherwise to the mm's
 * get_unmapped_area hook (an architecture-specific function; the article
 * walks through the ARM one, arch_get_unmapped_area_topdown()).
 *
 * Returns the start address of the range, or an error value
 * (e.g. -ENOMEM, -EINVAL, or whatever a hook or check reported).
 */
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
        unsigned long pgoff, unsigned long flags)
{
    unsigned long (*finder)(struct file *, unsigned long,
                unsigned long, unsigned long, unsigned long);
    unsigned long rc;

    rc = arch_mmap_check(addr, len, flags);
    if (rc != 0)
        return rc;

    /* Careful about overflows: a request larger than TASK_SIZE can never fit. */
    if (len > TASK_SIZE)
        return -ENOMEM;

    /* Default to the mm's hook; a file-specific one takes precedence. */
    finder = current->mm->get_unmapped_area;
    if (file && file->f_op->get_unmapped_area)
        finder = file->f_op->get_unmapped_area;

    addr = finder(file, addr, len, pgoff, flags);
    if (IS_ERR_VALUE(addr))
        return addr;

    /* The range must end inside the task's address space ... */
    if (addr > TASK_SIZE - len)
        return -ENOMEM;
    /* ... and must start on a page boundary. */
    if ((addr & ~PAGE_MASK) != 0)
        return -EINVAL;

    addr = arch_rebalance_pgtables(addr, len);

    rc = security_mmap_addr(addr);
    if (rc != 0)
        return rc;
    return addr;
}
回到do_brk函数
/*
 * Check whether a new mapping of @len bytes may be locked into memory.
 *
 * Only relevant when @flags contains VM_LOCKED (mlock MCL_FUTURE): the
 * pages already locked by @mm plus the new ones must not exceed
 * RLIMIT_MEMLOCK, unless the caller has CAP_IPC_LOCK.
 *
 * Returns 0 when the mapping is allowed, -EAGAIN otherwise.
 */
static inline int mlock_future_check(struct mm_struct *mm,
                     unsigned long flags,
                     unsigned long len)
{
    unsigned long wanted_pages, allowed_pages;

    /* Nothing to verify unless the mapping is going to be locked. */
    if (!(flags & VM_LOCKED))
        return 0;

    wanted_pages = len >> PAGE_SHIFT;
    wanted_pages += mm->locked_vm;

    allowed_pages = rlimit(RLIMIT_MEMLOCK);
    allowed_pages >>= PAGE_SHIFT;

    if (wanted_pages <= allowed_pages)
        return 0;

    /* Over the limit: only a privileged caller may proceed. */
    if (capable(CAP_IPC_LOCK))
        return 0;

    return -EAGAIN;
}

arch_get_unmapped_area_topdown()函数实现:

[arch/arm/mm/mmap.c]

[do_brk()->get_unmapped_area()->arch_get_unmapped_area_topdown()]

/*
 * ARM top-down search for an unmapped virtual address range of @len bytes.
 *
 * Order of attempts:
 *   1. MAP_FIXED: @addr0 is returned as is (after an alignment check for
 *      shared mappings when the caches alias).
 *   2. A non-zero @addr0 is used as a hint and returned if the aligned hint
 *      fits below TASK_SIZE and does not run into the VMA found for it.
 *   3. A top-down search between FIRST_USER_ADDRESS and mm->mmap_base.
 *   4. If that fails, a bottom-up search between mm->mmap_base and
 *      TASK_SIZE.
 *
 * Returns the start address of the range or an error value
 * (-ENOMEM, -EINVAL).
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
            const unsigned long len, const unsigned long pgoff,
            const unsigned long flags)
{
    struct mm_struct *mm = current->mm;
    struct vm_unmapped_area_info info;
    struct vm_area_struct *next;
    unsigned long hint = addr0;
    int aliasing = cache_is_vipt_aliasing();
    /*
     * We only need to do colour alignment if either the I or D
     * caches alias, and then only for file-backed or shared mappings.
     */
    int colour = aliasing && (filp || (flags & MAP_SHARED));

    /* requested length too big for entire address space */
    if (len > TASK_SIZE)
        return -ENOMEM;

    if (flags & MAP_FIXED) {
        unsigned long misalign =
            (hint - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1);

        /* A fixed shared mapping must respect SHMLBA alignment when caches alias. */
        if (aliasing && (flags & MAP_SHARED) && misalign)
            return -EINVAL;
        return hint;
    }

    /* requesting a specific address */
    if (hint) {
        hint = colour ? COLOUR_ALIGN(hint, pgoff) : PAGE_ALIGN(hint);

        next = find_vma(mm, hint);
        if (hint <= TASK_SIZE - len &&
                (!next || hint + len <= next->vm_start))
            return hint;
    }

    /* First choice: search downwards from the mmap base. */
    info.length = len;
    info.flags = VM_UNMAPPED_AREA_TOPDOWN;
    info.high_limit = mm->mmap_base;
    info.low_limit = FIRST_USER_ADDRESS;
    info.align_offset = pgoff << PAGE_SHIFT;
    info.align_mask = 0;
    if (colour)
        info.align_mask = PAGE_MASK & (SHMLBA - 1);

    hint = vm_unmapped_area(&info);
    if (!(hint & ~PAGE_MASK))
        return hint;

    /*
     * A failed mmap() very likely causes application failure,
     * so fall back to the bottom-up function here. This scenario
     * can happen with large stack limits and large mmap()
     * allocations.
     */
    VM_BUG_ON(hint != -ENOMEM);
    info.flags = 0;
    info.low_limit = mm->mmap_base;
    info.high_limit = TASK_SIZE;

    return vm_unmapped_area(&info);
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
Introduction The usual implementation of malloc and free are unforgiving to errors in their callers' code, including cases where the programmer overflows an array, forgets to free memory, or frees a memory block twice. This often does not affect the program immediately, waiting until the corrupted memory is used later (in the case of overwrites) or gradually accumulating allocated but unused blocks. Thus, debugging can be extremely difficult. In this assignment, you will write a wrapper for the malloc package that will catch errors in the code that calls malloc and free. The skills you will have learned upon the completion of this exercise are pointer arithmetic and a greater understanding of the consequences of subtle memory mistakes. Logistics Unzip debugging_malloc.zip into an empty directory. The files contained are as follows: File(s): Function: debugmalloc.c Contains the implementation of the three functions you will be writing. This is the one file you will be editing and handing in. debugmalloc.h Contains the declaration of the functions, as well as the macros that will call them. driver.c Contains main procedure and the code that will be calling the functions in the malloc package dmhelper.c, dmhelper.h Contain the helper functions and macros that you will be calling from your code grader.pl Perl script that runs your code for the various test cases and gives you feedback based on your current code debugmalloc.dsp Exercise 3 project file debugmalloc.dsw Exercise 3 workspace file tailor.h, getopt.c, getopt.h Tools that are used only by the driver program for I/O purposes. You will not need to know what the code in these files do. Others Required by Visual C++. You do not need to understand their purpose Specification Programs that use this package will call the macros MALLOC and FREE. MALLOC and FREE are used exactly the same way as the malloc() and free() functions in the standard C malloc package. 
That is, the line void *ptr = MALLOC ( n ) ;will allocate a payload of at least n bytes, and ptr will point to the front of this block. The line FREE(ptr);will cause the payload pointed to by ptr to be deallocated and become available for later use. The macros are defined as follows: #define MALLOC(s) MyMalloc(s, __FILE__, __LINE__) #define FREE(p) MyFree(p, __FILE__, __LINE__) The __FILE__ macro resolves to the filename and __LINE__ resolves to the current line number. The debugmalloc.c file contains three functions that you are required to implement, as shown: void *MyMalloc(size_t size, char *filename, int linenumber); void MyFree(void *ptr, char *filename, int linenumber); int AllocatedSize(); Using the macros above allow MyMalloc and MyFree to be called with the filename and line number of the actual MALLOC and FREE calls, while retaining the same form as the usual malloc package. By default, MyMalloc and MyFree() simply call malloc() and free(), respectively, and return immediately. AllocatedSize() should return the number of bytes currently allocated by the user: the sum of the requested bytes through MALLOC minus the bytes freed using FREE. By default, it simply returns 0 and thus is unimplemented. The definitions are shown below: void *MyMalloc(size_t size, char *filename, int linenumber) { return (malloc(size)); } void MyFree(void *ptr, char *filename, int linenumber) { free(ptr); } int AllocatedSize() { return 0; } Your job is to modify these functions so that they will catch a number of errors that will be described in the next section. There are also two optional functions in the debugmalloc.c file that you can implement: void PrintAllocatedBlocks(); int HeapCheck(); PrintAllocatedBlocks should print out information about all currently allocated blocks. HeapCheck should check all the blocks for possible memory overwrites. 
Implementation Details To catch the errors, you will allocate a slightly larger amount of space and insert a header and a footer around the "requested payload". MyMalloc() will insert information into this area, and MyFree() will check to see if the information has not changed. The organization of the complete memory block is as shown below: Header Checksum ... Fence Payload Footer Fence Note:MyMalloc() returns a pointer to the payload, not the beginning of the whole block. Also, the ptr parameter passed into MyFree(void *ptr) will point to the payload, not the beginning of the block. Information that you might want to store in this extra (header, footer) area include: a "fence" immediately around the requested payload with a known value like 0xCCDEADCC, so that you can check if it has been changed when the block is freed. the size of the block a checksum for the header to ensure that it has not been corrupted (A checksum of a sequence of bits is calculated by counting the number of "1" bits in the stream. For example, the checksum for "1000100010001000" is 4. It is a simple error detection mechanism.) the filename and line number of the MALLOC() call The errors that can occur are: Error #1: Writing past the beginning of the user's block (through the fence) Error #2: Writing past the end of the user's block (through the fence) Error #3: Corrupting the header information Error #4: Attempting to free an unallocated or already-freed block Error #5: Memory leak detection (user can use ALLOCATEDSIZE to check for leaks at the end of the program) To report the first four errors, call one of these two functions: void error(int errorcode, char *filename, int linenumber); errorcode is the number assigned to the error as stated above. filename and linenumber contain the filename and line number of the line (the free call) in which the error is invoked. For example, call error(2, filename, linenumber) if you come across a situation where the footer fence has been changed. 
void errorfl(int errorcode, char *filename_malloc, int linenumber_malloc, char *filename_free, int linenumber_free); This is the same as the error(), except there are two sets of filenames and line numbers, one for the statement in which the block was malloc'd, and the other for the statement in which the block was free'd (and the error was invoked). The fact that MyMalloc() and MyFree() are given the filename and line number of the MALLOC() and FREE() call can prove to be very useful when you are reporting errors. The more information you print out, the easier it will be for the programmer to locate the error. Use errorfl() instead of error() whenever possible. errorfl() obviously cannot be used on situations where FREE() is called on an unallocated block, since it was not ever MALLOC'd. Note: You will only be reporting errors from MyFree(). None of the errors can be caught in MyMalloc() In the case of memory leaks, the driver program will call AllocatedSize(), and the grader will look at its return value and possible output. AllocatedSize() should return the number of bytes currently allocated from MALLOC and FREE calls. For example, the code segment: void *ptr1 = MALLOC(10), *ptr2 = MALLOC(8); FREE(ptr2); printf("%d\n", AllocatedSize()); should print out "10". Once you have gotten to the point where you can catch all of the errors, you can go an optional step further and create a global list of allocated blocks. This will allow you to perform analysis of memory leaks and currently allocated memory. You can implement the void PrintAllocatedBlocks() function, which prints out the filename and line number where all currently allocated blocks were MALLOC()'d. A macro is provided for you to use to print out information about a single block in a readable and gradeable format: PRINTBLOCK(int size, char *filename, int linenumber) Also, you can implement the int HeapCheck() function. 
This should check all of the currently allocated blocks and return -1 if there is an error and 0 if all blocks are valid. In addition, it should print out the information about all of the corrupted blocks, using the macro #define PRINTERROR(int errorcode, char *filename, int linenumber), with errorcode equal to the error number (according to the list described earlier) the block has gone through. You may find that this global list can also allow you to be more specific in your error messages, as it is otherwise difficult to determine the difference between an overwrite of a non-payload area and an attempted FREE() of an unallocated block. Evaluation You are given 7 test cases to work with, plus 1 extra for testing a global list. You can type "debugmalloc -t n" to run the n-th test. You can see the code that is being run in driver.c. If you have Perl installed on your machine, use grader.pl to run all the tests and print out a table of results. There are a total of 100 possible points. Here is a rundown of the test cases and desired output (do not worry about the path of the filename): Test case #1 Code char *str = (char *) MALLOC(12); strcpy(str, "123456789"); FREE(str); printf("Size: %d\n", AllocatedSize()); PrintAllocatedBlocks(); Error # None Correct Output Size: 0 Points worth 10 Details 10 points for not reporting an error and returning 0 in AllocatedSize() Test case #2 Code char *str = (char *) MALLOC(8); strcpy(str, "12345678"); FREE(str); Error # 2 Correct Output Error: Ending edge of the payload has been overwritten. in block allocated at driver.c, line 21 and freed at driver.c, line 23 Points worth 15 Details 6 pts for catching error 3 pts for printing the filename/line numbers 6 pts for correct error message Test case #3 Code char *str = (char *) MALLOC(2); strcpy(str, "12"); FREE(str); Error # 2 Correct Output Error: Ending edge of the payload has been overwritten. 
in block allocated at driver.c, line 28 and freed at driver.c, line 30 Points worth 15 Details 6 pts for catching error 3 pts for printing the filename/line numbers 6 pts for correct error message Test case #4 Code void *ptr = MALLOC(4); *ptr2 = MALLOC(6); FREE(ptr); printf("Size: %d\n", AllocatedSize()); PrintAllocatedBlocks(); Error # None Correct Output Size: 6 Currently allocated blocks: 6 bytes, created at driver.c, line 34 Points worth 15 Details 15 pts for not reporting an error and returning 6 from AllocatedSize Extra for printing out the extra block Test case #5 Code void *ptr = MALLOC(4); FREE(ptr); FREE(ptr); Error # 4 Correct Output Error: Attempting to free an unallocated block. in block freed at driver.c, line 43 Points worth 15 Details 15 pts for catching error Extra for correct error message Test case #6 Code char *ptr = (char *) MALLOC(4); *((int *) (ptr - 8)) = 8 + (1 << 31); FREE(ptr); Error # 1 or 3 Correct Output Error: Header has been corrupted.or Error: Starting edge of the payload has been overwritten. in block allocated at driver.c, line 47 and freed at driver.c, line 49 Points worth 15 Details 9 pts for catching error 6 pts for a correct error message Test case #7 Code char ptr[5]; FREE(ptr); Error # 4 Correct Output Error: Attempting to free an unallocated block. in block freed at driver.c, line 54 Points worth 15 Details 15 pts for recognizing error Extra for printing correct error message Test case #8 (Optional) Code int i; int *intptr = (int *) MALLOC(6); char *str = (char *) MALLOC(12); for(i = 0; i < 6; i++) { intptr[i] = i; } if (HeapCheck() == -1) { printf("\nCaught Errors\n"); } Error # None Correct Output Error: Ending edge of the payload has been overwritten. Invalid block created at driver.c, line 59 Caught Errors Points worth Extra Details "Caught Errors" indicates that the HeapCheck() function worked correctly. Extra points possible. 
Your instructor may give you extra credit for implementing a global list and the PrintAllocatedBlocks() and HeapCheck() functions.

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

byd yes

你的鼓励是我最大的动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值