vmalloc()概览
vmalloc()源码解析
__vmalloc()->__vmalloc_node()
在接口函数调用时,驱动往往会调用__vmalloc()函数。__vmalloc()会调用__vmalloc_node(),再由__vmalloc_node()调用__vmalloc_node_range()
/*
 * __vmalloc - allocate from the vmalloc range with explicit GFP flags and
 * page protection.
 * @size:     requested size in bytes
 * @gfp_mask: allocation flags handed down to the lower layers
 * @prot:     page protection used for the mapping
 *
 * Forwards to __vmalloc_node() with an alignment of 1, no NUMA node
 * preference (NUMA_NO_NODE), and the return address of this call recorded
 * as the caller.
 *
 * Returns whatever __vmalloc_node() returns.
 */
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	const void *caller = __builtin_return_address(0);

	return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, caller);
}
/*
 * __vmalloc_node - allocate from the default vmalloc window.
 * @size:     requested size in bytes
 * @align:    requested alignment
 * @gfp_mask: allocation flags
 * @prot:     page protection used for the mapping
 * @node:     NUMA node to allocate on, or NUMA_NO_NODE
 * @caller:   address recorded as the origin of the request
 *
 * Delegates to __vmalloc_node_range(), restricting the search to
 * VMALLOC_START..VMALLOC_END and passing no additional vm_struct flags.
 */
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, const void *caller)
{
	const unsigned long extra_vm_flags = 0;

	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
				    gfp_mask, prot, extra_vm_flags, node,
				    caller);
}
VMALLOC_START: 是vmalloc区域的开始地址,以内核模块区域的结束地址为起始点
VMALLOC_END: vmalloc区域的结束地址
__vmalloc_node_range()
/*
 * __vmalloc_node_range - allocate and map a vmalloc area between @start
 * and @end.
 * @size:     requested size in bytes
 * @align:    requested alignment
 * @start:    lower bound of the address window to search
 * @end:      upper bound of the address window to search
 * @gfp_mask: allocation flags
 * @prot:     page protection used for the mapping
 * @vm_flags: extra vm_struct flags, OR-ed with VM_ALLOC | VM_UNINITIALIZED
 * @node:     NUMA node to allocate on, or NUMA_NO_NODE
 * @caller:   address recorded as the origin of the request
 *
 * Returns the address produced by __vmalloc_area_node(), or NULL on failure.
 */
void *__vmalloc_node_range(unsigned long size, unsigned long align,
			unsigned long start, unsigned long end, gfp_t gfp_mask,
			pgprot_t prot, unsigned long vm_flags, int node,
			const void *caller)
{
	const unsigned long requested = size;
	struct vm_struct *vm;
	void *mapped;

	/*
	 * Work in whole pages. A request that rounds to zero, or that needs
	 * more pages than totalram_pages() reports, cannot be satisfied.
	 */
	size = PAGE_ALIGN(size);
	if (size == 0 || (size >> PAGE_SHIFT) > totalram_pages())
		goto fail;

	/*
	 * Reserve the address range and its vm_struct descriptor. The
	 * original (unaligned) byte count is what gets passed down here.
	 */
	vm = __get_vm_area_node(requested, align,
				VM_ALLOC | VM_UNINITIALIZED | vm_flags,
				start, end, node, gfp_mask, caller);
	if (vm == NULL)
		goto fail;

	/*
	 * Allocate the backing pages and map them into the reserved range.
	 * No warning is issued from here when this step fails.
	 */
	mapped = __vmalloc_area_node(vm, gfp_mask, prot, node);
	if (mapped == NULL)
		return NULL;

	/*
	 * The vm_struct was created with VM_UNINITIALIZED set, marking it as
	 * not fully set up. Setup is complete at this point, so drop the flag.
	 */
	clear_vm_uninitialized_flag(vm);

	kmemleak_vmalloc(vm, size, gfp_mask);

	return mapped;

fail:
	warn_alloc(gfp_mask, NULL,
			  "vmalloc: allocation failure: %lu bytes", requested);
	return NULL;
}
__get_vm_area_node()
/*
 * __get_vm_area_node - reserve a range of vmalloc address space and the
 * vm_struct that describes it.
 * @size:     requested size in bytes (page-aligned internally)
 * @align:    requested alignment; overridden for VM_IOREMAP areas
 * @flags:    vm_struct flags (VM_IOREMAP and VM_NO_GUARD are examined here)
 * @start:    lower bound of the address window to search
 * @end:      upper bound of the address window to search
 * @node:     NUMA node for the descriptor allocation
 * @gfp_mask: allocation flags
 * @caller:   address recorded as the origin of the request
 *
 * Must not be called from interrupt context (enforced with BUG_ON).
 *
 * Returns the initialised vm_struct, or NULL if the size rounds to zero,
 * the descriptor cannot be allocated, or no address range is found.
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
{
	const unsigned long unpadded_size = size;
	struct vm_struct *vm;
	struct vmap_area *range;

	BUG_ON(in_interrupt());

	/* Round up to whole pages; nothing to do for an empty request. */
	size = PAGE_ALIGN(size);
	if (unlikely(size == 0))
		return NULL;

	/*
	 * For ioremap areas the alignment is derived from the size: the
	 * power-of-two order covering it, clamped to the range
	 * PAGE_SHIFT..IOREMAP_MAX_ORDER.
	 */
	if (flags & VM_IOREMAP) {
		int order = clamp_t(int, get_count_order_long(size),
				    PAGE_SHIFT, IOREMAP_MAX_ORDER);

		align = 1ul << order;
	}

	/* Descriptor for the area; zero-filled by kzalloc_node(). */
	vm = kzalloc_node(sizeof(*vm), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(vm == NULL))
		return NULL;

	/*
	 * Unless VM_NO_GUARD is set, ask for one extra page of address
	 * space beyond the requested size (e.g. a 4KB request reserves 8KB).
	 */
	if ((flags & VM_NO_GUARD) == 0)
		size += PAGE_SIZE;

	/*
	 * Find a free, suitably sized and aligned range between start and
	 * end; the descriptor is released again if none is available.
	 */
	range = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(range)) {
		kfree(vm);
		return NULL;
	}

	/* KASAN is told about the caller's original byte count only. */
	kasan_unpoison_vmalloc((void *)range->va_start, unpadded_size);

	/* Tie the descriptor to the reserved range. */
	setup_vmalloc_vm(vm, range, flags, caller);

	return vm;
}
setup_vmalloc_vm()
/*
 * setup_vmalloc_vm - initialise a vm_struct for a vmap_area under the lock.
 *
 * Takes the vmap_area_lock spinlock, delegates the actual initialisation of
 * @vm from @va, @flags and @caller to setup_vmalloc_vm_locked(), and drops
 * the lock again. Returns nothing; the caller keeps using its own @vm
 * pointer afterwards.
 */
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
unsigned long flags, const void *caller)
{
spin_lock(&vmap_area_lock);
setup_vmalloc_vm_locked(vm, va, flags, caller);
spin_unlock(&vmap_area_lock);
}
__vmalloc_area_node()
/*
 * __vmalloc_area_node - allocate backing pages for a vm_struct and map them.
 * @area:     area descriptor whose address range has already been reserved
 * @gfp_mask: allocation flags for the backing pages
 * @prot:     page protection used for the mapping
 * @node:     NUMA node to allocate on, or NUMA_NO_NODE
 *
 * Allocates the array of page pointers, then one page at a time (so the
 * pages need not be physically contiguous), and finally maps them with
 * map_vm_area().
 *
 * Returns area->addr on success. On failure returns NULL after tearing the
 * area down, either with remove_vm_area() + kfree() when the pointer array
 * cannot be allocated, or with __vfree() once pages have been recorded.
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node)
{
	struct page **pages;
	unsigned int nr_pages, i;
	/*
	 * Byte size of the page-pointer array. This must be unsigned long:
	 * with an unsigned int the product below is truncated for very large
	 * areas (more than 2TB with 4KB pages and 8-byte pointers), and the
	 * array would be allocated too small for the loop that fills it.
	 */
	unsigned long array_size;
	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
	/* Allow highmem pages unless the caller asked for a DMA zone. */
	const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
					0 :
					__GFP_HIGHMEM;

	/* Number of pages covered by the area. */
	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
	array_size = (unsigned long)nr_pages * sizeof(struct page *);

	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
				PAGE_KERNEL, node, area->caller);
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}

	if (!pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	/* Record the pointer array in the descriptor before filling it. */
	area->pages = pages;
	area->nr_pages = nr_pages;

	/*
	 * Allocate each backing page individually (order 0). Because every
	 * page comes from a separate allocation, they may be physically
	 * discontiguous.
	 */
	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node == NUMA_NO_NODE)
			page = alloc_page(alloc_mask|highmem_mask);
		else
			page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
			goto fail;
		}
		area->pages[i] = page;
		/* Give the scheduler a chance when blocking is permitted. */
		if (gfpflags_allow_blocking(gfp_mask))
			cond_resched();
	}
	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);

	/* Install the page-table entries for the area. */
	if (map_vm_area(area, prot, pages))
		goto fail;

	/* Start address of the mapped area. */
	return area->addr;

fail:
	warn_alloc(gfp_mask, NULL,
			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
			  (area->nr_pages*PAGE_SIZE), area->size);
	__vfree(area->addr);
	return NULL;
}