目录
Vmalloc概览
vmalloc重要数据结构说明
Vmalloc数据结构关系
Vmalloc内存分配
vmalloc开机初始化
start_kernel---->mm_init---->vmalloc_initvoid __init vmalloc_init(void)
{
struct vmap_area *va;
struct vm_struct *tmp;
int i;
//初始化相关链表,初始化free_work用于统一释放一些在原子上下文中释放的vmalloc内存
for_each_possible_cpu(i) {
struct vmap_block_queue *vbq;
struct vfree_deferred *p;
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
p = &per_cpu(vfree_deferred, i);
init_llist_head(&p->list);
INIT_WORK(&p->wq, free_work);
}
//将开机过程中分配的vmalloc区域插入红黑树中
for (tmp = vmlist; tmp; tmp = tmp->next) {
va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
va->flags = VM_VM_AREA;
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
va->vm = tmp;
__insert_vmap_area(va);
}
vmap_area_pcpu_hole = VMALLOC_END;
vmap_initialized = true;
}
Vmalloc
Vmalloc内存分配流程
alloc_vmap_area:分配一个vmap_area 结构,并且在红黑树中查找一个size大小的空闲区域
setup_vmalloc_vm:初始化vm_struct结构体
__vmalloc_area_node :为 vm_struct分配物理页并创建页表映射前面查找到的地址空间
Vmalloc虚拟地址区间分配
分配一个vmap_area 结构,并且在红黑树中查找一个size大小的空闲区域,空闲地址空间必须位于vstart 到vend 之间,这里传入的是VMALLOC_START和 VMALLOC_END
/**
 * alloc_vmap_area - allocate a vmap_area and reserve a free address range
 * @size:     length of the range to reserve
 * @align:    alignment applied to the candidate start address
 * @vstart:   lowest acceptable start address
 * @vend:     upper bound; the result satisfies addr + size <= vend
 * @node:     NUMA node handed to kmalloc_node() for the descriptor
 * @gfp_mask: allocation flags; the GFP_RECLAIM_MASK bits are used for the
 *            descriptor, and gfpflags_allow_blocking() on it decides whether
 *            the vmap notifier chain is invoked after a failed search
 *
 * Return: the new vmap_area, already inserted via __insert_vmap_area();
 * ERR_PTR(-ENOMEM) if the descriptor cannot be allocated; ERR_PTR(-EBUSY)
 * if no suitable hole is found even after purging and notifying.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	/* Allocate the vmap_area that describes one contiguous virtual range. */
	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	/* Without this check the stores at 'found' would write through NULL. */
	if (!va)
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);
	/* Drop the cached search position when it cannot serve this request. */
	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}
	cached_vstart = vstart;
	cached_align = align;

	/*
	 * free_vmap_cache remembers the vmap_area found by the previous search,
	 * so the next search for a free range can start from there instead of
	 * spending time walking from VMALLOC_START again.
	 */
	if (free_vmap_cache) {
		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
		addr = ALIGN(first->va_end, align);
		if (addr < vstart)
			goto nocache;
		if (addr + size < addr)	/* unsigned wrap-around */
			goto overflow;
	} else {
		/* No cached result: search from the beginning. */
		addr = ALIGN(vstart, align);
		if (addr + size < addr)
			goto overflow;

		n = vmap_area_root.rb_node;
		first = NULL;
		/* Descend the tree to find the area to start scanning from. */
		while (n) {
			struct vmap_area *tmp;

			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				first = tmp;
				if (tmp->va_start <= addr)
					break;
				n = n->rb_left;
			} else
				n = n->rb_right;
		}
		if (!first)
			goto found;
	}

	/*
	 * Walk forward from 'first' looking for a hole that can hold 'size'.
	 * The walk stops when:
	 *   - addr + size <= first->va_start (the hole before 'first' fits),
	 *   - addr + size > vend (out of range; rejected at 'found'), or
	 *   - the end of vmap_area_list is reached.
	 */
	while (addr + size > first->va_start && addr + size <= vend) {
		if (addr + cached_hole_size < first->va_start)
			cached_hole_size = first->va_start - addr;
		addr = ALIGN(first->va_end, align);
		if (addr + size < addr)
			goto overflow;
		if (list_is_last(&first->list, &vmap_area_list))
			goto found;
		first = list_next_entry(first, list);
	}

found:
	if (addr + size > vend)
		goto overflow;

	/* Reaching this point means a usable free range was found. */
	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);	/* link the initialised vmap_area into the rb-tree */
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);
	return va;

overflow:
	spin_unlock(&vmap_area_lock);
	if (!purged) {
		/* Release the lazily-freed areas once, then try again. */
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
	if (gfpflags_allow_blocking(gfp_mask)) {
		unsigned long freed = 0;

		/* Ask registered listeners to reclaim memory. */
		blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
		if (freed > 0) {
			/* Something was reclaimed: allow another purge + retry. */
			purged = 0;
			goto retry;
		}
	}
	kfree(va);
	return ERR_PTR(-EBUSY);
}
vmap_area_root是红黑树的根节点;全局变量vmap_area_list用于将vmap_area按地址从小到大排序。下面的函数将vmap_area同时插入红黑树和链表vmap_area_list中。
/*
 * Link @va into both lookup structures: the rb-tree rooted at
 * vmap_area_root and the list headed by vmap_area_list.
 */
static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **link = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *pred;

	/* Walk down the tree to the empty slot where @va belongs. */
	while (*link) {
		struct vmap_area *cur;

		parent = *link;
		cur = rb_entry(parent, struct vmap_area, rb_node);

		if (va->va_start < cur->va_end)
			link = &parent->rb_left;
		else if (va->va_end > cur->va_start)
			link = &parent->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, link);
	rb_insert_color(&va->rb_node, &vmap_area_root);

	pred = rb_prev(&va->rb_node);
	if (!pred) {
		/* No predecessor in the tree: add @va right after the list head. */
		list_add_rcu(&va->list, &vmap_area_list);
		return;
	}

	/*
	 * The rb-tree predecessor is also the neighbouring area just below @va
	 * in address order, so insert @va directly after it in the list.
	 */
	list_add_rcu(&va->list,
		     &rb_entry(pred, struct vmap_area, rb_node)->list);
}
/*
 * Initialise a vm_struct and associate it with its vmap_area.
 *
 * @vm:     vm_struct to fill in
 * @va:     vmap_area whose [va_start, va_end) range @vm will describe
 * @flags:  value stored in vm->flags
 * @caller: value stored in vm->caller
 *
 * All fields are written while holding vmap_area_lock.
 */
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, const void *caller)
{
	spin_lock(&vmap_area_lock);
	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;
	va->vm = vm;			/* back-pointer from the area to its vm_struct */
	va->flags |= VM_VM_AREA;	/* mark that va->vm is now set */
	spin_unlock(&vmap_area_lock);
}
vmalloc物理页分配和映射
/*
 * Allocate the physical pages backing @area one page at a time and map them
 * into the area's virtual range.
 *
 * @area:     vm_struct whose address range is to be populated
 * @gfp_mask: allocation flags for the data pages (__GFP_HIGHMEM and
 *            __GFP_NOWARN are OR-ed in here)
 * @prot:     page protection passed to map_vm_area()
 * @node:     NUMA node to allocate from, or NUMA_NO_NODE
 *
 * Returns area->addr on success. On failure the area is released through
 * vfree() and NULL is returned.
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;
	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
	const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;

	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
	/* Size of the array that holds one struct page pointer per page. */
	array_size = (nr_pages * sizeof(struct page *));
	/* Record the size of the virtual range, counted in pages. */
	area->nr_pages = nr_pages;

	/* Allocate the page-pointer array; larger than one page goes via vmalloc. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
				PAGE_KERNEL, node, area->caller);
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}
	area->pages = pages;
	/* …… (code elided in the original notes) */

	/* Allocate the physical pages one by one. */
	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		/* Stop early if the current task has a fatal signal pending. */
		if (fatal_signal_pending(current)) {
			area->nr_pages = i;
			goto fail_no_warn;
		}

		if (node == NUMA_NO_NODE)
			page = alloc_page(alloc_mask);
		else
			page = alloc_pages_node(node, alloc_mask, 0);

		if (!page) {
			/*
			 * Only i pages were obtained. Trim nr_pages so the
			 * vfree() below releases exactly those and never sees
			 * a NULL entry in area->pages.
			 */
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;

		if (gfpflags_allow_blocking(gfp_mask))
			cond_resched();
	}

	/* Map the pages allocated above into the contiguous virtual range. */
	if (map_vm_area(area, prot, pages))
		goto fail;
	return area->addr;	/* start of the virtual range */

fail:
	/* ...... (code elided in the original notes) */
fail_no_warn:
	vfree(area->addr);
	return NULL;
}
Vfree
void vfree(const void *addr)
{
kmemleak_free(addr);
if (!addr)
return;
if (unlikely(in_interrupt())) //如果在中断中是不允许睡眠的所以将释放工作交给free_work
__vfree_deferred(addr);
else
__vunmap(addr, 1);//释放掉虚拟地址空间和对应的物理页
}
通过schedule_work调度free_work,去释放vfree_deferred中等待释放的内存
/*
 * Queue @addr on this CPU's vfree_deferred list and schedule its work item
 * when llist_add() reports true.
 */
static inline void __vfree_deferred(const void *addr)
{
	struct vfree_deferred *deferred = raw_cpu_ptr(&vfree_deferred);
	/* The region being freed is itself used as the list node. */
	struct llist_node *node = (struct llist_node *)addr;

	if (!llist_add(node, &deferred->list))
		return;

	schedule_work(&deferred->wq);
}
释放掉虚拟地址空间和对应的物理页
/*
 * Tear down the area that starts at @addr.
 * @addr:             start address of the area
 * @deallocate_pages: when non-zero, also free every page in area->pages
 *                    and the page-pointer array itself
 */
static void __vunmap(const void *addr, int deallocate_pages)
{
	/*
	 * Per the original notes: remove_vm_area() undoes the mapping between
	 * the virtual range and its pages and parks the vmap_area on the purge
	 * list, to be released later when address space runs short. This
	 * shortens later searches for contiguous space and reduces
	 * fragmentation.
	 */
	struct vm_struct *area = remove_vm_area(addr);

	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	if (deallocate_pages) {
		int idx = 0;

		/* Give back every physical page. */
		while (idx < area->nr_pages) {
			struct page *page = area->pages[idx++];

			BUG_ON(!page);
			__free_pages(page, 0);
		}
		/* Then the array of page pointers. */
		kvfree(area->pages);
	}

	/* Finally the vm_struct itself. */
	kfree(area);
}