内核版本:2.6.28
Linux利用伙伴系统和slab分配器分配内存,用这些方法得到的内存在物理地址上都是连续的。然而,有些时候,每次请求内存时都让系统分配物理地址连续的内存块并不合适,这时可以把多个小块内存“连接”成一大块可用的内存,这在操作系统设计中也被称为“内存拼接”。显然,内存拼接在需要较大内存、而内存访问相比之下不是很频繁的情况下是比较有效的。
在Linux内核中用来管理内存拼接的接口是vmalloc/vfree。用vmalloc分配得到的内存在线性地址上是连续的,但在物理地址上不一定连续。
函数__vmalloc列出如下(mm/vmalloc.c),其参数含义为:
size: 分配的虚拟空间的大小.
gfp_mask: 页级分配器的标志.
prot: 已分配的保护掩码.
/*
 * __vmalloc - allocate a virtually contiguous region
 * @size:     number of bytes requested
 * @gfp_mask: flags for the page-level allocator
 * @prot:     protection mask for the mapped pages
 *
 * Forwards to __vmalloc_node() with node -1, passing this function's own
 * return address as the caller argument.
 */
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	void *caller = __builtin_return_address(0);

	return __vmalloc_node(size, gfp_mask, prot, -1, caller);
}
函数__vmalloc分配足够的页数与size相配,把它们映射进连续的内核虚拟空间,但分配到的物理页面不一定连续。在函数中第一步是找到一个大小合适的虚拟内存块,并把它加入vmlist(__get_vm_area_node(...))。
/*
 * Allocate @size bytes of virtually contiguous memory.
 *
 * The size is first rounded up with PAGE_ALIGN(). A rounded size of zero,
 * or one that needs more pages than num_physpages, is rejected. Otherwise a
 * virtual area between VMALLOC_START and VMALLOC_END is obtained from
 * __get_vm_area_node() and handed to __vmalloc_area_node().
 *
 * Returns the value produced by __vmalloc_area_node(), or NULL when the
 * size check fails or no virtual area could be obtained.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	struct vm_struct *vm;

	size = PAGE_ALIGN(size);

	/* Reject empty requests. */
	if (size == 0)
		return NULL;

	/* Reject requests that need more pages than num_physpages. */
	if ((size >> PAGE_SHIFT) > num_physpages)
		return NULL;

	/* Reserve the virtual address range. */
	vm = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
				node, gfp_mask, caller);

	return vm ? __vmalloc_area_node(vm, gfp_mask, prot, node, caller)
		  : NULL;
}
第二步为这个虚拟块逐页分配物理页面,并把这些页面映射到该虚拟地址区间(__vmalloc_area_node):
/*
 * Allocate the pages backing @area and map them.
 *
 * Steps:
 *   1. Allocate an array of page pointers, one slot per page in the area
 *      (area->size minus one PAGE_SIZE, i.e. minus the guard page that
 *      __get_vm_area_node() adds).
 *   2. Allocate the pages one at a time and store them in the array.
 *   3. Map the pages with map_vm_area().
 *
 * Returns area->addr on success. Returns NULL if the array cannot be
 * allocated (the area is removed and freed here), or if a page allocation
 * or the mapping fails (the area is released through vfree()).
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **page_tbl;
	unsigned int count, tbl_bytes, idx;

	/* Number of pages to map, and the byte size of the pointer array. */
	count = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	tbl_bytes = count * sizeof(struct page *);
	area->nr_pages = count;

	/* Please note that the recursion is strictly bounded. */
	if (tbl_bytes <= PAGE_SIZE) {
		/* The array fits in one page: take it from kmalloc_node(). */
		page_tbl = kmalloc_node(tbl_bytes,
					(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
					node);
	} else {
		/* Larger array: allocate it through __vmalloc_node() itself. */
		page_tbl = __vmalloc_node(tbl_bytes, gfp_mask | __GFP_ZERO,
					  PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	}
	area->pages = page_tbl;
	area->caller = caller;

	if (page_tbl == NULL) {
		/* No array: drop the virtual area and free its descriptor. */
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	/* Allocate the backing pages one by one. */
	for (idx = 0; idx < area->nr_pages; idx++) {
		struct page *pg;

		if (node >= 0)
			pg = alloc_pages_node(node, gfp_mask, 0);
		else
			pg = alloc_page(gfp_mask);

		if (unlikely(pg == NULL)) {
			/* Successfully allocated idx pages, free them in __vunmap() */
			area->nr_pages = idx;
			goto fail;
		}
		area->pages[idx] = pg;
	}

	/* Install the pages into the page tables. */
	if (map_vm_area(area, prot, &page_tbl) != 0)
		goto fail;

	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}
__get_vm_area_node函数先通过alloc_vmap_area在start到end之间分配一段内核虚拟地址空间,然后遍历vmlist链表,将申请到的vm_struct结构按地址顺序插入到vmlist链表中.函数如下:
/*
 * Reserve a range of kernel virtual address space and describe it with a
 * vm_struct that is linked into vmlist.
 *
 * @size:     requested size in bytes; rounded up with PAGE_ALIGN(), then one
 *            extra guard page is added
 * @flags:    stored in area->flags; VM_IOREMAP additionally selects a larger
 *            alignment derived from @size
 * @start:    lower bound of the address range to search
 * @end:      upper bound of the address range to search
 * @node:     node passed to the kmalloc_node()/alloc_vmap_area() calls
 * @gfp_mask: allocation flags
 * @caller:   stored in area->caller
 *
 * Must not be called from interrupt context (BUG_ON(in_interrupt())).
 *
 * Returns the new vm_struct, or NULL if the aligned size is zero, the
 * descriptor cannot be allocated, or alloc_vmap_area() fails.
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long flags, unsigned long start, unsigned long end,
		int node, gfp_t gfp_mask, void *caller)
{
	/*
	 * This must be an automatic variable. With static storage all callers
	 * would share a single slot, and two concurrent allocations could
	 * overwrite each other's pointer between alloc_vmap_area() and the
	 * dereferences below (nothing is locked at that point).
	 */
	struct vmap_area *va;
	struct vm_struct *area;
	struct vm_struct *tmp, **p;
	unsigned long align = 1;

	BUG_ON(in_interrupt());

	/* For VM_IOREMAP, derive the alignment from the size, clamped to
	 * the range [PAGE_SHIFT, IOREMAP_MAX_ORDER] bits. */
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	/* Allocate the vm_struct descriptor. */
	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	/* Find room for size bytes of kernel virtual space in [start, end]. */
	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	/* Fill in the descriptor and tie it to the vmap_area. */
	area->flags = flags;
	area->addr = (void *)va->va_start;
	area->size = size;
	area->pages = NULL;
	area->nr_pages = 0;
	area->phys_addr = 0;
	area->caller = caller;
	va->private = area;
	va->flags |= VM_VM_AREA;

	/*
	 * Insert the area into vmlist in front of the first entry whose
	 * address is not below ours, under the vmlist write lock.
	 */
	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr >= area->addr)
			break;
	}
	area->next = *p;
	*p = area;
	write_unlock(&vmlist_lock);

	return area;
}
/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 *
 * @size:     length of the region in bytes; must be a multiple of the page
 *            size (enforced by BUG_ON)
 * @align:    alignment applied to the start address
 * @vstart:   address the search starts from
 * @vend:     the region must not extend past this address
 * @node:     node passed to kmalloc_node() for the descriptor
 * @gfp_mask: allocation flags; only the GFP_RECLAIM_MASK bits are used
 *
 * Returns the new vmap_area, already inserted with __insert_vmap_area().
 * Returns ERR_PTR(-ENOMEM) if the descriptor cannot be allocated, or
 * ERR_PTR(-EBUSY) if no suitable hole is found even after one
 * purge_vmap_area_lazy() and retry. On the -EBUSY path the descriptor is
 * freed before returning.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;

	BUG_ON(size & ~PAGE_MASK);

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	/* Align the candidate start address as requested. */
	addr = ALIGN(vstart, align);

	spin_lock(&vmap_area_lock);
	/* XXX: could have a last_hole cache */
	n = vmap_area_root.rb_node;
	if (n) {
		struct vmap_area *first = NULL;

		/* Descend the tree to pick the area the scan starts from. */
		do {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				if (!first && tmp->va_start < addr + size)
					first = tmp;
				n = n->rb_left;
			} else {
				first = tmp;
				n = n->rb_right;
			}
		} while (n);

		if (!first)
			goto found;

		/* The chosen area ends below addr: begin with its successor. */
		if (first->va_end < addr) {
			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}

		/*
		 * While the candidate range runs into the current area, move
		 * the candidate to one page past that area's end and advance
		 * to the next area.
		 */
		while (addr + size > first->va_start && addr + size <= vend) {
			addr = ALIGN(first->va_end + PAGE_SIZE, align);

			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}
	}
found:
	if (addr + size > vend) {
		spin_unlock(&vmap_area_lock);
		/* Purge lazily freed areas once and try again. */
		if (!purged) {
			purge_vmap_area_lazy();
			purged = 1;
			goto retry;
		}
		if (printk_ratelimit())
			printk(KERN_WARNING "vmap allocation failed: "
				 "use vmalloc=<size> to increase size.\n");
		/* The descriptor was never inserted; free it to avoid a leak. */
		kfree(va);
		return ERR_PTR(-EBUSY);
	}

	BUG_ON(addr & (align-1));

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	spin_unlock(&vmap_area_lock);

	return va;
}
Linux vmalloc的实现
最新推荐文章于 2024-08-26 00:55:43 发布