/*
 * Reserve MemorySizeInBytes bytes of virtual address space inside the
 * aperture and record the reservation in the aperture's vm area list.
 *
 * @app:               aperture to allocate from; app->vm_ranges is updated
 * @address:           requested start address, or NULL to let the function
 *                     pick the first suitable hole
 * @MemorySizeInBytes: requested size; it is passed through
 *                     vm_align_area_size() before the hole search
 * @align:             requested alignment; raised to app->align if smaller
 *
 * Returns the start address of the reserved range, or NULL when no hole is
 * large enough, when the requested address collides with the preceding
 * area, or when vm_create_and_init_area() fails.
 *
 * The walk below assumes app->vm_ranges is ordered by ascending address and
 * that align is a power of two (the "align - 1" mask relies on it).
 */
static void *virtual_address_allocate_aligned(manageable_aperture_t *app,
					      void *address,
					      uint64_t MemorySizeInBytes,
					      uint64_t align)
{
	uint64_t offset = 0, orig_align = align;
	vm_area_t *cur, *next;
	void *start;

	if (align < app->align)
		align = app->align;

	/* Align big buffers to the next power-of-2 up to huge page
	 * size for flexible fragment size TLB optimizations.
	 * The loop keeps doubling align while it is still below
	 * GPU_HUGE_PAGE_SIZE and the buffer is at least twice as large.
	 */
	while (align < GPU_HUGE_PAGE_SIZE && MemorySizeInBytes >= (align << 1))
		align <<= 1;

	/* If no specific alignment was requested, align the end of
	 * buffers instead of the start. For fragment optimizations,
	 * aligning the start or the end achieves the same effective
	 * optimization. End alignment to the TLB cache line size is
	 * needed as a workaround for TLB issues on some older GPUs.
	 *
	 * (1) start-aligned area:
	 *     |<-- area 1 -->| ... |<------------ area n ------------>|
	 *                          ^ start (aligned)             end ^
	 * (2) end-aligned area:
	 *     |<-- area 1 -->| ... |....|<--------- area n ---------->|
	 *                          ^    ^ start (unaligned)           ^
	 *                          aligned address          end (aligned)
	 *
	 * offset is the distance from the end of the requested size to the
	 * next multiple of align, so "aligned address + offset + size" lands
	 * on an align boundary. It is computed from the size as requested,
	 * before vm_align_area_size() adjusts it.
	 * NOTE(review): when the size is already a multiple of align, offset
	 * equals align (not 0), which skips one full alignment unit. Left
	 * unchanged because it affects the addresses handed out.
	 */
	if (orig_align <= (uint64_t)PAGE_SIZE)
		offset = align - (MemorySizeInBytes & (align - 1));

	MemorySizeInBytes = vm_align_area_size(app, MemorySizeInBytes);

	/* Find a big enough "hole" in the address space. Possible holes:
	 * (1) the list is empty: everything from the first candidate
	 *     address up to app->limit;
	 * (2) the gap between app->base and the first area;
	 * (3) the gap between two neighbouring areas;
	 * (4) the gap between the last area and app->limit (the loop ends
	 *     with next == NULL and the check after the loop decides).
	 */
	cur = NULL;
	next = app->vm_ranges;
	start = address ? address :
		(void *)(ALIGN_UP((uint64_t)app->base, align) + offset);
	while (next) {
		/* The gap in front of next starts after start and is big enough */
		if (next->start > start &&
		    VOID_PTRS_SUB(next->start, start) >= MemorySizeInBytes)
			break;

		/* Move on to the gap behind this area */
		cur = next;
		next = next->next;

		/* Without a fixed address, retry right after the area just passed */
		if (!address)
			start = (void *)(ALIGN_UP((uint64_t)cur->end + 1, align) + offset);
	}

	/* No hole found between existing areas: check the space between start
	 * and app->limit. start may already lie beyond the limit (alignment
	 * padding past the last area, or a caller-supplied address); in that
	 * case the unsigned subtraction would wrap to a huge value and the
	 * size check would wrongly pass, so reject it explicitly first.
	 */
	if (!next &&
	    ((uint64_t)start > (uint64_t)app->limit ||
	     VOID_PTRS_SUB(app->limit, start) + 1 < MemorySizeInBytes))
		/* No hole found and not enough space after the last area */
		return NULL;

	/* A caller-supplied address must not fall below the first aligned
	 * address after the preceding area.
	 */
	if (cur && address && address < (void *)ALIGN_UP((uint64_t)cur->end + 1, align))
		/* Required address is not free or overlaps */
		return NULL;

	if (cur && VOID_PTR_ADD(cur->end, 1) == start) {
		/* The new range directly follows cur: extend existing area */
		cur->end = VOID_PTR_ADD(start, MemorySizeInBytes-1);
	} else {
		vm_area_t *new_area;

		/* create a new area between cur and next */
		new_area = vm_create_and_init_area(start,
				VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
		if (!new_area)
			return NULL;

		/* Link it in: cur <-> new_area <-> next. With no predecessor
		 * the new area becomes the head of the list.
		 */
		new_area->next = next;
		new_area->prev = cur;
		if (cur)
			cur->next = new_area;
		else
			app->vm_ranges = new_area;
		if (next)
			next->prev = new_area;
	}

	return start;
}
ROCT虚拟内存分配代码分析
于 2022-05-13 14:11:57 首次发布
该代码段实现了一个内存管理函数,用于在管理区(manageable_aperture_t)中分配对齐的虚拟地址。它首先确保对齐不小于管理区自身的最小对齐要求,并对较大的缓冲区把对齐值逐步翻倍(上限为GPU大页大小),然后遍历已有的vm_area链表,寻找足够大的空闲区域(hole)来分配内存。如果找不到合适的空间,返回NULL。此外,为了配合TLB的fragment优化,当调用者没有要求大于页大小的对齐时,它对齐的是缓冲区的结束地址而不是起始地址。当指定地址时,会检查其是否与前一个内存区域冲突。
摘要由CSDN通过智能技术生成