ROCT虚拟内存分配代码分析

denglin12315

已于 2022-06-21 09:15:36 修改

阅读量480

点赞数

文章标签： ROCT 内存分配

于 2022-05-13 14:11:57 首次发布

本文链接：https://blog.csdn.net/denglin12315/article/details/124750810

版权

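该代码段实现了一个内存管理函数，用于在管理区（manageable_aperture_t）中分配对齐的虚拟地址。它首先保证对齐值不小于管理区自身的最小对齐要求，并在缓冲区足够大时将对齐值逐步加倍（直到达到 GPU_HUGE_PAGE_SIZE），然后遍历已有的 vm_area 链表，寻找足够大的空闲区域（hole）来分配内存。如果找不到合适的空间，返回NULL。此外，为了配合 TLB 的 fragment 优化，当调用者没有提出大于 PAGE_SIZE 的对齐要求时，函数对齐的是缓冲区的结束地址，否则对齐起始地址。当指定地址时，会检查其是否与现有内存区域冲突。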

摘要由CSDN通过智能技术生成

/*
 * Reserve a range of virtual addresses inside an aperture.
 *
 * app:               aperture to allocate from; its vm_ranges list holds
 *                    the ranges already in use (the search below assumes
 *                    the list is ordered by ascending address)
 * address:           if non-NULL, the exact start address requested by the
 *                    caller; if NULL, the first suitable hole is used
 * MemorySizeInBytes: requested size; it is passed through
 *                    vm_align_area_size() before the search
 * align:             requested alignment; raised to at least app->align
 *
 * Returns the start address of the reserved range, or NULL when no hole is
 * large enough, the range would start beyond app->limit, the requested
 * address is not free, or the bookkeeping node cannot be created.
 */
static void *virtual_address_allocate_aligned(manageable_aperture_t *app,
						void *address,
						uint64_t MemorySizeInBytes,
						uint64_t align)
{
	uint64_t offset = 0, orig_align = align;
	vm_area_t *cur, *next;
	void *start;

	if (align < app->align)
		align = app->align;

	/* Align big buffers to the next power-of-2 up to huge page
	 * size for flexible fragment size TLB optimizations. The
	 * alignment keeps doubling while it is still below
	 * GPU_HUGE_PAGE_SIZE and the buffer is at least twice as
	 * large as the current alignment.
	 */
	while (align < GPU_HUGE_PAGE_SIZE && MemorySizeInBytes >= (align << 1))
		align <<= 1;

	/* If no specific alignment was requested, align the end of
	 * buffers instead of the start. For fragment optimizations,
	 * aligning the start or the end achieves the same effective
	 * optimization. End alignment to the TLB cache line size is
	 * needed as a workaround for TLB issues on some older GPUs.
	 *
	 * When the condition below holds, the END of the new area is
	 * placed on an aligned address and the start is generally not
	 * aligned; otherwise offset stays 0 and the START of the new
	 * area is aligned.
	 *
	 * offset is the distance from the end of the requested size to
	 * the next multiple of align (assuming align is a power of two).
	 * Note that it equals align, not 0, when the size is already a
	 * multiple of align.
	 */
	if (orig_align <= (uint64_t)PAGE_SIZE)
		offset = align - (MemorySizeInBytes & (align - 1));

	MemorySizeInBytes = vm_align_area_size(app, MemorySizeInBytes);

	/* Find a big enough "hole" in the address space */
	cur = NULL;
	next = app->vm_ranges;
	start = address ? address :
		(void *)(ALIGN_UP((uint64_t)app->base, align) + offset);

	/*
	 * Walk the list looking for a hole that can hold the request.
	 * There are four possible situations:
	 * (1) the list is empty, so the hole is the whole aperture;
	 * (2) the hole lies between app->base and the first area;
	 * (3) the hole lies between two neighbouring areas;
	 * (4) the hole lies between the last area and app->limit; this
	 *     case leaves the loop with next == NULL and is validated
	 *     by the limit check that follows the loop.
	 */
	while (next) {
		/* The gap in front of 'next' starts at 'start' and is big enough */
		if (next->start > start &&
		    VOID_PTRS_SUB(next->start, start) >= MemorySizeInBytes)
			break;

		/* Move on to the gap behind the area just examined */
		cur = next;
		next = next->next;

		/* Without a fixed address, retry right after that area */
		if (!address)
			start = (void *)(ALIGN_UP((uint64_t)cur->end + 1, align) + offset);
	}

	/*
	 * next == NULL means no hole in front of an existing area fits, so
	 * the only candidate is the space from 'start' up to app->limit.
	 * Reject 'start' values beyond the limit explicitly: the size
	 * comparison is done against a uint64_t, so a 'start' past the limit
	 * would wrap the subtraction to a huge value and be accepted.
	 */
	if (!next &&
	    ((uint64_t)start > (uint64_t)app->limit ||
	     VOID_PTRS_SUB(app->limit, start) + 1 < MemorySizeInBytes))
		/* No hole found and not enough space after the last area */
		return NULL;

	/*
	 * A caller-specified address must not start before the first
	 * aligned address behind the preceding area.
	 */
	if (cur && address && address < (void *)ALIGN_UP((uint64_t)cur->end + 1, align))
		/* Required address is not free or overlaps */
		return NULL;

	/* The new range directly follows the preceding area: just grow it */
	if (cur && VOID_PTR_ADD(cur->end, 1) == start) {
		/* extend existing area */
		cur->end = VOID_PTR_ADD(start, MemorySizeInBytes-1);

	/* Otherwise a separate area is needed */
	} else {
		vm_area_t *new_area;
		/* create a new area between cur and next */
		new_area = vm_create_and_init_area(start,
				VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
		if (!new_area)
			return NULL;
		/*
		 * Link the node in:
		 *   before: cur -> next
		 *   after:  cur -> new_area -> next
		 * With no predecessor, new_area becomes the list head.
		 */
		new_area->next = next;
		new_area->prev = cur;
		if (cur)
			cur->next = new_area;
		else
			app->vm_ranges = new_area;
		if (next)
			next->prev = new_area;
	}

	return start;
}