GEM and TTM mmap mechanisms in the Linux kernel DRM

mmap under Linux's "everything is a file"

man 2 mmap

An excerpt:

DESCRIPTION
       The contents of a file mapping (as opposed to an anonymous mapping; see MAP_ANONYMOUS below), are initialized using length bytes starting at offset offset in the file (or other object) referred to by the file descriptor fd.  offset must be a multiple of the page size as returned by sysconf(_SC_PAGE_SIZE).

// XXX:
After the mmap() call has returned, the file descriptor, fd, can be closed immediately without invalidating the mapping.



The flags argument

       MAP_SHARED   // most mappings in DRM need to be shared across processes
              Share this mapping.  Updates to the mapping are visible to other processes mapping the same region, and (in the case of file-backed mappings) are carried through to the underlying file.  (To precisely control when updates are carried through to the underlying file requires the use of msync(2).)
       MAP_SHARED_VALIDATE (since Linux 4.15)
              This flag provides the same behavior as MAP_SHARED except that MAP_SHARED mappings ignore unknown flags in flags.  By contrast, when creating a mapping using MAP_SHARED_VALIDATE, the kernel verifies all passed flags are known and fails the mapping with the error EOPNOTSUPP for unknown flags.  This mapping type is also required to be able to use some mapping flags (e.g., MAP_SYNC).
       MAP_POPULATE (since Linux 2.5.46)  // cook the dish before it is ordered (pre page fault)
              Populate (prefault) page tables for a mapping.  For a file mapping, this causes read-ahead on the file.  This will help to reduce blocking on page faults later.  The mmap() call doesn't fail if the mapping cannot be populated (for example, due to limitations on the number of mapped huge pages when using MAP_HUGETLB).  MAP_POPULATE is supported for private mappings only since Linux 2.6.23.
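
Before diving into the kernel, here is a minimal userspace sketch of how these pieces come together in DRM: create a dumb buffer, ask the kernel for its fake mmap offset, then mmap it with MAP_SHARED. (A sketch only, assuming /dev/dri/card0 and dumb-buffer support; error handling trimmed.)

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <drm/drm.h>
#include <drm/drm_mode.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);

	/* Create a 64x64, 32bpp dumb buffer; the kernel fills in size/pitch. */
	struct drm_mode_create_dumb create = { .width = 64, .height = 64, .bpp = 32 };
	ioctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);

	/* Ask for the fake offset that identifies this BO in mmap(). */
	struct drm_mode_map_dumb map = { .handle = create.handle };
	ioctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);

	/* MAP_SHARED: writes must reach the device and other processes.
	 * map.offset is a lookup key, not a real file position. */
	void *ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, map.offset);

	memset(ptr, 0, create.size);	/* first touch may go through .fault */
	munmap(ptr, create.size);
	close(fd);
	return 0;
}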

Analysis of GEM

  • Except on 32-bit Android (i.e. wherever large file support exists), libdrm's drm_mmap is simply mmap():
/* assume large file support exists */
#  define drm_mmap(addr, length, prot, flags, fd, offset) \
              mmap(addr, length, prot, flags, fd, offset)

On the kernel side, let's look at the MIPS version (they are all essentially the same):

SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
	unsigned long, prot, unsigned long, flags, unsigned long,
	fd, off_t, offset)
{
	if (offset & ~PAGE_MASK)
		return -EINVAL;
	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
			       offset >> PAGE_SHIFT);
}
// the kernel SYSCALL's parameter definitions stay consistent with the UMD (userspace) definition

Now look at ksys_mmap_pgoff. First, __fget, which the fget() call inside it relies on:

static struct file *__fget(unsigned int fd, fmode_t mask)
{
	struct files_struct *files = current->files;
	struct file *file;

	rcu_read_lock();
loop:
	file = fcheck_files(files, fd);
	if (file) {
		/* File object ref couldn't be taken.
		 * dup2() atomicity guarantee is the reason
		 * we loop to catch the new file (or NULL pointer)
		 */
		if (file->f_mode & mask)
			file = NULL;
		else if (!get_file_rcu(file))
			goto loop;
	}
	rcu_read_unlock();

	return file;
}




unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
			      unsigned long prot, unsigned long flags,
			      unsigned long fd, unsigned long pgoff)
{
	struct file *file = NULL;
	unsigned long retval;

	if (!(flags & MAP_ANONYMOUS)) {
		audit_mmap_fd(fd, flags);
		file = fget(fd);  /* see __fget above: looks up the fd in current (the current task_struct) */
		if (!file)
			return -EBADF;
		if (is_file_hugepages(file))
			len = ALIGN(len, huge_page_size(hstate_file(file)));
		retval = -EINVAL;
		if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
			goto out_fput;
	} else if (flags & MAP_HUGETLB) { /* huge pages: must be specified at creation time, marked by a flag at map time */
		struct user_struct *user = NULL;
		struct hstate *hs;

		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (!hs)
			return -EINVAL;

		len = ALIGN(len, huge_page_size(hs));
		/*
		 * VM_NORESERVE is used because the reservations will be
		 * taken when vm_ops->mmap() is called
		 * A dummy user value is used because we are not locking
		 * memory so no accounting is necessary
		 */
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
				VM_NORESERVE,
				&user, HUGETLB_ANONHUGE_INODE,
				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:
	if (file)
		fput(file);
	return retval;
}

ksys_mmap_pgoff above resolved the fd into a definite struct file; now let's go find a GEM.

  • vm_mmap_pgoff() handles the protection checks and then calls down into do_mmap:
    1. do_mmap takes pgoff and len and obtains an address in the current process's mm_struct address space ("Obtain the address to map to.") via current->mm->get_unmapped_area.
    2. Then the mapping proper: addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;

	/* Check against address space limit. */
	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
		unsigned long nr_pages;

		/*
		 * MAP_FIXED may remove pages of mappings that intersects with
		 * requested mapping. Account for the pages it would unmap.
		 */
		nr_pages = count_vma_pages_range(mm, addr, addr + len);

		if (!may_expand_vm(mm, vm_flags,
					(len >> PAGE_SHIFT) - nr_pages))
			return -ENOMEM;
	}

	/* Clear old maps */
	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
			      &rb_parent)) {
		if (do_munmap(mm, addr, len, uf))
			return -ENOMEM;
	}

	/*
	 * Private writable mapping: check memory availability
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/*
	 * Can we just expand an old mapping?
	 */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
		 * and map writably if VM_SHARED is set. This usually means the
		 * new file must not have been exposed to user-space, yet.
		 */
		vma->vm_file = get_file(file);
		error = call_mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/* Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method. -DaveM
		 * Bug: If addr is changed, prev, rb_link, rb_parent should
		 *      be updated for vma_link()
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		addr = vma->vm_start;
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	} else {
		vma_set_anonymous(vma);
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
	file = vma->vm_file;
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
					is_vm_hugetlb_page(vma) ||
					vma == get_gate_vma(current->mm))
			vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
		else
			mm->locked_vm += (len >> PAGE_SHIFT);
	}

	if (file)
		uprobe_mmap(vma);

	/*
	 * New (or expanded) vma always get soft dirty status.
	 * Otherwise user-space soft-dirty page tracker won't
	 * be able to distinguish situation when vma area unmapped,
	 * then new mapped in-place (which must be aimed as
	 * a completely new data area).
	 */
	vma->vm_flags |= VM_SOFTDIRTY;

	vma_set_page_prot(vma);

	return addr;

unmap_and_free_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
	vm_area_free(vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}

With the vma prepared (kmem_cache_alloc, via vm_area_alloc), the real call begins: error = call_mmap(file, vma);
Here we notice that the vmas form a linked list; a vma is only produced when it is actually needed, which saves space.
So, when does a vma get allocated at all? (Not many places: three or four.)

First, study error = call_mmap(file, vma), which is just each file's file->f_op->mmap(file, vma);
for the implementation we have to look inside the driver.
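
call_mmap itself is just a thin inline wrapper (include/linux/fs.h):

static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
{
	return file->f_op->mmap(file, vma);
}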

Next, let's walk through three different mmap implementations.

First, the generic GEM handling

#define DEFINE_DRM_GEM_FOPS(name) \
	static const struct file_operations name = {\
		.owner		= THIS_MODULE,\
		.open		= drm_open,\
		.release	= drm_release,\
		.unlocked_ioctl	= drm_ioctl,\
		.compat_ioctl	= drm_compat_ioctl,\
		.poll		= drm_poll,\
		.read		= drm_read,\
		.llseek		= noop_llseek,\
		.mmap		= drm_gem_mmap,\
	}
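
For context, this is roughly how a driver wires the generated fops in; the foo_* names here are hypothetical, and gem_vm_ops is what drm_gem_mmap_obj() below will install on the vma:

DEFINE_DRM_GEM_FOPS(foo_fops);

static const struct vm_operations_struct foo_gem_vm_ops = {
	.fault = foo_gem_fault,		/* hypothetical driver fault handler */
	.open  = drm_gem_vm_open,
	.close = drm_gem_vm_close,
};

static struct drm_driver foo_driver = {
	.driver_features = DRIVER_GEM | DRIVER_MODESET,
	.fops		 = &foo_fops,
	.gem_vm_ops	 = &foo_gem_vm_ops,	/* consumed by drm_gem_mmap_obj() */
};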

int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *priv = filp->private_data;
	struct drm_device *dev = priv->minor->dev;
	struct drm_gem_object *obj = NULL;
	struct drm_vma_offset_node *node;
	int ret;

	if (drm_dev_is_unplugged(dev))
		return -ENODEV;

	drm_vma_offset_lock_lookup(dev->vma_offset_manager); // the device-wide rw lock for address-offset management
	node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
						  vma->vm_pgoff,
						  vma_pages(vma));
	if (likely(node)) { // found the node via pgoff and the vma's size
		obj = container_of(node, struct drm_gem_object, vma_node);
		// with the node known, recover the GEM object from the address index
		/*
		 * When the object is being freed, after it hits 0-refcnt it
		 * proceeds to tear down the object. In the process it will
		 * attempt to remove the VMA offset and so acquire this
		 * mgr->vm_lock.  Therefore if we find an object with a 0-refcnt
		 * that matches our range, we know it is in the process of being
		 * destroyed and will be freed as soon as we release the lock -
		 * so we have to check for the 0-refcnted object and treat it as
		 * invalid.
		 */
		if (!kref_get_unless_zero(&obj->refcount))
			obj = NULL;
	}
	drm_vma_offset_unlock_lookup(dev->vma_offset_manager);

	if (!obj)
		return -EINVAL;

	if (!drm_vma_node_is_allowed(node, priv)) {
		drm_gem_object_put_unlocked(obj);
		return -EACCES;
	}

	if (node->readonly) {
		if (vma->vm_flags & VM_WRITE) {
			drm_gem_object_put_unlocked(obj);
			return -EINVAL;
		}

		vma->vm_flags &= ~VM_MAYWRITE;
	}
// once the GEM obj is found, map the vma onto the GEM's vm_ops
	ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT,
			       vma);

	drm_gem_object_put_unlocked(obj);

	return ret;
}
EXPORT_SYMBOL(drm_gem_mmap);
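
The lookup above only succeeds because the driver previously published the object at a fake offset. A sketch of the other half (the foo_* wrapper is hypothetical; the two drm_* calls are the real API):

static int foo_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)
{
	/* Inserts obj->vma_node into dev->vma_offset_manager. */
	int ret = drm_gem_create_mmap_offset(obj);
	if (ret)
		return ret;

	/* Byte offset that userspace passes back as mmap()'s offset argument. */
	*offset = drm_vma_node_offset_addr(&obj->vma_node);
	return 0;
}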

int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
		     struct vm_area_struct *vma)
{
	struct drm_device *dev = obj->dev;

	/* Check for valid size. */
	if (obj_size < vma->vm_end - vma->vm_start)
		return -EINVAL;

	if (!dev->driver->gem_vm_ops)
		return -EINVAL;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; // vm flags get forcibly reset
	vma->vm_ops = dev->driver->gem_vm_ops; // driver-implemented fault/open handling for the vma
	vma->vm_private_data = obj; // the vma's private data stores the GEM object
	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); // page protection reset as well

	/* Take a ref for this mapping of the object, so that the fault
	 * handler can dereference the mmap offset's pointer to the object.
	 * This reference is cleaned up by the corresponding vm_close
	 * (which should happen whether the vma was created by this call, or
	 * by a vm_open due to mremap or partial unmap or whatever).
	 */
	drm_gem_object_get(obj); // and when does the put happen? go find it for yourself (or see below)

	return 0;
}
EXPORT_SYMBOL(drm_gem_mmap_obj);
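
Answering the comment just above: the reference taken here is dropped in drm_gem_vm_close. Both helpers live in drm_gem.c and are what drivers plug into their vm_ops (as in the etnaviv example below):

void drm_gem_vm_open(struct vm_area_struct *vma)
{
	struct drm_gem_object *obj = vma->vm_private_data;

	drm_gem_object_get(obj);
}

void drm_gem_vm_close(struct vm_area_struct *vma)
{
	struct drm_gem_object *obj = vma->vm_private_data;

	drm_gem_object_put_unlocked(obj);
}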


static const struct vm_operations_struct vm_ops = {
	.fault = etnaviv_gem_fault, // handles the page fault: the actual serving of pages via shmem, kvmalloc_array (slab), the sg table, ending with vmf_insert_page(vma, vmf->address, page);
	.open = drm_gem_vm_open,
	.close = drm_gem_vm_close,
};
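
What a .fault handler like etnaviv_gem_fault does, boiled down to a sketch (simplified, no locking or page-array caching; real drivers pin the whole page array and drop the references when the object is freed):

static vm_fault_t foo_gem_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct drm_gem_object *obj = vma->vm_private_data;
	pgoff_t pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
	struct page *page;

	/* Pull (allocating on demand) the backing page from shmem. */
	page = shmem_read_mapping_page(obj->filp->f_mapping, pgoff);
	if (IS_ERR(page))
		return VM_FAULT_SIGBUS;

	/* Install the PTE for the faulting address: the dish is served. */
	return vmf_insert_page(vma, vmf->address, page);
}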

Second, the GEM CMA handling (contiguous memory allocator, for devices without dedicated VRAM), e.g. kirin

#define DEFINE_DRM_GEM_CMA_FOPS(name) \
	static const struct file_operations name = {\
		.owner		= THIS_MODULE,\
		.open		= drm_open,\
		.release	= drm_release,\
		.unlocked_ioctl	= drm_ioctl,\
		.compat_ioctl	= drm_compat_ioctl,\
		.poll		= drm_poll,\
		.read		= drm_read,\
		.llseek		= noop_llseek,\
		.mmap		= drm_gem_cma_mmap,\
		DRM_GEM_CMA_UNMAPPED_AREA_FOPS \
	}

// looks like there is no page fault handler? drm_gem_cma_create plus drm_vma_node_allow at creation, drm_gem_cma_mmap_obj at mmap time; yep, it directly uses DMA-zone memory with attr DMA_ATTR_WRITE_COMBINE
const struct vm_operations_struct drm_gem_cma_vm_ops = {
	.open = drm_gem_vm_open,
	.close = drm_gem_vm_close,
};

EXPORT_SYMBOL_GPL(drm_gem_cma_vm_ops);
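
Indeed. drm_gem_cma_mmap funnels into drm_gem_cma_mmap_obj, which maps the whole contiguous buffer eagerly via dma_mmap_wc, so there is nothing left for a .fault handler to do (kernel source of this era, comments trimmed):

static int drm_gem_cma_mmap_obj(struct drm_gem_cma_object *cma_obj,
				struct vm_area_struct *vma)
{
	int ret;

	/* Undo drm_gem_mmap()'s VM_PFNMAP and fake offset: map the whole buffer. */
	vma->vm_flags &= ~VM_PFNMAP;
	vma->vm_pgoff = 0;

	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
			  cma_obj->paddr, vma->vm_end - vma->vm_start);
	if (ret)
		drm_gem_vm_close(vma);

	return ret;
}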

Third, GEM with TTM, for discrete cards like AMDGPU

static const struct file_operations amdgpu_driver_kms_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.flush = amdgpu_flush,
	.release = drm_release,
	.unlocked_ioctl = amdgpu_drm_ioctl,
	.mmap = amdgpu_mmap,
	.poll = drm_poll,
	.read = drm_read,
#ifdef CONFIG_COMPAT
	.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
};
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *file_priv;
	struct amdgpu_device *adev;

	// XXX: the vma's offset must land in the DRM range to count:
	// DRM_FILE_PAGE_OFFSET is (0x100000000ULL >> PAGE_SHIFT), i.e. above the first 4 GiB
	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
		return -EINVAL;

	file_priv = filp->private_data;
	adev = file_priv->minor->dev->dev_private;
	if (adev == NULL)
		return -EINVAL;

	return ttm_bo_mmap(filp, vma, &adev->mman.bdev); // amdgpu's storage backend is TTM (translation table maps), so TTM must decide, when the moment arrives, where this vma should go to find its object
}
  • For &adev->mman.bdev here, we need to see where ttm_bo_init_mm gets called, and how many times:
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1737 col 13| adev->mman.bdev.no_retry = true; // er, not this one yet; hold on
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1740 col 33| r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1802 col 33| r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1822 col 34| r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1832 col 34| r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|1842 col 34| r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
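
ttm_bo_init_mm(bdev, type, p_size) registers one placement with TTM, sized in CPU pages (as item 2 below notes). The TTM_PL_VRAM call at |1740| above is roughly:

/* int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 *                    unsigned long p_size);  p_size is in CPU pages */
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
		   adev->gmc.real_vram_size >> PAGE_SHIFT);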

  1. Now we know: our mighty TTM is busy indeed, managing storage all over the place. amdgpu hands it TTM_PL_VRAM (VRAM), TTM_PL_TT (GTT, GPU-accessible system RAM), AMDGPU_PL_GDS (gfx global data share), AMDGPU_PL_GWS (global wave share), and AMDGPU_PL_OA (er, no idea, haha).
  2. TTM looks at all these resources from the CPU's point of view, works out how many (CPU) pages each one covers, and allocates in page units.
  3. What really backs the vma is a bo (ttm_buffer_object).
  4. But we digress... back to it:
int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
		struct ttm_bo_device *bdev)
{
	struct ttm_bo_driver *driver;
	struct ttm_buffer_object *bo;
	int ret;

	bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
	if (unlikely(!bo))
		return -EINVAL;

	driver = bo->bdev->driver;
	if (unlikely(!driver->verify_access)) {
		ret = -EPERM;
		goto out_unref;
	}
	ret = driver->verify_access(bo, filp); // may you access this bo? TTM manages so much that some of it is not on the CPU side; sorry, what's not on this side simply cannot be accessed
	if (unlikely(ret != 0))
		goto out_unref;

	vma->vm_ops = &ttm_bo_vm_ops;

	/*
	 * Note: We're transferring the bo reference to
	 * vma->vm_private_data here.
	 */

	vma->vm_private_data = bo;

	/*
	 * We'd like to use VM_PFNMAP on shared mappings, where
	 * (vma->vm_flags & VM_SHARED) != 0, for performance reasons,
	 * but for some reason VM_PFNMAP + x86 PAT + write-combine is very
	 * bad for performance. Until that has been sorted out, use
	 * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719
	 */
	vma->vm_flags |= VM_MIXEDMAP;
	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
	return 0;
out_unref:
	ttm_bo_put(bo);
	return ret;
}
EXPORT_SYMBOL(ttm_bo_mmap);
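
The verify_access hook consulted above is driver-specific. amdgpu's version (roughly, in kernels of this era) rejects userptr BOs and otherwise defers to the DRM vma manager's per-file allow-list, the same drm_vma_node_allow mechanism seen earlier:

static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);

	/* Userptr BOs wrap somebody else's memory: no CPU mapping here. */
	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
		return -EPERM;

	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
					  filp->private_data);
}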


static const struct vm_operations_struct ttm_bo_vm_ops = {
	.fault = ttm_bo_vm_fault, // serving the pages
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = ttm_bo_vm_access
};
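
As on the GEM side, .open/.close just manage the bo reference that ttm_bo_mmap transferred into vm_private_data (roughly):

static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
	ttm_bo_get(bo);
}

static void ttm_bo_vm_close(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	ttm_bo_put(bo);
	vma->vm_private_data = NULL;
}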

static struct ttm_bo_driver amdgpu_bo_driver = { // helpers for serving the pages
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,  // dish up the pages
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
	.invalidate_caches = &amdgpu_invalidate_caches,
	.init_mem_type = &amdgpu_init_mem_type,
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,  
	.verify_access = &amdgpu_verify_access, 
	.move_notify = &amdgpu_bo_move_notify,
	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
	.io_mem_free = &amdgpu_ttm_io_mem_free,
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
	.access_memory = &amdgpu_ttm_access_memory
};

static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
	    vma->vm_private_data;
	struct ttm_bo_device *bdev = bo->bdev;
	unsigned long page_offset;
	unsigned long page_last;
	unsigned long pfn;
	struct ttm_tt *ttm = NULL;
	struct page *page;
	int err;
	int i;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	unsigned long address = vmf->address;
	struct ttm_mem_type_manager *man =
		&bdev->man[bo->mem.mem_type];
	struct vm_area_struct cvma;

	/*
	 * Work around locking order reversal in fault / nopfn
	 * between mmap_sem and bo_reserve: Perform a trylock operation
	 * for reserve, and if it fails, retry the fault after waiting
	 * for the buffer to become unreserved.
	 */
	err = ttm_bo_reserve(bo, true, true, NULL);
	if (unlikely(err != 0)) {
		if (err != -EBUSY)
			return VM_FAULT_NOPAGE;

		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				ttm_bo_get(bo);
				up_read(&vmf->vma->vm_mm->mmap_sem);
				(void) ttm_bo_wait_unreserved(bo);
				ttm_bo_put(bo);
			}

			return VM_FAULT_RETRY;
		}

		/*
		 * If we'd want to change locking order to
		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
		 * instead of retrying the fault...
		 */
		return VM_FAULT_NOPAGE;
	}

	/*
	 * Refuse to fault imported pages. This should be handled
	 * (if at all) by redirecting mmap to the exporter.
	 */
	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	if (bdev->driver->fault_reserve_notify) {
		err = bdev->driver->fault_reserve_notify(bo);
		switch (err) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
			ret = VM_FAULT_NOPAGE;
			goto out_unlock;
		default:
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	ret = ttm_bo_vm_fault_idle(bo, vmf);
	if (unlikely(ret != 0)) {
		if (ret == VM_FAULT_RETRY &&
		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
			/* The BO has already been unreserved. */
			return ret;
		}

		goto out_unlock;
	}

	err = ttm_mem_io_lock(man, true);
	if (unlikely(err != 0)) {
		ret = VM_FAULT_NOPAGE;
		goto out_unlock;
	}
	err = ttm_mem_io_reserve_vm(bo);
	if (unlikely(err != 0)) {
		ret = VM_FAULT_SIGBUS;
		goto out_io_unlock;
	}

	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
		vma->vm_pgoff - drm_vma_node_start(&bo->vma_node);
	page_last = vma_pages(vma) + vma->vm_pgoff -
		drm_vma_node_start(&bo->vma_node);

	if (unlikely(page_offset >= bo->num_pages)) {
		ret = VM_FAULT_SIGBUS;
		goto out_io_unlock;
	}

	/*
	 * Make a local vma copy to modify the page_prot member
	 * and vm_flags if necessary. The vma parameter is protected
	 * by mmap_sem in write mode.
	 */
	cvma = *vma;
	cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);

	if (bo->mem.bus.is_iomem) {
		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
						cvma.vm_page_prot);
	} else {
		struct ttm_operation_ctx ctx = {
			.interruptible = false,
			.no_wait_gpu = false,
			.flags = TTM_OPT_FLAG_FORCE_ALLOC

		};

		ttm = bo->ttm;
		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
						cvma.vm_page_prot);

		/* Allocate all page at once, most common usage */
		if (ttm_tt_populate(ttm, &ctx)) {
			ret = VM_FAULT_OOM;
			goto out_io_unlock;
		}
	}

	/*
	 * Speculatively prefault a number of pages. Only error on
	 * first page.
	 */
	// prefault 16 pages (TTM_BO_VM_NUM_PREFAULT) up front, to save on repeated faults
	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
		if (bo->mem.bus.is_iomem) {
			/* Iomem should not be marked encrypted */
			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
		} else {
			page = ttm->pages[page_offset];
			if (unlikely(!page && i == 0)) {
				ret = VM_FAULT_OOM;
				goto out_io_unlock;
			} else if (unlikely(!page)) {
				break;
			}
			page->index = drm_vma_node_start(&bo->vma_node) +
				page_offset;
			pfn = page_to_pfn(page);
		}

		if (vma->vm_flags & VM_MIXEDMAP)
			ret = vmf_insert_mixed(&cvma, address,
					__pfn_to_pfn_t(pfn, PFN_DEV));
		else
			ret = vmf_insert_pfn(&cvma, address, pfn);

		/*
		 * Somebody beat us to this PTE or prefaulting to
		 * an already populated PTE, or prefaulting error.
		 */

		if (unlikely((ret == VM_FAULT_NOPAGE && i > 0)))
			break;
		else if (unlikely(ret & VM_FAULT_ERROR))
			goto out_io_unlock;

		address += PAGE_SIZE;
		if (unlikely(++page_offset >= page_last))
			break;
	}
	ret = VM_FAULT_NOPAGE;
out_io_unlock:
	ttm_mem_io_unlock(man);
out_unlock:
	ttm_bo_unreserve(bo);
	return ret;
}



Many thanks to Wang for the pointers.

Recommended: hxl's blog explanation of DRM mmap.
