longkgsl_ioctl_gpumem_alloc(structkgsl_device_private*dev_priv,unsignedint cmd,void*data){// ioctl参数structkgsl_gpumem_alloc*param = data;// kgsl_mem_entry用于描述用户空间的内存分配[见2.1节]structkgsl_mem_entry*entry;// 用户空间指定的标志位uint64_t flags = param->flags;/*
* On 64 bit kernel, secure memory region is expanded and
* moved to 64 bit address, 32 bit apps can not access it from
* this IOCTL.
*/if((param->flags & KGSL_MEMFLAGS_SECURE)&&is_compat_task()&&test_bit(KGSL_MMU_64BIT,&device->mmu.features))return-EOPNOTSUPP;/* Legacy functions doesn't support these advanced features */
flags &=~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);if(is_compat_task())
flags |= KGSL_MEMFLAGS_FORCE_32BIT;// 创建kgsl_mem_entry[见2.2节]
entry =gpumem_alloc_entry(dev_priv,(uint64_t) param->size, flags);if(IS_ERR(entry))returnPTR_ERR(entry);// 更新参数
param->gpuaddr =(unsignedlong) entry->memdesc.gpuaddr;
param->size =(size_t) entry->memdesc.size;
param->flags =(unsignedint) entry->memdesc.flags;/* Put the extra ref from kgsl_mem_entry_create() */// 减少引用计数, 如果引用计数减为0则通过kgsl_mem_entry_destroy释放kgsl_mem_entrykgsl_mem_entry_put(entry);return0;}
2.1 kgsl_mem_entry
/*
* struct kgsl_mem_entry - a userspace memory allocation
*/structkgsl_mem_entry{// Currently userspace can only hold a single reference count but the kernel may hold morestructkref refcount;// description of the memory[见2.1.1节]structkgsl_memdesc memdesc;// type-specific data, such as the dma-buf attachment pointervoid*priv_data;// rb_node for the gpu address lookup rb treestructrb_node node;// idr index for this entry, can be used to find memory that does not have a valid GPU addressunsignedint id;// 持有该内存的进程structkgsl_process_private*priv;// if !0, userspace requested that his memory be freed, but there are still references to itint pending_free;// String containing user specified metadata for the entrychar metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX +1];// used to schedule a kgsl_mem_entry_put in atomic contextsstructwork_struct work;/**
* @map_count: Count how many vmas this object is mapped in - used for
* debugfs accounting
*/// 映射的VMA数量atomic_t map_count;};
2.1.1 kgsl_memdesc
/**
* struct kgsl_memdesc - GPU memory object descriptor
*/structkgsl_memdesc{// 此块对象映射的页表structkgsl_pagetable*pagetable;// CPU(进程)虚拟地址void*hostptr;// 使用CPU虚拟地址的线程个数unsignedint hostptr_count;// GPU虚拟地址uint64_t gpuaddr;// 该内存对象的物理地址phys_addr_t physaddr;// 该内存对象的物理内存大小uint64_t size;// Internal flags and settingsunsignedint priv;structsg_table*sgt;// 操作这块内存的函数[见2.1.2节]conststructkgsl_memdesc_ops*ops;// 用户空间申请内存时设置的标志位(Flags set from userspace)uint64_t flags;structdevice*dev;// dma attributes for this memoryunsignedlong attrs;// An array of pointers to allocated pages// 申请的物理页面数组structpage**pages;// Total number of pages allocated// 申请的物理页面数量unsignedint page_count;/*
* @lock: Spinlock to protect the gpuaddr from being accessed by
* multiple entities trying to map the same SVM region at once
*/spinlock_t lock;};
2.1.2 kgsl_memdesc_ops
// 具体实现见2.2.5节kgsl_page_opsstructkgsl_memdesc_ops{unsignedint vmflags;vm_fault_t(*vmfault)(structkgsl_memdesc*memdesc,structvm_area_struct*vma,structvm_fault*vmf);// 释放内存void(*free)(structkgsl_memdesc*memdesc);// 映射到内核虚拟地址空间int(*map_kernel)(structkgsl_memdesc*memdesc);// 解映射void(*unmap_kernel)(structkgsl_memdesc*memdesc);/**
* @put_gpuaddr: Put away the GPU address and unmap the memory
* descriptor
*/void(*put_gpuaddr)(structkgsl_memdesc*memdesc);};
2.2 gpumem_alloc_entry
/*
 * gpumem_alloc_entry() - sanitize the requested flags, allocate backing
 * memory and attach the resulting kgsl_mem_entry to the calling process.
 *
 * Return: the new entry (with the extra userspace reference still held)
 * or an ERR_PTR on failure.
 */
struct kgsl_mem_entry *gpumem_alloc_entry(
		struct kgsl_device_private *dev_priv,
		uint64_t size, uint64_t flags)
{
	int ret;
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_mem_entry *entry;
	struct kgsl_mmu *mmu = &dev_priv->device->mmu;
	unsigned int align;

	/* Mask off everything except the flags this path understands */
	flags &= KGSL_MEMFLAGS_GPUREADONLY
		| KGSL_CACHEMODE_MASK
		| KGSL_MEMTYPE_MASK
		| KGSL_MEMALIGN_MASK
		| KGSL_MEMFLAGS_USE_CPU_MAP
		| KGSL_MEMFLAGS_SECURE
		| KGSL_MEMFLAGS_FORCE_32BIT
		| KGSL_MEMFLAGS_IOCOHERENT
		| KGSL_MEMFLAGS_GUARD_PAGE;

	/* Return not supported error if secure memory isn't enabled */
	if (!kgsl_mmu_is_secured(mmu) && (flags & KGSL_MEMFLAGS_SECURE)) {
		dev_WARN_ONCE(dev_priv->device->dev, 1,
			"Secure memory not supported");
		return ERR_PTR(-EOPNOTSUPP);
	}

	/* Cap the alignment bits to the highest number we can handle */
	align = MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT);
	if (align >= ilog2(KGSL_MAX_ALIGN)) {
		dev_err(dev_priv->device->dev,
			"Alignment too large; restricting to %dK\n",
			KGSL_MAX_ALIGN >> 10);

		flags &= ~((uint64_t) KGSL_MEMALIGN_MASK);
		flags |= (uint64_t)((ilog2(KGSL_MAX_ALIGN) <<
			KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK);
	}

	/* For now only allow allocations up to 4G */
	if (size == 0 || size > UINT_MAX)
		return ERR_PTR(-EINVAL);

	/* Normalize the requested cache mode */
	flags = kgsl_filter_cachemode(flags);

	entry = kgsl_mem_entry_create();
	if (entry == NULL)
		return ERR_PTR(-ENOMEM);

	/* Cached buffers default to IO-coherent when the config asks for it */
	if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) &&
		kgsl_cachemode_is_cached(flags))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

	/* Allocate the actual backing memory */
	ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc,
		size, flags, 0);
	if (ret != 0)
		goto err;

	/* Bind the new allocation to the owning kgsl process */
	ret = kgsl_mem_entry_attach_process(dev_priv->device, private, entry);
	if (ret != 0) {
		kgsl_sharedmem_free(&entry->memdesc);
		goto err;
	}

	kgsl_process_add_stats(private,
			kgsl_memdesc_usermem_type(&entry->memdesc),
			entry->memdesc.size);
	trace_kgsl_mem_alloc(entry);

	/* Publish the entry so other operations can look it up */
	kgsl_mem_entry_commit_process(entry);
	return entry;
err:
	kfree(entry);
	return ERR_PTR(ret);
}
2.2.1 kgsl_mem_entry_create
staticstructkgsl_mem_entry*kgsl_mem_entry_create(void){// 创建kgsl_mem_entrystructkgsl_mem_entry*entry =kzalloc(sizeof(*entry), GFP_KERNEL);if(entry !=NULL){// 初始化kgsl_mem_entry引用计数为1kref_init(&entry->refcount);/* put this ref in userspace memory alloc and map ioctls */// 引用计数加1kref_get(&entry->refcount);// 初始化映射的VMA数量为0atomic_set(&entry->map_count,0);}return entry;}
/*
 * kgsl_memdesc_init() - zero a memdesc and sanitize the userspace flags
 * against what the device actually supports, then store the result.
 */
void kgsl_memdesc_init(struct kgsl_device *device,
		struct kgsl_memdesc *memdesc, uint64_t flags)
{
	struct kgsl_mmu *mmu = &device->mmu;
	unsigned int align;

	memset(memdesc, 0, sizeof(*memdesc));

	/* Turn off SVM if the system doesn't support it */
	if (!kgsl_mmu_is_perprocess(mmu))
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Secure memory disables advanced addressing modes */
	if (flags & KGSL_MEMFLAGS_SECURE)
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Disable IO coherence if it is not supported on the chip */
	if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) {
		flags &= ~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT);

		WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT),
			"I/O coherency is not supported on this target\n");
	} else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

	/*
	 * We can't enable I/O coherency on uncached surfaces because of
	 * situations where hardware might snoop the cpu caches which can
	 * have stale data. This happens primarily due to the limitations
	 * of dma caching APIs available on arm64
	 */
	if (!kgsl_cachemode_is_cached(flags))
		flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT);

	if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) ||
		(flags & KGSL_MEMFLAGS_GUARD_PAGE))
		memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE;

	if (flags & KGSL_MEMFLAGS_SECURE)
		memdesc->priv |= KGSL_MEMDESC_SECURE;

	memdesc->flags = flags;
	memdesc->dev = &device->pdev->dev;

	/* Clamp the requested alignment to at least one page */
	align = max_t(unsigned int,
		kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE));
	kgsl_memdesc_set_align(memdesc, align);

	spin_lock_init(&memdesc->lock);
}
/*
 * _kgsl_alloc_pages() - allocate the backing pages for a memdesc.
 *
 * On success, *pages points to a kvcalloc'd array of the allocated pages
 * (ownership transfers to the caller) and the number of array slots used is
 * returned; on failure a negative error code is returned and everything
 * allocated so far is released.
 */
static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc,
		u64 size, struct page ***pages, struct device *dev)
{
	int count = 0;
	int npages = size >> PAGE_SHIFT;
	/* kvcalloc tries kmalloc first and falls back to vmalloc */
	struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL);
	u32 page_size, align;
	u64 len = size;

	if (!local)
		return -ENOMEM;

	/* Returns 0 on success or when CONFIG_QCOM_KGSL_USE_SHMEM is unset */
	count = kgsl_memdesc_file_setup(memdesc, size);
	if (count) {
		kvfree(local);
		return count;
	}

	/* Start with 1MB alignment to get the biggest page we can */
	align = ilog2(SZ_1M);

	page_size = kgsl_get_page_size(len, align);

	while (len) {
		int ret = kgsl_alloc_page(&page_size, &local[count],
			npages, &align, count, memdesc->shmem_filp, dev);

		if (ret == -EAGAIN)
			continue;
		else if (ret <= 0) {
			int i;

			/* Unwind: free every (possibly compound) page */
			for (i = 0; i < count; ) {
				int n = 1 << compound_order(local[i]);

				kgsl_free_page(local[i]);
				i += n;
			}
			kvfree(local);

			if (!kgsl_sharedmem_noretry_flag)
				pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n",
					(size - len) >> 10, size >> 10);

			if (memdesc->shmem_filp)
				fput(memdesc->shmem_filp);

			return -ENOMEM;
		}

		count += ret;
		npages -= ret;
		len -= page_size;

		page_size = kgsl_get_page_size(len, align);
	}

	/* Hand the page array back to the caller */
	*pages = local;

	return count;
}
/*
* Attach the memory object to a process by (possibly) getting a GPU address and
* (possibly) mapping it
*/staticintkgsl_mem_entry_attach_process(structkgsl_device*device,structkgsl_process_private*process,structkgsl_mem_entry*entry){int id, ret;// kgsl_process_private引用计数加1
ret =kgsl_process_private_get(process);if(!ret)return-EBADF;// [见2.2.7.1节]
ret =kgsl_mem_entry_track_gpuaddr(device, process, entry);if(ret){kgsl_process_private_put(process);return ret;}idr_preload(GFP_KERNEL);spin_lock(&process->mem_lock);/* Allocate the ID but don't attach the pointer just yet */// 为kgsl_mem_entry分配id
id =idr_alloc(&process->mem_idr,NULL,1,0, GFP_NOWAIT);spin_unlock(&process->mem_lock);idr_preload_end();if(id <0){if(!kgsl_memdesc_use_cpu_map(&entry->memdesc))kgsl_mmu_put_gpuaddr(&entry->memdesc);kgsl_process_private_put(process);return id;}
entry->id = id;
entry->priv = process;/*
* Map the memory if a GPU address is already assigned, either through
* kgsl_mem_entry_track_gpuaddr() or via some other SVM process
*/// GPU虚拟地址分配成功if(entry->memdesc.gpuaddr){// [见2.2.7.6节]
ret =kgsl_mmu_map(entry->memdesc.pagetable,&entry->memdesc);if(ret)kgsl_mem_entry_detach_process(entry);}kgsl_memfree_purge(entry->memdesc.pagetable, entry->memdesc.gpuaddr,
entry->memdesc.size);return ret;}
2.2.7.1 kgsl_mem_entry_track_gpuaddr
/* Allocate a IOVA for memory objects that don't use SVM */staticintkgsl_mem_entry_track_gpuaddr(structkgsl_device*device,structkgsl_process_private*process,structkgsl_mem_entry*entry){structkgsl_pagetable*pagetable;/*
* If SVM is enabled for this object then the address needs to be
* assigned elsewhere
* Also do not proceed further in case of NoMMU.
*/// 不支持IOMMU则直接返回if(kgsl_memdesc_use_cpu_map(&entry->memdesc)||(kgsl_mmu_get_mmutype(device)== KGSL_MMU_TYPE_NONE))return0;// 使用kgsl进程页表
pagetable =kgsl_memdesc_is_secured(&entry->memdesc)?
device->mmu.securepagetable : process->pagetable;// 获取GPU虚拟地址[见2.2.7.2节]returnkgsl_mmu_get_gpuaddr(pagetable,&entry->memdesc);}
2.2.7.2 kgsl_mmu_get_gpuaddr
#definePT_OP_VALID(_pt, _field)\(((_pt)!=NULL)&&\((_pt)->pt_ops !=NULL)&&\((_pt)->pt_ops->_field !=NULL))/**
* kgsl_mmu_get_gpuaddr - Assign a GPU address to the memdesc
* @pagetable: GPU pagetable to assign the address in
* @memdesc: mem descriptor to assign the memory to
*
* Return: 0 on success or negative on failure
*/staticinlineintkgsl_mmu_get_gpuaddr(structkgsl_pagetable*pagetable,structkgsl_memdesc*memdesc){// 调用iommu_pt_ops中定义的kgsl_iommu_get_gpuaddr分配GPU虚拟地址[2.2.7.5节]if(PT_OP_VALID(pagetable, get_gpuaddr))return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc);return-ENOMEM;}
2.2.7.3 kgsl_iommu_get_gpuaddr
staticintkgsl_iommu_get_gpuaddr(structkgsl_pagetable*pagetable,structkgsl_memdesc*memdesc){structkgsl_iommu_pt*pt = pagetable->priv;int ret =0;uint64_t addr, start, end, size;unsignedint align;if(WARN_ON(kgsl_memdesc_use_cpu_map(memdesc)))return-EINVAL;if(memdesc->flags & KGSL_MEMFLAGS_SECURE &&
pagetable->name != KGSL_MMU_SECURE_PT)return-EINVAL;// 获取映射区域(kgsl_memdesc)的大小
size =kgsl_memdesc_footprint(memdesc);
align =max_t(uint64_t,1<<kgsl_memdesc_get_align(memdesc),
PAGE_SIZE);if(memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT){
start = pagetable->compat_va_start;
end = pagetable->compat_va_end;}else{// 页表的起始虚拟地址
start = pt->va_start;// 页表的结束虚拟地址
end = pt->va_end;}spin_lock(&pagetable->lock);// 获取一块未映射的虚拟地址[2.2.7.4节]
addr =_get_unmapped_area(pagetable, start, end, size, align);if(addr ==(uint64_t)-ENOMEM){
ret =-ENOMEM;goto out;}/*
* This path is only called in a non-SVM path with locks so we can be
* sure we aren't racing with anybody so we don't need to worry about
* taking the lock
*/// 将该虚拟地址插入页表[2.2.7.5节]
ret =_insert_gpuaddr(pagetable, addr, size);if(ret ==0){// 设置GPU虚拟地址
memdesc->gpuaddr = addr;// 设置页表
memdesc->pagetable = pagetable;}
out:spin_unlock(&pagetable->lock);return ret;}
2.2.7.4 _get_unmapped_area
/*
* struct kgsl_iommu_addr_entry - entry in the kgsl_pagetable rbtree.
* @base: starting virtual address of the entry
* @size: size of the entry
* @node: the rbtree node
*/structkgsl_iommu_addr_entry{// 起始虚拟地址uint64_t base;uint64_t size;structrb_node node;};staticuint64_t_get_unmapped_area(structkgsl_pagetable*pagetable,uint64_t bottom,uint64_t top,uint64_t size,uint64_t align){// 页表radix tree头节点structrb_node*node =rb_first(&pagetable->rbtree);uint64_t start;
bottom =ALIGN(bottom, align);
start = bottom;while(node !=NULL){uint64_t gap;// 查找rb_node的容器即kgsl_iommu_addr_entrystructkgsl_iommu_addr_entry*entry =rb_entry(node,structkgsl_iommu_addr_entry, node);/*
* Skip any entries that are outside of the range, but make sure
* to account for some that might straddle the lower bound
*/if(entry->base < bottom){if(entry->base + entry->size > bottom)
start =ALIGN(entry->base + entry->size, align);
node =rb_next(node);continue;}/* Stop if we went over the top */if(entry->base >= top)break;/* Make sure there is a gap to consider */if(start < entry->base){
gap = entry->base - start;if(gap >= size)return start;}/* Stop if there is no more room in the region */if(entry->base + entry->size >= top)return(uint64_t)-ENOMEM;/* Start the next cycle at the end of the current entry */
start =ALIGN(entry->base + entry->size, align);
node =rb_next(node);}// 返回起始虚拟地址if(start + size <= top)return start;return(uint64_t)-ENOMEM;}
intkgsl_mmu_map(structkgsl_pagetable*pagetable,structkgsl_memdesc*memdesc){int size;structkgsl_device*device =KGSL_MMU_DEVICE(pagetable->mmu);if(!memdesc->gpuaddr)return-EINVAL;/* Only global mappings should be mapped multiple times */// KGSL_MEMDESC_MAPPED标志位用于判断kgsl_memdesc是否被映射:只有全局共享内存才能映射多次if(!kgsl_memdesc_is_global(memdesc)&&(KGSL_MEMDESC_MAPPED & memdesc->priv))return-EINVAL;
size =kgsl_memdesc_footprint(memdesc);if(PT_OP_VALID(pagetable, mmu_map)){int ret;// 调用iommu_pt_ops中定义的kgsl_iommu_map[见2.2.7.7节]
ret = pagetable->pt_ops->mmu_map(pagetable, memdesc);if(ret)return ret;atomic_inc(&pagetable->stats.entries);// 内存统计KGSL_STATS_ADD(size,&pagetable->stats.mapped,&pagetable->stats.max_mapped);kgsl_mmu_trace_gpu_mem_pagetable(pagetable);if(!kgsl_memdesc_is_global(memdesc)&&!(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)){kgsl_trace_gpu_mem_total(device, size);}// 标记此块内存已经被映射
memdesc->priv |= KGSL_MEMDESC_MAPPED;}return0;}
2.2.7.6 kgsl_iommu_map
staticintkgsl_iommu_map(structkgsl_pagetable*pt,structkgsl_memdesc*memdesc){int ret;uint64_t addr = memdesc->gpuaddr;uint64_t size = memdesc->size;unsignedint flags =_get_protection_flags(pt, memdesc);structsg_table*sgt =NULL;/*
* For paged memory allocated through kgsl, memdesc->pages is not NULL.
* Allocate sgt here just for its map operation. Contiguous memory
* already has its sgt, so no need to allocate it here.
*/if(memdesc->pages !=NULL)
sgt =kgsl_alloc_sgt_from_pages(memdesc);else
sgt = memdesc->sgt;if(IS_ERR(sgt))returnPTR_ERR(sgt);
ret =_iommu_map_sg(pt, addr, sgt->sgl, sgt->nents, flags);if(ret)goto done;
ret =_iommu_map_guard_page(pt, memdesc, addr + size, flags);if(ret)_iommu_unmap(pt, addr, size);
done:if(memdesc->pages !=NULL)kgsl_free_sgt(sgt);return ret;}