在qemu-master/hw/vfio/common.c 中的vfio_dma_map 中会通过VFIO_IOMMU_MAP_DMA命令让kernel中的vfio来将起始地址是iova ,大小是size的空间做iommu map
static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
.flags = VFIO_DMA_MAP_FLAG_READ,
.vaddr = (__u64)(uintptr_t)vaddr,
.iova = iova,
.size = size,
};
if (!readonly) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
/*
* Try the mapping, if it fails with EBUSY, unmap the region and try
* again. This shouldn't be necessary, but we sometimes see it in
* the VGA ROM space.
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
(errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
error_report("VFIO_MAP_DMA: %d", -errno);
return -errno;
}
举个简单的例子
/* Allocate some space and setup a DMA mapping */
dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
dma_map.size = 1024 * 1024;
dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
这段code就是将0~1M的这段memory 做iommu map.
这个VFIO_IOMMU_MAP_DMA 对应kernel中的vfio_iommu_driver_ops 中的ioctl实现vfio_iommu_type1_ioctl
static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
struct vfio_iommu *iommu = iommu_data;
unsigned long minsz;
else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
if (copy_from_user(&map, (void __user *)arg, minsz))
return -EFAULT;
if (map.argsz < minsz || map.flags & ~mask)
return -EINVAL;
return vfio_dma_do_map(iommu, &map);
}
}
在vfio_iommu_type1_ioctl中通过copy_from_user将数据从user space copy到kernel space后,继续调用vfio_dma_do_map来将0~1M(举例)这段 虚拟地址做映射
static int vfio_dma_do_map(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_map *map)
{
dma_addr_t iova = map->iova;
unsigned long vaddr = map->vaddr;
size_t size = map->size;
long npage;
int ret = 0, prot = 0;
uint64_t mask;
struct vfio_dma *dma;
unsigned long pfn;
mutex_lock(&iommu->lock);
//查找iova是否已经被映射过。
if (vfio_find_dma(iommu, iova, size)) {
mutex_unlock(&iommu->lock);
return -EEXIST;
}
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
mutex_unlock(&iommu->lock);
return -ENOMEM;
}
//iova 是设备dma使用的地址
dma->iova = iova;
//vaddr是cpu使用的地址
dma->vaddr = vaddr;
dma->prot = prot;
//将这个struct vfio_dma *dma 插入到rb tree中,这样下次通过vfio_find_dma 查找的时候就不为null了.
vfio_link_dma(iommu, dma);
while (size) {
/* Pin a contiguous chunk of memory */
//map是一段一段映射的,例如如果memory是由3Page+1Page+4Page这三段连续的
npage = vfio_pin_pages(vaddr + dma->size,
size >> PAGE_SHIFT, prot, &pfn);
if (npage <= 0) {
WARN_ON(!npage);
ret = (int)npage;
break;
}
//这里就调用vfio_iommu_map映射,其中就会调用iommu_map 这个通用API来映射.
ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
if (ret) {
vfio_unpin_pages(pfn, npage, prot, true);
break;
}
size -= npage << PAGE_SHIFT;
dma->size += npage << PAGE_SHIFT;
}
if (ret)
vfio_remove_dma(iommu, dma);
mutex_unlock(&iommu->lock);
return ret;
}
static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
.flags = VFIO_DMA_MAP_FLAG_READ,
.vaddr = (__u64)(uintptr_t)vaddr,
.iova = iova,
.size = size,
};
if (!readonly) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
/*
* Try the mapping, if it fails with EBUSY, unmap the region and try
* again. This shouldn't be necessary, but we sometimes see it in
* the VGA ROM space.
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
(errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
error_report("VFIO_MAP_DMA: %d", -errno);
return -errno;
}
举个简单的例子
/* Allocate some space and setup a DMA mapping */
dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
dma_map.size = 1024 * 1024;
dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
这段code就是将0~1M的这段memory 做iommu map.
这个VFIO_IOMMU_MAP_DMA 对应kernel中的vfio_iommu_driver_ops 中的ioctl实现vfio_iommu_type1_ioctl
static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
struct vfio_iommu *iommu = iommu_data;
unsigned long minsz;
else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
if (copy_from_user(&map, (void __user *)arg, minsz))
return -EFAULT;
if (map.argsz < minsz || map.flags & ~mask)
return -EINVAL;
return vfio_dma_do_map(iommu, &map);
}
}
在vfio_iommu_type1_ioctl中通过copy_from_user将数据从user space copy到kernel space后,继续调用vfio_dma_do_map来将0~1M(举例)这段 虚拟地址做映射
static int vfio_dma_do_map(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_map *map)
{
dma_addr_t iova = map->iova;
unsigned long vaddr = map->vaddr;
size_t size = map->size;
long npage;
int ret = 0, prot = 0;
uint64_t mask;
struct vfio_dma *dma;
unsigned long pfn;
mutex_lock(&iommu->lock);
//查找iova是否已经被映射过。
if (vfio_find_dma(iommu, iova, size)) {
mutex_unlock(&iommu->lock);
return -EEXIST;
}
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
mutex_unlock(&iommu->lock);
return -ENOMEM;
}
//iova 是设备dma使用的地址
dma->iova = iova;
//vaddr是cpu使用的地址
dma->vaddr = vaddr;
dma->prot = prot;
//将这个struct vfio_dma *dma 插入到rb tree中,这样下次通过vfio_find_dma 查找的时候就不为null了.
vfio_link_dma(iommu, dma);
while (size) {
/* Pin a contiguous chunk of memory */
//map是一段一段映射的,例如如果memory是由3Page+1Page+4Page这三段连续的
npage = vfio_pin_pages(vaddr + dma->size,
size >> PAGE_SHIFT, prot, &pfn);
if (npage <= 0) {
WARN_ON(!npage);
ret = (int)npage;
break;
}
//这里就调用vfio_iommu_map映射,其中就会调用iommu_map 这个通用API来映射.
ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
if (ret) {
vfio_unpin_pages(pfn, npage, prot, true);
break;
}
size -= npage << PAGE_SHIFT;
dma->size += npage << PAGE_SHIFT;
}
if (ret)
vfio_remove_dma(iommu, dma);
mutex_unlock(&iommu->lock);
return ret;
}