iommu_map 是export出的用于映射的函数
在iommu_map 中主要的就是通过iommu_pgsize 得要需要映射的page size,通过调用domain->ops->map 来真正做映射
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
while (size) {
size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
}
}
从iommu_pgsize中的到pagesize
static size_t iommu_pgsize(struct iommu_domain *domain,
unsigned long addr_merge, size_t size)
{
unsigned int pgsize_idx;
size_t pgsize;
#从size中找找到最大的page size
pgsize_idx = __fls(size);
/* need to consider alignment requirements ? */
if (likely(addr_merge)) {
/* Max page size allowed by address */
unsigned int align_pgsize_idx = __ffs(addr_merge);
pgsize_idx = min(pgsize_idx, align_pgsize_idx);
}
/* build a mask of acceptable page sizes */
pgsize = (1UL << (pgsize_idx + 1)) - 1;
#看page size是否是hw support
pgsize &= domain->pgsize_bitmap;
/* make sure we're still sane */
BUG_ON(!pgsize);
#找到最大的page size
pgsize_idx = __fls(pgsize);
pgsize = 1UL << pgsize_idx;
return pgsize;
}
在iommu_pgsize 最终要就是找到最大的page size,这个page size且是要hw support,
static int __init arm_lpae_do_selftests(void)
{
static const unsigned long pgsize[] = {
SZ_4K | SZ_2M | SZ_1G,
SZ_16K | SZ_32M,
SZ_64K | SZ_512M,
};
static const unsigned int ias[] = {
32, 36, 40, 42, 44, 48,
};
int i, j, pass = 0, fail = 0;
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
.oas = 48,
};
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
for (j = 0; j < ARRAY_SIZE(ias); ++j) {
cfg.pgsize_bitmap = pgsize[i];
}
从arm_lpae_do_selftests 这个自测函数中可以看到pgsize_bitmap可以取值SZ_4K到SZ_512M。因此最大的page size 也必须在这个范围内,实际在arm smmu v3中的arm_smmu_device_hw_probe 中会根据IDR5 这个寄存器来给pgsize_bitmap 赋值,具体code如下:
读取IDR5寄存器
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
根据bit4/5/6来决定最大支持的page size,优先级从高到低
/* Page sizes */
if (reg & IDR5_GRAN64K)
smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
if (reg & IDR5_GRAN16K)
smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
if (reg & IDR5_GRAN4K)
smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
再回到iommu_map 中得到最大的page size后,就调用domain->ops->map 来映射,之前的博客已经分析过首先会调用arm_smmu_ops 中的map函数arm_smmu_map
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
int ret;
unsigned long flags;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
if (!ops)
return -ENODEV;
spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
ret = ops->map(ops, iova, paddr, size, prot);
spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
return ret;
}
arm_smmu_map 有辗转调到arm_lpae_map函数,详见之前的博客
static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int iommu_prot)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
int ret, lvl = ARM_LPAE_START_LVL(data);
arm_lpae_iopte prot;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
*/
wmb();
return ret;
}
这个函数从arm_lpae_io_pgtable 中的到arm_lpae_iopte,然后调用__arm_lpae_map 来map
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
int lvl, arm_lpae_iopte *ptep)
{
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
if (size == block_size && (size & cfg->pgsize_bitmap))
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
return -EINVAL;
/* Grab a pointer to the next level */
pte = *ptep;
if (!pte) {
cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
GFP_ATOMIC, cfg);
if (!cptep)
return -ENOMEM;
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
__arm_lpae_set_pte(ptep, pte, cfg);
} else {
cptep = iopte_deref(pte, data);
}
/* Rinse, repeat */
return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep);
}
__arm_lpae_map 中首先得到block_size, size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
如果要映射的size等于block_size,则调用arm_lpae_init_pte 来初始化pte
smmu映射的时候类似mmu分为pgd/pmd/pte等类似,但是级别不能超过四个
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
return -EINVAL;
如果arm_lpae_iopte 也就是ptep里面的值为空,
if (!pte) {
cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
GFP_ATOMIC, cfg);
if (!cptep)
return -ENOMEM;
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
__arm_lpae_set_pte(ptep, pte, cfg);
} else {
cptep = iopte_deref(pte, data);
}
则为arm_lpae_iopte 申请memory,然后得到物理地址,调用__arm_lpae_set_pte 来设定到pte里面
static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
struct io_pgtable_cfg *cfg)
{
*ptep = pte;
if (!selftest_running)
dma_sync_single_for_device(cfg->iommu_dev,
__arm_lpae_dma_addr(ptep),
sizeof(pte), DMA_TO_DEVICE);
}
因为selftest_running == false。所以__arm_lpae_set_pte 就只有一行code。
如果ptep的值不是null,则调用iopte_deref来得到cptep.
如果size 不是block_size的话,则递归调用__arm_lpae_map,可见size 等于block_size 是递归的出口条件.
在iommu_map 中主要的就是通过iommu_pgsize 得要需要映射的page size,通过调用domain->ops->map 来真正做映射
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
while (size) {
size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
}
}
从iommu_pgsize中的到pagesize
static size_t iommu_pgsize(struct iommu_domain *domain,
unsigned long addr_merge, size_t size)
{
unsigned int pgsize_idx;
size_t pgsize;
#从size中找找到最大的page size
pgsize_idx = __fls(size);
/* need to consider alignment requirements ? */
if (likely(addr_merge)) {
/* Max page size allowed by address */
unsigned int align_pgsize_idx = __ffs(addr_merge);
pgsize_idx = min(pgsize_idx, align_pgsize_idx);
}
/* build a mask of acceptable page sizes */
pgsize = (1UL << (pgsize_idx + 1)) - 1;
#看page size是否是hw support
pgsize &= domain->pgsize_bitmap;
/* make sure we're still sane */
BUG_ON(!pgsize);
#找到最大的page size
pgsize_idx = __fls(pgsize);
pgsize = 1UL << pgsize_idx;
return pgsize;
}
在iommu_pgsize 最终要就是找到最大的page size,这个page size且是要hw support,
static int __init arm_lpae_do_selftests(void)
{
static const unsigned long pgsize[] = {
SZ_4K | SZ_2M | SZ_1G,
SZ_16K | SZ_32M,
SZ_64K | SZ_512M,
};
static const unsigned int ias[] = {
32, 36, 40, 42, 44, 48,
};
int i, j, pass = 0, fail = 0;
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
.oas = 48,
};
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
for (j = 0; j < ARRAY_SIZE(ias); ++j) {
cfg.pgsize_bitmap = pgsize[i];
}
从arm_lpae_do_selftests 这个自测函数中可以看到pgsize_bitmap可以取值SZ_4K到SZ_512M。因此最大的page size 也必须在这个范围内,实际在arm smmu v3中的arm_smmu_device_hw_probe 中会根据IDR5 这个寄存器来给pgsize_bitmap 赋值,具体code如下:
读取IDR5寄存器
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
根据bit4/5/6来决定最大支持的page size,优先级从高到低
/* Page sizes */
if (reg & IDR5_GRAN64K)
smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
if (reg & IDR5_GRAN16K)
smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
if (reg & IDR5_GRAN4K)
smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
再回到iommu_map 中得到最大的page size后,就调用domain->ops->map 来映射,之前的博客已经分析过首先会调用arm_smmu_ops 中的map函数arm_smmu_map
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
int ret;
unsigned long flags;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
if (!ops)
return -ENODEV;
spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
ret = ops->map(ops, iova, paddr, size, prot);
spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
return ret;
}
arm_smmu_map 有辗转调到arm_lpae_map函数,详见之前的博客
static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int iommu_prot)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
int ret, lvl = ARM_LPAE_START_LVL(data);
arm_lpae_iopte prot;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
*/
wmb();
return ret;
}
这个函数从arm_lpae_io_pgtable 中的到arm_lpae_iopte,然后调用__arm_lpae_map 来map
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
int lvl, arm_lpae_iopte *ptep)
{
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
if (size == block_size && (size & cfg->pgsize_bitmap))
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
return -EINVAL;
/* Grab a pointer to the next level */
pte = *ptep;
if (!pte) {
cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
GFP_ATOMIC, cfg);
if (!cptep)
return -ENOMEM;
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
__arm_lpae_set_pte(ptep, pte, cfg);
} else {
cptep = iopte_deref(pte, data);
}
/* Rinse, repeat */
return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep);
}
__arm_lpae_map 中首先得到block_size, size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
如果要映射的size等于block_size,则调用arm_lpae_init_pte 来初始化pte
smmu映射的时候类似mmu分为pgd/pmd/pte等类似,但是级别不能超过四个
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
return -EINVAL;
如果arm_lpae_iopte 也就是ptep里面的值为空,
if (!pte) {
cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
GFP_ATOMIC, cfg);
if (!cptep)
return -ENOMEM;
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
__arm_lpae_set_pte(ptep, pte, cfg);
} else {
cptep = iopte_deref(pte, data);
}
则为arm_lpae_iopte 申请memory,然后得到物理地址,调用__arm_lpae_set_pte 来设定到pte里面
static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
struct io_pgtable_cfg *cfg)
{
*ptep = pte;
if (!selftest_running)
dma_sync_single_for_device(cfg->iommu_dev,
__arm_lpae_dma_addr(ptep),
sizeof(pte), DMA_TO_DEVICE);
}
因为selftest_running == false。所以__arm_lpae_set_pte 就只有一行code。
如果ptep的值不是null,则调用iopte_deref来得到cptep.
如果size 不是block_size的话,则递归调用__arm_lpae_map,可见size 等于block_size 是递归的出口条件.