linux iommu-STE表,CD表,iommu_map

格式

STE表

Level 1 Stream Table Descriptor
在这里插入图片描述描述见《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》-5.1 Level 1 Stream Table Descriptor

Stream Table Entry
在这里插入图片描述描述见《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》-5.2 Stream Table Entry

CD表

Level 1 Context Descriptor
在这里插入图片描述描述见《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》-5.3 Level 1 Context Descriptor
Context Descriptor
在这里插入图片描述描述见《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》-5.4 Context Descriptor

结构体

#define STRTAB_L1_SZ_SHIFT		20
#define STRTAB_SPLIT			8

#define STRTAB_L1_DESC_DWORDS		1
#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)

#define STRTAB_STE_DWORDS		8

/* High-level stream table and context descriptor structures */
struct arm_smmu_strtab_l1_desc {
	u8				span;
	__le64				*l2ptr; 
	dma_addr_t			l2ptr_dma;
};
  • span: Level 1 Stream Table Descriptor的span,值等于STRTAB_SPLIT,代表Level 1 Stream Table Descriptor的个数和索引。
  • l2ptr: Level 1 Stream Table Descriptor的l2ptr指针,指向STE的L2表。
  • l2ptr_dma:l2ptr的PA用来填充Level 1 Stream Table Descriptor。

arm_smmu_device.strtab_cfg

struct arm_smmu_strtab_cfg {
	__le64				*strtab;
	dma_addr_t			strtab_dma;
	struct arm_smmu_strtab_l1_desc	*l1_desc;
	unsigned int			num_l1_ents;

	u64				strtab_base;
	u32				strtab_base_cfg;
};
  • strtab :STE表的VA。指向一个一个的Level 1 Stream Table Descriptor主要内容是L2Ptr和Span。Level 1 Stream Table Descriptor的详细描述见-《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》Chapter 5
    Data structure formats-Level 1 Stream Table Descriptor
  • strtab_dma:STE表的PA。
  • l1_desc :STE的L1 desc的数组指针。
  • num_l1_ents STE的L1表有多少个。
  • strtab_base:STE的base地址(来自strtab_dma),需要写到ARM_SMMU_STRTAB_BASE寄存器。关于该寄存器见
    《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification-Chapter 6
    Memory map and registers-6.3.23 SMMU_STRTAB_BASE
  • strtab_base_cfg:STE的config,需要写到ARM_SMMU_STRTAB_BASE_CFG寄存器。关于该寄存器见《IHI0070F_b_System_Memory_Management_Unit_Architecture_Specification》-Chapter 6
    Memory map and registers-6.3.24 SMMU_STRTAB_BASE_CFG
struct arm_smmu_s1_cfg {
	__le64				*cdptr;
	dma_addr_t			cdptr_dma;

	struct arm_smmu_ctx_desc {
		u16	asid;
		u64	ttbr;
		u64	tcr;
		u64	mair;
	}				cd;
};

struct arm_smmu_s2_cfg {
	u16				vmid;
	u64				vttbr;
	u64				vtcr;
};

cdptr: 虚拟地址指向Level 1 Context Descriptor或者 Context Descriptor。
cdptr_dma:cdptr的物理地址,填充到相应Stream Table Entry,S1ContextPtr字段。
asid:ASID需要填充到相应Context Descripto,ASID字段。
ttbr:ttbr需要填充到相应Context Descriptor,TTB0字段。
tcr:tcr需要填充到相应Context Descriptor,IPS,EPD1,EPD0,SH0, OR0, IR0,TG0,T0SZ字段。
mair:mair需要填充到相应Context Descriptor,MAIR1和MAIR0字段。
vmid:VMID填充到相应Stream Table Entry,S2VMID字段
vttbr:vttbr填充到相应Stream Table Entry,S2TTB字段。
vtcr:vtcr填充到相应Stream Table Entry,S2PS,S2TG,S2SH0,S2OR0,S2IR0,S2SL0,S2T0SZ字段。

初始化

以 2 level的STE表为例子,arm_smmu_init_strtab_2lvl函数主要初始化了L1的STE表(Level 1 Stream Table Descriptor)。

static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size, l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	/* Calculate the L1 size, capped to the SIDSIZE. */
	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
	cfg->num_l1_ents = 1 << size;

	size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);

	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
				     GFP_KERNEL | __GFP_ZERO);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;

	/* Configure strtab_base_cfg for 2 levels */
	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
	cfg->strtab_base_cfg = reg;

	return arm_smmu_init_l1_strtab(smmu);
}

arm_smmu_init_l1_strtab函数主要初始化了l1_desc数组。

static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
{
	unsigned int i;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
	void *strtab = smmu->strtab_cfg.strtab;

	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
	if (!cfg->l1_desc) {
		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
		return -ENOMEM;
	}

	for (i = 0; i < cfg->num_l1_ents; ++i) {
		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
		
		strtab += STRTAB_L1_DESC_DWORDS << 3;
	}
	return 0;
}
  • 申请了l1_desc的数组。
  • arm_smmu_write_strtab_l1_desc函数将l1_desc数组中内容写到Level 1 Stream Table Descriptor中。

设备添加

以下流程添加smmu的dev的时候会,调用arm_smmu_init_l2_strtab函数填充STE的L2表(Stream Table Entry)
add_iommu_group/iommu_bus_notifier
–>iommu_probe_device
---->arm_smmu_add_device
------>arm_smmu_init_l2_strtab

arm_smmu_init_l2_strtab函数实现如下:

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];//找到SID对应的l1 desc 

	if (desc->l2ptr)
		return 0;

	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);//申请的STE的L2表的大小是1<<(8+3+3),smmuv3中Stream Table Entry的每个表是512bit即64字节。size大小可以存放256个Stream Table Entry。
	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; //找到sid对应的strtab

	desc->span = STRTAB_SPLIT + 1; //span设置为9
	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
					  GFP_KERNEL | __GFP_ZERO);
	if (!desc->l2ptr) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
	arm_smmu_write_strtab_l1_desc(strtab, desc);//将span和l2ptr的值写到sid对应的strtab中。
	return 0;
}

初始化时候通过arm_smmu_init_strtab_2lvl函数初始化了arm_smmu_device的Level 1 Stream Table Descriptor,这时候主要是申请地址。
arm_smmu_add_device通过arm_smmu_init_l2_strtab函数申请了对应StreamID的Stream Table Entry。同时将Stream Table Entry的地址写到对应的Level 1 Stream Table Descriptor中。至此该设备的Level 1 Stream Table Descriptor表就填写完成。

设备attach

调用流程如下
iommu_group_get_for_dev
–>iommu_group_add_device
---->__iommu_attach_device
------>arm_smmu_attach_dev
-------->arm_smmu_domain_finalise

arm_smmu_attach_dev是arm-smmu-v3的arm_smmu_ops中的attach_dev回调,实现如下:

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret = 0;
	unsigned long flags;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master *master;

	if (!fwspec)
		return -ENOENT;

	master = fwspec->iommu_priv;
	smmu = master->smmu;

	arm_smmu_detach_dev(master);

	mutex_lock(&smmu_domain->init_mutex);

	if (!smmu_domain->smmu) {
		smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain);
		if (ret) {
			smmu_domain->smmu = NULL;
			goto out_unlock;
		}
	} else if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s (upstream of %s)\n",
			dev_name(smmu_domain->smmu->dev),
			dev_name(smmu->dev));
		ret = -ENXIO;
		goto out_unlock;
	}

	master->domain = smmu_domain;

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
		master->ats_enabled = arm_smmu_ats_supported(master);

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);

	arm_smmu_install_ste_for_dev(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_add(&master->domain_head, &smmu_domain->devices);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_enable_ats(master);

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

ATS的实现跳过,这里面该函数主要调用了三个函数

  • arm_smmu_domain_finalise :
  • arm_smmu_write_ctx_desc:主要将arm_smmu_s1_cfg结构体中asid,ttbr,tcr,mair填充到Context Descriptor表中。
  • arm_smmu_install_ste_for_dev:该函数主要

arm_smmu_domain_finalise函数实现如下:

static int arm_smmu_domain_finalise(struct iommu_domain *domain)
{
	int ret;
	unsigned long ias, oas;
	enum io_pgtable_fmt fmt;
	struct io_pgtable_cfg pgtbl_cfg;
	struct io_pgtable_ops *pgtbl_ops;
	int (*finalise_stage_fn)(struct arm_smmu_domain *,
				 struct io_pgtable_cfg *);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; //bypass模式直接return
		return 0;
	}

	/* Restrict the stage to what we can actually support */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;//配置为stage 2模式
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;//配置为stage 1模式

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1://stage 1模式
		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
		ias = min_t(unsigned long, ias, VA_BITS);
		oas = smmu->ias;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
	case ARM_SMMU_DOMAIN_S2://tage 2模式配置ias,oas
		ias = smmu->ias;
		oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		break;
	default:
		return -EINVAL;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) { //临时变量pgtbl_cfg用来初始化pgtable
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	domain->geometry.force_aperture = true;

	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;
}

该函数主要配置了ARM_64_LPAE_S1或者ARM_64_LPAE_S2模式。初始化pgtable和pgtbl_ops。同时初始化arm_smmu_domain的arm_smmu_s1_cfg或者arm_smmu_s2_cfg结构体。主要通过调用了alloc_io_pgtable_ops和finalise_stage_fn函数实现:

static const struct io_pgtable_init_fns *
io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE
	[ARM_32_LPAE_S1] = &io_pgtable_arm_32_lpae_s1_init_fns,
	[ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns,
	[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
	[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
	[ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
#endif
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
#endif
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
	[ARM_V8L_FAST] = &io_pgtable_av8l_fast_init_fns,
#endif
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
	.alloc	= arm_64_lpae_alloc_pgtable_s1,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
	.alloc	= arm_64_lpae_alloc_pgtable_s2,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
					    struct io_pgtable_cfg *cfg,
					    void *cookie)
{
	struct io_pgtable *iop;
	const struct io_pgtable_init_fns *fns;

	if (fmt >= IO_PGTABLE_NUM_FMTS)
		return NULL;

	fns = io_pgtable_init_table[fmt];
	if (!fns)
		return NULL;

	iop = fns->alloc(cfg, cookie);
	if (!iop)
		return NULL;

	iop->fmt	= fmt;
	iop->cookie	= cookie;
	iop->cfg	= *cfg;

	return &iop->ops;
}
EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops);

alloc_io_pgtable_ops函数根据传入的fmt(smmuv3中是ARM_64_LPAE_S1和ARM_64_LPAE_S2)。调用pgtable对应的alloc函数arm_64_lpae_alloc_pgtable_s1或者arm_64_lpae_alloc_pgtable_s2。

arm_64_lpae_alloc_pgtable_s1函数,主要调用arm_lpae_alloc_pgtable函数描述如下。初始化tcr和mair同时根据pgd_size申请pgd表。

arm_lpae_alloc_pgtable函数注册io_pgtable_ops如下,
以及根据io_pgtable_cfg参数确定pgd_size。

data->iop.ops = (struct io_pgtable_ops) {
		.map		= arm_lpae_map,
		.unmap		= arm_lpae_unmap,
		.iova_to_phys	= arm_lpae_iova_to_phys,
	};

arm_64_lpae_alloc_pgtable_s2函数与arm_64_lpae_alloc_pgtable_s1类似不在赘述。

通过finalise_stage_fn调用arm_smmu_domain_finalise_s1或者arm_smmu_domain_finalise_s2函数。
arm_smmu_domain_finalise_s1函数设置了arm_smmu_s1_cfg的asid,同时申请了Context Descriptor表,arm_smmu_s1_cfg的cdptr和cdptr_dma分别指向Context Descriptor表的VA和PA。将arm_64_lpae_alloc_pgtable_s1函数中获取的 ttbr(存放的PGD页表),tcr和mair,配置arm_smmu_domain中的arm_smmu_s1_cfg的ttbr,tcr, mair。其中cdptr的大小为64字节,可以容纳一个Context Descriptor表(64字节)。

arm_smmu_domain_finalise_s2函数设置了arm_smmu_s2_cfg的vmid,同时将arm_64_lpae_alloc_pgtable_s2函数中获取的vttbr(存放的PGD页表)和vtcr,配置arm_smmu_domain中的arm_smmu_s2_cfg的vttbr,vtcr。

arm_smmu_install_ste_for_dev实现如下

static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
{
	int i, j;
	struct arm_smmu_device *smmu = master->smmu;

	for (i = 0; i < master->num_sids; ++i) {
		u32 sid = master->sids[i];
		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);

		/* Bridged PCI devices may end up with duplicated IDs */
		for (j = 0; j < i; j++)
			if (master->sids[j] == sid)
				break;
		if (j < i)
			continue;

		arm_smmu_write_strtab_ent(master, sid, step);
	}
}
  • arm_smmu_get_step_for_sid函数使用SID做索引找到对应 Level 1 Stream Table Descriptor中的l2ptr指向的Stream Table Entry
  • arm_smmu_write_strtab_ent函数主要负责填充对应的Stream Table Entry。实现如下:
 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {//配置ARM_SMMU_DOMAIN_S1的Stream Table Entry
		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
	}

	if (s2_cfg) {//配置ARM_SMMU_DOMAIN_S2的Stream Table Entry
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);//发送CMD_CFGI_STE cmd

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);//发送CMD_PREFETCH_CONFIG  cmd
}

根据arm_smmu_domain当前配置的ARM_SMMU_DOMAIN_S1还是ARM_SMMU_DOMAIN_S2。将对应的arm_smmu_s1_cfg或者arm_smmu_s2_cfg中的数据同步到对应的Stream Table Entry中。主要涉及到参考格式中的Stream Table Entry图表
对于arm_smmu_s1_cfg

  • S1CIR,S1COR配置为 Normal, Write-Back cacheable, Read-Allocate,S1CSH配置为Inner Shareable,STRW 配置为NS-EL1见下表:

  • 将cdptr_dma( Context Descriptor的物理地址)写到Stream Table Entry的S1ContextPtr中。配置Config标志位为 stage1 translate ,stage 2 bypass 。详情见下表。同时设置STE Valid。
    在这里插入图片描述对于arm_smmu_s2_cfg

  • vmid写到S2VMID,vttbr写到S2TTB ,vtcr写到S2TG,S2SH0,S2SH0,S2IR0,S2SL0,S2T0SZ中。同时打开S2ENDI,S2PTW,S2AA64,S2R。

  • 配置Config标志位为 stage1 bypass ,stage 2 translate。详情见下表。同时设置STE Valid。

至此STE表以及CD表的构建基本描述完成,下一步通过iommu_map填充MMU的页表。

iommu_map

iommu_map
–>arm_smmu_map
---->arm_lpae_map

先看iommu_map函数,参数分别是dev的iommu_domain,传入的iova地址,传入的需要map的PA地址 paddr, map的地址大小size,以及标记读写等的prot mask。

int iommu_map(struct iommu_domain *domain, unsigned long iova,
	      phys_addr_t paddr, size_t size, int prot)
{
	const struct iommu_ops *ops = domain->ops;
	unsigned long orig_iova = iova;
	unsigned int min_pagesz;
	size_t orig_size = size;
	phys_addr_t orig_paddr = paddr;
	int ret = 0;

	if (unlikely(ops->map == NULL ||
		     domain->pgsize_bitmap == 0UL))
		return -ENODEV;

	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
		return -EINVAL;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);

	/*
	 * both the virtual address and the physical one, as well as
	 * the size of the mapping, must be aligned (at least) to the
	 * size of the smallest page supported by the hardware
	 */
	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
		       iova, &paddr, size, min_pagesz);
		return -EINVAL;
	}

	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);

	while (size) {
		size_t pgsize = iommu_pgsize(domain->pgsize_bitmap,
						iova | paddr, size);

		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
			 iova, &paddr, pgsize);

		ret = ops->map(domain, iova, paddr, pgsize, prot);
		if (ret)
			break;

		iova += pgsize;
		paddr += pgsize;
		size -= pgsize;
	}

	if (ops->iotlb_sync_map)
		ops->iotlb_sync_map(domain);

	/* unroll mapping in case something went wrong */
	if (ret)
		iommu_unmap(domain, orig_iova, orig_size - size);
	else
		trace_map(to_msm_iommu_domain(domain), orig_iova, orig_paddr,
			  orig_size, prot);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);
  • iommu_pgsize函数 根据iommu_domain的pgsize_bitmap(例如SZ_4K | SZ_2M | SZ_1G)以及需要map的size大小确定每次需要map的pgsize大小。
  • 之后调用arm_smmu_map函数每次map pgsize大小的地址。直到完成。

arm_smmu_map主要通过iommu_domain的pgtbl_ops回调arm_lpae_map(io-pgtable-arm.c)函数.

#define ARM_LPAE_MAX_LEVELS		4  //最多支持的4级页表
#define ARM_LPAE_START_LVL(d)		(ARM_LPAE_MAX_LEVELS - (d)->levels) //patable的开始的第一级页表
va_bits = cfg->ias - data->pg_shift;
data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
//确定支持的页表级别levels,由于va_bits和每一级页表的索引的比特位来确定。4K页表的bits_per_level是9.

static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
			phys_addr_t paddr, size_t size, int iommu_prot)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	arm_lpae_iopte *ptep = data->pgd;//ptep取值为pgd
	int ret, lvl = ARM_LPAE_START_LVL(data);//patable的开始的第一级页表
	arm_lpae_iopte prot;

	/* If no access, then nothing to do */
	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
		return 0;

	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
		    paddr >= (1ULL << data->iop.cfg.oas)))
		return -ERANGE;

	prot = arm_lpae_prot_to_pte(data, iommu_prot);
	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, NULL,
				NULL);
	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
	wmb();

	return ret;
}
  • arm_lpae_prot_to_pte函数主要根据io_pgtable_fmt(ARM_32_LPAE_S1/ARM_32_LPAE_S2/ARM_64_LPAE_S1/ARM_64_LPAE_S2)和iommu_prot(IOMMU_READ/IOMMU_WRITE/IOMMU_CACHE/IOMMU_NOEXEC)创建pte的属性mask。
  • __arm_lpae_map函数逐层创建页表索引。

__arm_lpae_map函数的实现跟linux中创建页表的方式类似,只不过的索引VA变为IOVA。

__arm_lpae_map是递归函数,主要调用以下函数:
arm_lpae_init_pte:申请地址的size如果匹配各个页表level的block_size(SZ_4K | SZ_2M | SZ_1G),将AP和页表属性写到pte页表索引中。
arm_lpae_install_table:否则通过__arm_lpae_alloc_pages申请页表索引,将索引的PA和索引的table属性填充到上一级页表。

  • 24
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值