How PCIe MSI-X interrupts are requested and registered on ARM64

Following up on the previous article: the arm64 interrupt handling flow.
Let's look at how PCIe MSI-X interrupts get registered: are they described in the device tree, or allocated dynamically? The conclusion up front: PCIe MSI-X interrupts are allocated dynamically by the kernel.

Driver call flow

Let's start with a code snippet showing how a driver requests and uses PCIe MSI-X interrupts:

#include <linux/pci.h>
#include <linux/interrupt.h>

struct pci_dev *pdev;
int irq_vectors;
int irq;

// Assume pdev already points to a valid PCI device

// Try to allocate between 1 and 4 interrupt vectors, allowing MSI-X
irq_vectors = pci_alloc_irq_vectors(pdev, 1, 4, PCI_IRQ_MSIX);

if (irq_vectors < 0) {
    // Handle the error
    printk(KERN_ERR "Failed to allocate MSI-X vectors: %d\n", irq_vectors);
} else {
    printk(KERN_INFO "Allocated %d MSI-X vectors\n", irq_vectors);

    // Get the Linux IRQ number of the first vector
    irq = pci_irq_vector(pdev, 0);
    if (irq < 0) {
        printk(KERN_ERR "Failed to get IRQ number for vector 0: %d\n", irq);
    } else {
        printk(KERN_INFO "IRQ number for vector 0: %d\n", irq);

        // Register the interrupt handler
        if (request_irq(irq, my_interrupt_handler, 0, "my_device", pdev)) {
            printk(KERN_ERR "Failed to request IRQ %d\n", irq);
        } else {
            printk(KERN_INFO "IRQ %d requested successfully\n", irq);
        }
    }
}
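
The snippet assumes an interrupt handler called my_interrupt_handler, which is not shown; a minimal placeholder (illustrative only, not part of the original snippet) could look like this:

static irqreturn_t my_interrupt_handler(int irq, void *dev_id)
{
    struct pci_dev *pdev = dev_id;  /* dev_id is the pdev passed to request_irq() */

    /* Acknowledge / handle the device-specific interrupt source here */
    dev_info(&pdev->dev, "MSI-X interrupt %d fired\n", irq);

    return IRQ_HANDLED;
}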

The three most important steps are:

  • pci_alloc_irq_vectors
  • pci_irq_vector
  • request_irq

We will skip request_irq here; if it is unfamiliar, see the earlier article on interrupt registration.
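
For completeness, the teardown mirrors the allocation: free each requested IRQ, then release the vectors. A minimal sketch, reusing the variables from the snippet above:

// Teardown: undo request_irq() and pci_alloc_irq_vectors()
free_irq(irq, pdev);            /* must match the dev_id passed to request_irq() */
pci_free_irq_vectors(pdev);     /* releases the MSI-X vectors */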

Overview of the main modules

PCIe MSI-X involves quite a few modules, which fall into three main parts:

  1. the PCIe framework
  2. the IRQ framework
  3. the GIC v3 ITS

Let's focus on pci_alloc_irq_vectors. This function is just a header-file wrapper that forwards straight to pci_alloc_irq_vectors_affinity.
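
For reference, the wrapper looks roughly like this (from include/linux/pci.h; minor details may vary across kernel versions):

static inline int
pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
		      unsigned int max_vecs, unsigned int flags)
{
	return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
					      NULL);
}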

drivers/pci/msi.c

int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
				   unsigned int max_vecs, unsigned int flags,
				   struct irq_affinity *affd)
{
	struct irq_affinity msi_default_affd = {0};
	int nvecs = -ENOSPC;
	
	if (flags & PCI_IRQ_MSIX) {
		nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
						affd, flags);
		if (nvecs > 0)
			return nvecs;
	}

	if (flags & PCI_IRQ_MSI) {
		nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
		if (nvecs > 0)
			return nvecs;
	}
	return nvecs;
}

__pci_enable_msix_range

static int __pci_enable_msix_range(struct pci_dev *dev,
				   struct msix_entry *entries, int minvec,
				   int maxvec, struct irq_affinity *affd,
				   int flags)
{
	int rc, nvec = maxvec;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;	/* retry with the number of vectors the device supports */
	}
}

__pci_enable_msix

static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
			     int nvec, struct irq_affinity *affd, int flags)
{
	int nr_entries;
	int i, j;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
		return nr_entries;

	if (entries) {
		/* Check for any invalid entries */
		for (i = 0; i < nvec; i++) {
			if (entries[i].entry >= nr_entries)
				return -EINVAL;		/* invalid entry */
			for (j = i + 1; j < nvec; j++) {
				if (entries[i].entry == entries[j].entry)
					return -EINVAL;	/* duplicate entry */
			}
		}
	}

	return msix_capability_init(dev, entries, nvec, affd);
}

msix_capability_init

static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	ret = msix_setup_entries(dev, base, entries, nvec, affd);
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	msix_update_entries(dev, entries);
	return 0;
}

pci_msi_setup_msi_irqs

static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct irq_domain *domain;

	domain = dev_get_msi_domain(&dev->dev);
	if (domain && irq_domain_is_hierarchy(domain))
		return msi_domain_alloc_irqs(domain, &dev->dev, nvec);

	return arch_setup_msi_irqs(dev, nvec, type);
}

This is where it gets interesting: msi_domain_alloc_irqs is where the real work starts.
kernel/irq/msi.c

int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
			  int nvec)
{
	struct msi_domain_info *info = domain->host_data;
	struct msi_domain_ops *ops = info->ops;

	return ops->domain_alloc_irqs(domain, dev, nvec);
}
int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
			    int nvec)
{
	ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
	if (ret)
		return ret;

	for_each_msi_entry(desc, dev) {
		ops->set_desc(&arg, desc);

		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
					       dev_to_node(dev), &arg, false,
					       desc->affinity);
		if (virq < 0) {
			ret = -ENOSPC;
			if (ops->handle_error)
				ret = ops->handle_error(domain, desc, ret);
			if (ops->msi_finish)
				ops->msi_finish(&arg, ret);
			return ret;
		}

		for (i = 0; i < desc->nvec_used; i++) {
			irq_set_msi_desc_off(virq, i, desc);
			irq_debugfs_copy_devname(virq + i, dev);
		}
	}

	if (ops->msi_finish)
		ops->msi_finish(&arg, 0);

	for_each_msi_vector(desc, i, dev) {
		if (desc->irq == i) {
			virq = desc->irq;
			dev_dbg(dev, "irq [%d-%d] for MSI\n",
				virq, virq + desc->nvec_used - 1);
		}

		irq_data = irq_domain_get_irq_data(domain, i);
		if (!can_reserve) {
			irqd_clr_can_reserve(irq_data);
			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
				irqd_set_msi_nomask_quirk(irq_data);
			if ((info->flags & MSI_FLAG_ACTIVATE_EARLY) &&
				irqd_affinity_is_managed(irq_data) &&
				!cpumask_intersects(irq_data_get_affinity_mask(irq_data),
						    cpu_online_mask)) {
				irqd_set_managed_shutdown(irq_data);
				continue;
			}
		}
		ret = irq_domain_activate_irq(irq_data, can_reserve);
	}
}

The main steps here are:

  • msi_domain_prepare_irqs drops into the ITS code to do the PCIe-device-related mapping; this is where the PCIe device is added to the ITS device lookup table
  • __irq_domain_alloc_irqs allocates the hardware irq number (hwirq); this is where the hwirq comes from, entering the ITS via irq_domain_alloc_irqs_hierarchy to allocate the hardware interrupt
  • irq_domain_activate_irq enters the ITS to configure the interrupt, programming the PCIe device's irq into the ITT

Let's go through them one by one.
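
Before diving in, it helps to keep the irq_domain hierarchy in mind. Roughly (exact domain names depend on the platform and kernel version):

	PCI/MSI domain (kernel/irq/msi.c + irq-gic-v3-its-pci-msi.c)
	    └── ITS domain (irq-gic-v3-its.c, its_domain_ops)
	        └── GICv3 domain (irq-gic-v3.c)

Allocation and activation walk this hierarchy, which is why most of the real work ends up in the ITS driver.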

int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
			    int nvec, msi_alloc_info_t *arg)
{
	ret = ops->msi_check(domain, info, dev);
	if (ret == 0)
		ret = ops->msi_prepare(domain, dev, nvec, arg);

	return ret;
}

msi_prepare is registered by irq-gic-v3-its-pci-msi.c:
drivers/irqchip/irq-gic-v3-its-pci-msi.c

static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
			       int nvec, msi_alloc_info_t *info)
{
	msi_info = msi_get_domain_info(domain->parent);

	pdev = to_pci_dev(dev);

	pci_for_each_dma_alias(pdev, its_get_pci_alias, &alias_dev);
	if (alias_dev != pdev && alias_dev->subordinate)
		pci_walk_bus(alias_dev->subordinate, its_pci_msi_vec_count,
			     &alias_count);

	/* ITS specific DeviceID, as the core ITS ignores dev. */
	info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev);
	return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info);
}

static struct msi_domain_ops its_pci_msi_ops = {
	.msi_prepare	= its_pci_msi_prepare,
};

The code above mainly generates the device_id and stores it in info->scratchpad[0].ul; msi_prepare then chains into irq-gic-v3-its.c:
drivers/irqchip/irq-gic-v3-its.c

static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
			   int nvec, msi_alloc_info_t *info)
{
	u32 dev_id = info->scratchpad[0].ul;	/* DeviceID set by its_pci_msi_prepare() */

	its_dev = its_create_device(its, dev_id, nvec, true);

	info->scratchpad[0].ptr = its_dev;	/* consumed later by its_irq_domain_alloc() */
}

static struct msi_domain_ops its_msi_domain_ops = {
	.msi_prepare	= its_msi_prepare,
};
static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
					    int nvecs, bool alloc_lpis)
{
	struct its_device *dev;
	unsigned long *lpi_map = NULL;
	unsigned long flags;
	u16 *col_map = NULL;
	void *itt;
	int lpi_base;
	int nr_lpis;
	int nr_ites;
	int sz;
	gfp_t gfp_flags;

	if (!its_alloc_device_table(its, dev_id))
		return NULL;

	if (WARN_ON(!is_power_of_2(nvecs)))
		nvecs = roundup_pow_of_two(nvecs);

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	/*
	 * Even if the device wants a single LPI, the ITT must be
	 * sized as a power of two (and you need at least one bit...).
	 */
	nr_ites = max(2, nvecs);
	sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
	gfp_flags = GFP_KERNEL;
	if (of_machine_is_compatible("rockchip,rk3568") || of_machine_is_compatible("rockchip,rk3566")) {
		gfp_flags |= GFP_DMA32;
		itt = (void *)__get_free_pages(gfp_flags, get_order(sz));
	} else {
		itt = kzalloc_node(sz, gfp_flags, its->numa_node);
	}

	if (alloc_lpis) {
		lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis);
		if (lpi_map)
			col_map = kcalloc(nr_lpis, sizeof(*col_map),
					  GFP_KERNEL);
	} else {
		col_map = kcalloc(nr_ites, sizeof(*col_map), GFP_KERNEL);
		nr_lpis = 0;
		lpi_base = 0;
	}

	if (!dev || !itt ||  !col_map || (!lpi_map && alloc_lpis)) {
		kfree(dev);

		if (of_machine_is_compatible("rockchip,rk3568") ||
		    of_machine_is_compatible("rockchip,rk3566"))
			free_pages((unsigned long)itt, get_order(sz));
		else
			kfree(itt);

		kfree(lpi_map);
		kfree(col_map);
		return NULL;
	}

	gic_flush_dcache_to_poc(itt, sz);

	dev->its = its;
	dev->itt = itt;
	dev->itt_sz = sz;
	dev->nr_ites = nr_ites;
	dev->event_map.lpi_map = lpi_map;
	dev->event_map.col_map = col_map;
	dev->event_map.lpi_base = lpi_base;
	dev->event_map.nr_lpis = nr_lpis;
	raw_spin_lock_init(&dev->event_map.vlpi_lock);
	dev->device_id = dev_id;
	INIT_LIST_HEAD(&dev->entry);

	raw_spin_lock_irqsave(&its->lock, flags);
	list_add(&dev->entry, &its->its_device_list);
	raw_spin_unlock_irqrestore(&its->lock, flags);

	/* Map device to its ITT */
	its_send_mapd(dev, 1);

	return dev;
}

This mostly builds ITS commands, which are finally sent to the hardware via its_send_mapd; for the details, see the GIC v3 deep dive.
One key point to note is its_lpi_alloc(nvecs, &lpi_base, &nr_lpis): lpi_base is allocated here, and it determines the device's event_id values and the hwirq numbers generated later. Let's look at the allocation algorithm:

struct lpi_range {
	struct list_head	entry;
	u32			base_id;
	u32			span;
};

static int alloc_lpi_range(u32 nr_lpis, u32 *base)
{
	struct lpi_range *range, *tmp;
	int err = -ENOSPC;

	mutex_lock(&lpi_range_lock);

	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
		if (range->span >= nr_lpis) {
			*base = range->base_id;
			range->base_id += nr_lpis;
			range->span -= nr_lpis;

			if (range->span == 0) {
				list_del(&range->entry);
				kfree(range);
			}

			err = 0;
			break;
		}
	}

	mutex_unlock(&lpi_range_lock);

	pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
	return err;
}

static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids)
{
	unsigned long *bitmap = NULL;
	int err = 0;

	do {
		err = alloc_lpi_range(nr_irqs, base);
		if (!err)
			break;

		nr_irqs /= 2;
	} while (nr_irqs > 0);

	if (!nr_irqs)
		err = -ENOSPC;

	if (err)
		goto out;

	bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC);
	if (!bitmap)
		goto out;

	*nr_ids = nr_irqs;

out:
	if (!bitmap)
		*base = *nr_ids = 0;

	return bitmap;
}

lpi_base is allocated by alloc_lpi_range, which takes the first large-enough range from the lpi_range list. The lpi_range list manages allocation, freeing and merging of lpi_base values. At first glance it looks like the lpi_range list is never initialized; in fact it is initialized by a "free" operation that inserts an initial range into the list:

static int free_lpi_range(u32 base, u32 nr_lpis)
{
	struct lpi_range *new, *old;

	new = mk_lpi_range(base, nr_lpis);
	if (!new)
		return -ENOMEM;

	mutex_lock(&lpi_range_lock);

	list_for_each_entry_reverse(old, &lpi_range_list, entry) {
		if (old->base_id < base)
			break;
	}
	/*
	 * old is the last element with ->base_id smaller than base,
	 * so new goes right after it. If there are no elements with
	 * ->base_id smaller than base, &old->entry ends up pointing
	 * at the head of the list, and inserting new it the start of
	 * the list is the right thing to do in that case as well.
	 */
	list_add(&new->entry, &old->entry);
	/*
	 * Now check if we can merge with the preceding and/or
	 * following ranges.
	 */
	merge_lpi_ranges(old, new);
	merge_lpi_ranges(new, list_next_entry(new, entry));

	mutex_unlock(&lpi_range_lock);
	return 0;
}
static int __init its_lpi_init(u32 id_bits)
{
	u32 lpis = (1UL << id_bits) - 8192;
	u32 numlpis;
	int err;

	numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer);

	if (numlpis > 2 && !WARN_ON(numlpis > lpis)) {
		lpis = numlpis;
		pr_info("ITS: Using hypervisor restricted LPI range [%u]\n",
			lpis);
	}

	/*
	 * Initializing the allocator is just the same as freeing the
	 * full range of LPIs.
	 */
	err = free_lpi_range(8192, lpis);
	pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
	return err;
}
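
free_lpi_range relies on two small helpers that are not quoted above, mk_lpi_range and merge_lpi_ranges. For reference, they look roughly like this in the same file (reconstructed from mainline; details may differ by kernel version):

static struct lpi_range *mk_lpi_range(u32 base, u32 span)
{
	struct lpi_range *range;

	range = kmalloc(sizeof(*range), GFP_KERNEL);
	if (range) {
		range->base_id = base;
		range->span = span;
	}

	return range;
}

/* Merge range a into its successor b when they are adjacent, then drop a */
static void merge_lpi_ranges(struct lpi_range *a, struct lpi_range *b)
{
	if (&a->entry == &lpi_range_list || &b->entry == &lpi_range_list)
		return;
	if (a->base_id + a->span != b->base_id)
		return;
	b->base_id = a->base_id;
	b->span += a->span;
	list_del(&a->entry);
	kfree(a);
}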

As you can see, during LPI initialization free_lpi_range is called to insert the first range into the list: it starts at 8192, and its size is computed from the GIC's capabilities. All later allocations come out of this range. lpi_base is the starting hwirq value for the current device, and subsequent vectors simply increment from there.
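
A concrete (hypothetical) example: if the first PCIe device requests 4 MSI-X vectors, alloc_lpi_range returns base 8192, so that device gets lpi_base = 8192 and its hwirqs will be 8192-8195; a second device requesting 2 vectors then gets lpi_base = 8196 and hwirqs 8196-8197 (nvecs has already been rounded up to a power of two in its_create_device).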

Next, let's look at __irq_domain_alloc_irqs:

int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
			    unsigned int nr_irqs, int node, void *arg,
			    bool realloc, const struct irq_affinity_desc *affinity)
{
	int i, ret, virq;

	if (domain == NULL) {
		domain = irq_default_domain;
		if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
			return -EINVAL;
	}

	if (realloc && irq_base >= 0) {
		virq = irq_base;
	} else {
		virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
					      affinity);
		if (virq < 0) {
			pr_debug("cannot allocate IRQ(base %d, count %d)\n",
				 irq_base, nr_irqs);
			return virq;
		}
	}

	if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
		pr_debug("cannot allocate memory for IRQ%d\n", virq);
		ret = -ENOMEM;
		goto out_free_desc;
	}

	mutex_lock(&irq_domain_mutex);
	ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
	if (ret < 0) {
		mutex_unlock(&irq_domain_mutex);
		goto out_free_irq_data;
	}

	for (i = 0; i < nr_irqs; i++) {
		ret = irq_domain_trim_hierarchy(virq + i);
		if (ret) {
			mutex_unlock(&irq_domain_mutex);
			goto out_free_irq_data;
		}
	}
	
	for (i = 0; i < nr_irqs; i++)
		irq_domain_insert_irq(virq + i);
	mutex_unlock(&irq_domain_mutex);

	return virq;

out_free_irq_data:
	irq_domain_free_irq_data(virq, nr_irqs);
out_free_desc:
	irq_free_descs(virq, nr_irqs);
	return ret;
}

We mainly follow irq_domain_alloc_irqs_hierarchy:

int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
				    unsigned int irq_base,
				    unsigned int nr_irqs, void *arg)
{
	if (!domain->ops->alloc) {
		pr_debug("domain->ops->alloc() is NULL\n");
		return -ENOSYS;
	}

	return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}

This takes us into the ITS driver:
drivers/irqchip/irq-gic-v3-its.c

static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				unsigned int nr_irqs, void *args)
{
	msi_alloc_info_t *info = args;
	struct its_device *its_dev = info->scratchpad[0].ptr;
	struct its_node *its = its_dev->its;
	struct irq_data *irqd;
	irq_hw_number_t hwirq;
	int err;
	int i;

	err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq);
	if (err)
		return err;

	err = iommu_dma_prepare_msi(info->desc, its->get_msi_base(its_dev));
	if (err)
		return err;

	for (i = 0; i < nr_irqs; i++) {
		err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
		if (err)
			return err;

		irq_domain_set_hwirq_and_chip(domain, virq + i,
					      hwirq + i, &its_irq_chip, its_dev);
		irqd = irq_get_irq_data(virq + i);
		irqd_set_single_target(irqd);
		irqd_set_affinity_on_activate(irqd);
		pr_debug("ID:%d pID:%d vID:%d\n",
			 (int)(hwirq + i - its_dev->event_map.lpi_base),
			 (int)(hwirq + i), virq + i);
	}

	return 0;
}
static const struct irq_domain_ops its_domain_ops = {
	.alloc			= its_irq_domain_alloc,
	.free			= its_irq_domain_free,
	.activate		= its_irq_domain_activate,
	.deactivate		= its_irq_domain_deactivate,
};

The first call in the code above, its_alloc_device_irq, is where the hwirq is actually allocated:

static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq)
{
	int idx;

	/* Find a free LPI region in lpi_map and allocate them. */
	idx = bitmap_find_free_region(dev->event_map.lpi_map,
				      dev->event_map.nr_lpis,
				      get_count_order(nvecs));
	if (idx < 0)
		return -ENOSPC;

	*hwirq = dev->event_map.lpi_base + idx;

	return 0;
}

As you can see, hwirq is simply lpi_base plus an index, which confirms the earlier point that lpi_base is the device's starting hwirq.

Now let's go back to irq_domain_activate_irq:

static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
{
	int ret = 0;

	if (irqd && irqd->domain) {
		struct irq_domain *domain = irqd->domain;

		if (irqd->parent_data)
			ret = __irq_domain_activate_irq(irqd->parent_data,
							reserve);
		if (!ret && domain->ops->activate) {
			ret = domain->ops->activate(domain, irqd, reserve);
			/* Rollback in case of error */
			if (ret && irqd->parent_data)
				__irq_domain_deactivate_irq(irqd->parent_data);
		}
	}
	return ret;
}

int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
{
	int ret = 0;

	if (!irqd_is_activated(irq_data))
		ret = __irq_domain_activate_irq(irq_data, reserve);
	if (!ret)
		irqd_set_activated(irq_data);
	return ret;
}

The key line above is domain->ops->activate, which is where the ITS gets configured. Note that __irq_domain_activate_irq recurses into the parent domain's activate first: in the domain hierarchy the PCI MSI irq domain sits on top of the ITS irq domain (via its msi-parent in the device tree), so the ITS domain is activated first and the PCI domain afterwards. Following that order, let's look at the ITS irq domain first:

static inline u32 its_get_event_id(struct irq_data *d)
{
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
	return d->hwirq - its_dev->event_map.lpi_base;
}

static int its_irq_domain_activate(struct irq_domain *domain,
				   struct irq_data *d, bool reserve)
{
	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
	u32 event = its_get_event_id(d);
	int cpu;

	cpu = its_select_cpu(d, cpu_online_mask);
	if (cpu < 0 || cpu >= nr_cpu_ids)
		return -EINVAL;

	its_inc_lpi_count(d, cpu);
	its_dev->event_map.col_map[event] = cpu;
	irq_data_update_effective_affinity(d, cpumask_of(cpu));

	/* Map the GIC IRQ and event to the device */
	its_send_mapti(its_dev, d->hwirq, event);
	return 0;
}
static const struct irq_domain_ops its_domain_ops = {
	.alloc			= its_irq_domain_alloc,
	.free			= its_irq_domain_free,
	.activate		= its_irq_domain_activate,
	.deactivate		= its_irq_domain_deactivate,
};

Again this builds an ITS command and sends it to the hardware via its_send_mapti. Notice that the event ID is simply the index of the hwirq within the current device, i.e. the per-device interrupt number.
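
Continuing the hypothetical numbers from earlier: for the device with lpi_base = 8192, the vector with hwirq 8194 has event ID 8194 - 8192 = 2, and the MAPTI command tells the ITS that event 2 from this DeviceID should raise LPI 8194.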

Now let's look at the PCI domain side. The ITS domain above programmed the interrupt information into the ITS; next, the same interrupt information (the MSI message) is written into the device's MSI-X table, which lives in one of its BARs:
kernel/irq/msi.c

static inline void irq_chip_write_msi_msg(struct irq_data *data,
					  struct msi_msg *msg)
{
	data->chip->irq_write_msi_msg(data, msg);
}

static int msi_domain_activate(struct irq_domain *domain,
			       struct irq_data *irq_data, bool early)
{
	struct msi_msg msg[2] = { [1] = { }, };

	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
	msi_check_level(irq_data->domain, msg);
	irq_chip_write_msi_msg(irq_data, msg);
	return 0;
}

static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
};

drivers/pci/msi.c

static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
{
	struct irq_chip *chip = info->chip;

	BUG_ON(!chip);
	if (!chip->irq_write_msi_msg)
		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
	if (!chip->irq_mask)
		chip->irq_mask = pci_msi_mask_irq;
	if (!chip->irq_unmask)
		chip->irq_unmask = pci_msi_unmask_irq;
}
void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
	struct msi_desc *desc = irq_data_get_msi_desc(irq_data);

	/*
	 * For MSI-X desc->irq is always equal to irq_data->irq. For
	 * MSI only the first interrupt of MULTI MSI passes the test.
	 */
	if (desc->irq == irq_data->irq)
		__pci_write_msi_msg(desc, msg);
}
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);
		bool unmasked = !(entry->masked & PCI_MSIX_ENTRY_CTRL_MASKBIT);

		if (!base)
			goto skip;

		/*
		 * The specification mandates that the entry is masked
		 * when the message is modified:
		 *
		 * "If software changes the Address or Data value of an
		 * entry while the entry is unmasked, the result is
		 * undefined."
		 */
		if (unmasked)
			__pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT);

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

		if (unmasked)
			__pci_msix_desc_mask_irq(entry, 0);

		/* Ensure that the writes are visible in the device */
		readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
		/* Ensure that the writes are visible in the device */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	}

skip:
	entry->msg = *msg;

	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);

}

At this point the MSI message composed earlier is written into the device's MSI-X table in its BAR, and the whole flow is complete.
