内核虚拟化:虚拟中断注入

KVM_CREATE_IRQCHIP(中断控制器初始化)

KVM_CREATE_IRQCHIP用于在虚拟机初始化阶段创建虚拟中断控制器(irqchip)。当KVM接收到与虚拟机相关联的ioctl系统调用时,先由函数kvm_vm_ioctl()进行处理,再调用下面的kvm_arch_vm_ioctl()函数。该函数完成了初始化pic和ioapic控制器模块、配置默认中断路由等任务:

/*
 * Excerpt of the VM-level ioctl handler. Only the KVM_CREATE_IRQCHIP path is
 * shown; every "..." marks code that the excerpt leaves out.
 *
 * For KVM_CREATE_IRQCHIP the visible steps, all under kvm->lock, are:
 * create the virtual PIC, create the virtual IOAPIC, install the default
 * IRQ routing, then publish KVM_IRQCHIP_KERNEL as the irqchip mode.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -ENOTTY;
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
	union {
		struct kvm_pit_state ps;
		struct kvm_pit_state2 ps2;
		struct kvm_pit_config pit_config;
	} u;

	switch (ioctl) {
		...
		case KVM_CREATE_IRQCHIP: {
			mutex_lock(&kvm->lock);
			...
			/* Initialize the virtual PIC. */
			r = kvm_pic_init(kvm);
			...
			/* Initialize the virtual IOAPIC. */
			r = kvm_ioapic_init(kvm);
			... 
			/* Install the default interrupt routing table. */
			r = kvm_setup_default_irq_routing(kvm);
			...
            /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
            smp_wmb();
            kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
            ...
            mutex_unlock(&kvm->lock);
            break;
	}
	...

其中的kvm_setup_default_irq_routing会依次调用kvm_set_irq_routing和setup_routing_entry,最终调用kvm_set_routing_entry函数完成路由配置。其核心操作是将各类型中断控制器的中断置位函数与该类型控制器的路由表项进行绑定,以备后续发生中断请求时调用。

整体函数调用链如图:
在这里插入图片描述

int kvm_set_routing_entry(struct kvm *kvm,
			  struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue)
{
	/* We can't check irqchip_in_kernel() here as some callers are
	 * currently initializing the irqchip. Other callers should therefore
	 * check kvm_arch_can_set_irq_routing() before calling this function.
	 */
	switch (ue->type) {
	case KVM_IRQ_ROUTING_IRQCHIP: //中断路由芯片
		if (irqchip_split(kvm))
			return -EINVAL;
		e->irqchip.pin = ue->u.irqchip.pin;//设置中断芯片引脚
		switch (ue->u.irqchip.irqchip) {
		case KVM_IRQCHIP_PIC_SLAVE:
			e->irqchip.pin += PIC_NUM_PINS / 2;
			fallthrough;
		case KVM_IRQCHIP_PIC_MASTER:
			if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
				return -EINVAL;
			 设置处理 PIC 中断的回调函数
			e->set = kvm_set_pic_irq; 
			break;
		case KVM_IRQCHIP_IOAPIC:
			if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
				return -EINVAL;
			// 设置处理 IOPIC 中断的回调函数
			e->set = kvm_set_ioapic_irq;
			break;
		default:
			return -EINVAL;
		}
		e->irqchip.irqchip = ue->u.irqchip.irqchip;
		break;
	case KVM_IRQ_ROUTING_MSI:
		// 设置处理 MSI 中断的回调函数
		e->set = kvm_set_msi;
		e->msi.address_lo = ue->u.msi.address_lo;
		e->msi.address_hi = ue->u.msi.address_hi;
		e->msi.data = ue->u.msi.data;

		if (kvm_msi_route_invalid(kvm, e))
			return -EINVAL;
		break;
	case KVM_IRQ_ROUTING_HV_SINT:
		e->set = kvm_hv_set_sint;
		e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
		e->hv_sint.sint = ue->u.hv_sint.sint;
		break;
#ifdef CONFIG_KVM_XEN
	case KVM_IRQ_ROUTING_XEN_EVTCHN:
		return kvm_xen_setup_evtchn(kvm, e, ue);
#endif
	default:
		return -EINVAL;
	}

	return 0;
}

KVM_IRQ_LINE(中断注入)

KVM_IRQ_LINE用于虚拟机向VMM的虚拟apic发送中断请求,再由VMM将中断交付虚拟cpu处理。当kvm_vm_ioctl函数被调用并处理KVM_IRQ_LINE请求时,会调用kvm_vm_ioctl_irq_line,该函数调用kvm_set_irq完成中断注入。其核心任务就是根据中断控制器类型调用之前所绑定的中断回调函数,这些中断回调函数会将中断请求写入虚拟cpu的vmcs中。

/*
 * kvm_set_irq - raise or lower an interrupt line of a KVM guest
 * @kvm:           the KVM instance to operate on
 * @irq_source_id: identifier of the interrupt source, forwarded to each
 *                 route's set() callback
 * @irq:           interrupt number, resolved to routes by kvm_irq_map_gsi()
 * @level:         requested line level (typically 1 asserts, 0 clears)
 * @line_status:   forwarded unchanged to each route's set() callback
 *
 * Return value:
 *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
 *  = 0   Interrupt was coalesced (a previous one is still pending)
 *  > 0   Number of CPUs the interrupt was delivered to
 */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvm_kernel_irq_routing_entry routes[KVM_NR_IRQCHIPS];
	int delivered = -1;
	int remaining;
	int srcu_idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/* Copy the matching routes into a local array under the SRCU read lock. */
	srcu_idx = srcu_read_lock(&kvm->irq_srcu);
	remaining = kvm_irq_map_gsi(kvm, routes, irq);
	srcu_read_unlock(&kvm->irq_srcu, srcu_idx);

	/*
	 * Every mapped route is signalled, rather than a single handler being
	 * picked, because there is no way to tell whether the guest is using
	 * the PIC or the IOAPIC. The bit is therefore set in both and the
	 * guest ignores the injection that does not apply to it.
	 */
	while (remaining--) {
		struct kvm_kernel_irq_routing_entry *route = &routes[remaining];
		int r = route->set(route, kvm, irq_source_id, level,
				   line_status);

		/* A negative result does not contribute to the total. */
		if (r < 0)
			continue;

		/* The first non-negative result replaces the initial -1. */
		if (delivered < 0)
			delivered = r;
		else
			delivered += r;
	}

	return delivered;
}

当需要注入的中断为msi类型时,kvm_vm_ioctl会处理KVM_SIGNAL_MSI类型的请求,依次调用kvm_send_userspace_msi和kvm_set_msi:

/*
 * kvm_send_userspace_msi - deliver one MSI described by a struct kvm_msi
 * @kvm: target VM
 * @msi: MSI description (address, data, flags, devid)
 *
 * Builds a temporary routing entry on the stack from @msi and hands it to
 * kvm_set_msi() with KVM_USERSPACE_IRQ_SOURCE_ID and level 1.
 *
 * Return: -EINVAL if kvm_arch_irqchip_in_kernel() is false or @msi->flags has
 * any bit other than KVM_MSI_VALID_DEVID set; otherwise the result of
 * kvm_set_msi().
 */
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
	struct kvm_kernel_irq_routing_entry route;

	if (!kvm_arch_irqchip_in_kernel(kvm))
		return -EINVAL;

	/* Reject any flag bit other than KVM_MSI_VALID_DEVID. */
	if (msi->flags & ~KVM_MSI_VALID_DEVID)
		return -EINVAL;

	route.msi.flags = msi->flags;
	route.msi.devid = msi->devid;
	route.msi.data = msi->data;
	route.msi.address_hi = msi->address_hi;
	route.msi.address_lo = msi->address_lo;

	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
/*
 * kvm_set_msi - routing callback that delivers an MSI
 * @e:             routing entry carrying the MSI address and data
 * @kvm:           target VM
 * @irq_source_id: not used by this function
 * @level:         a zero level is rejected
 * @line_status:   not used by this function
 *
 * Return: -1 when @level is zero, otherwise the result of
 * kvm_irq_delivery_to_apic().
 */
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
	struct kvm_lapic_irq irq;
	int delivered;

	if (level == 0)
		return -1;

	/* Fill the kvm_lapic_irq structure from the routing entry. */
	kvm_set_msi_irq(e, &irq);

	/* Dispatch the interrupt. */
	delivered = kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);

	return delivered;
}
/*
 * kvm_set_msi_irq - translate an MSI routing entry into a kvm_lapic_irq
 * @e:   routing entry whose msi.address_lo and msi.data are parsed
 * @irq: output structure; every field assigned below is overwritten
 *
 * The level is always set to 1 and the shorthand to 0.
 */
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
		     struct kvm_lapic_irq *irq)
{
	/* Emit the kvm_msi_set_irq tracepoint. */
	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);

	/* Constant fields. */
	irq->shorthand = 0;
	irq->level = 1;

	/* Fields taken from the low address word. */
	irq->dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
			>> MSI_ADDR_DEST_ID_SHIFT;
	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
	irq->msi_redir_hint =
		((e->msi.address_lo & MSI_ADDR_REDIRECTION_LOWPRI) > 0);

	/* Fields taken from the data word. */
	irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
			>> MSI_DATA_VECTOR_SHIFT;
	irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
	irq->delivery_mode = e->msi.data & 0x700;
}

其中ioapic的回调函数kvm_set_ioapic_irq依次调用kvm_ioapic_set_irq、ioapic_set_irq,最后调用ioapic_service函数。ioapic_service主要是查找中断重定向表(redirection table)中对应的表项,从中取得中断的目的地信息,然后调用kvm_irq_delivery_to_apic,将中断分发给对应vcpu的lapic去处理:

/*
 * kvm_irq_delivery_to_apic - deliver an interrupt to the matching vCPU LAPIC(s)
 * @kvm:      target VM
 * @src:      source LAPIC, forwarded to the destination-matching helpers
 * @irq:      interrupt to deliver; its delivery_mode may be rewritten here
 * @dest_map: forwarded unchanged to the delivery helpers
 *
 * Return: the value reported by kvm_irq_delivery_to_apic_fast() when that
 * path handles the interrupt. Otherwise -1 if no vCPU took it, the sum of
 * kvm_apic_set_irq() results when every matching vCPU is signalled, or the
 * kvm_apic_set_irq() result for the single vCPU chosen in lowest-priority
 * mode.
 */
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, unsigned long *dest_map)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu *chosen = NULL;
	int r = -1;
	int idx;

	/* dest_mode 0 with dest_id 0xff plus lowest priority: use fixed mode. */
	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
			kvm_lowest_prio_delivery(irq)) {
		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
		irq->delivery_mode = APIC_DM_FIXED;
	}

	/* Try the lookup through the kvm_apic_map cached in the VM first. */
	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
		return r;

	/*
	 * Slow path: walk every vCPU and pick the target(s). In
	 * lowest-priority mode nothing is delivered inside the loop; once it
	 * finishes, 'chosen' points at the vCPU that compared lowest.
	 */
	kvm_for_each_vcpu(idx, vcpu, kvm) {
		/* Skip vCPUs without a LAPIC or whose LAPIC is not a target. */
		if (!kvm_apic_present(vcpu) ||
		    !kvm_apic_match_dest(vcpu, src, irq->shorthand,
					irq->dest_id, irq->dest_mode))
			continue;

		if (kvm_lowest_prio_delivery(irq)) {
			/* Only vCPUs with an enabled LAPIC are candidates. */
			if (!kvm_lapic_enabled(vcpu))
				continue;

			/* Keep the candidate that compares lower. */
			if (!chosen || kvm_apic_compare_prio(vcpu, chosen) < 0)
				chosen = vcpu;
			continue;
		}

		/* Not lowest-priority: signal every matching vCPU and sum up. */
		if (r < 0)
			r = 0;
		r += kvm_apic_set_irq(vcpu, irq, dest_map);
	}

	if (chosen)
		r = kvm_apic_set_irq(chosen, irq, dest_map);

	return r;
}

选取不同类型的中断控制器的回调函数kvm_set_ioapic_irq->kvm_ioapic_set_irq->ioapic_set_irq、kvm_set_pic_irq->kvm_pic_set_irq、kvm_set_msi作为ebpf程序的挂载点,可以监控中断注入的详细信息,如下:

TIME(ms)           COMM            PID        DELAY      TYPE/PIN       DST/VEC    OTHERS  
962804587.768667   CPU 0/KVM       269394     773        MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962805792.231419   vhost-529746    529767     3008       MSI       /-   0x1/40     Fixed  |physical|edge |-     |-
962805792.234556   vhost-269394    269403     1442       MSI       /-   0x3/40     Fixed  |physical|edge |-     |-
962805792.243754   vhost-426070    426078     1323       MSI       /-   0x5/35     Fixed  |physical|edge |-     |-
962806603.650275   CPU 0/KVM       269394     3738       MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962806603.713743   CPU 0/KVM       269394     1414       MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962806816.308239   qemu-system-x86 269394     29495      IOAPIC    /21  0x4/39     Fixed  |physical|level|-     |-
962806816.359852   qemu-system-x86 269394     38615      PIC slave /2   -  /-      -      |-       |level|masked|-
962806816.400501   qemu-system-x86 269394     1259       IOAPIC    /10  0  /0      Fixed  |physical|edge |masked|-
962806816.408792   qemu-system-x86 269394     1270       PIC slave /2   -  /-      -      |-       |level|masked|-
962806816.410425   qemu-system-x86 269394     226        IOAPIC    /10  0  /0      Fixed  |physical|edge |masked|-
962809792.316035   vhost-426070    426078     1747       MSI       /-   0x5/35     Fixed  |physical|edge |-     |-
962810635.636493   CPU 0/KVM       269394     3034       MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962810635.694923   CPU 0/KVM       269394     897        MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962811776.481253   vhost-269394    269403     3719       MSI       /-   0x3/40     Fixed  |physical|edge |-     |-
962811776.523581   vhost-529746    529767     1664       MSI       /-   0x1/40     Fixed  |physical|edge |-     |-
962811776.654516   vhost-426070    426078     1522       MSI       /-   0x5/35     Fixed  |physical|edge |-     |-
962812652.302519   CPU 2/KVM       269394     2605       MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962812652.342239   CPU 2/KVM       269394     749        MSI       /-   0x2/40     Fixed  |physical|edge |-     |-
962813344.856419   qemu-system-x86 269394     23230      IOAPIC    /21  0x4/39     Fixed  |physical|level|-     |-
962813344.899277   qemu-system-x86 269394     5472       PIC slave /2   -  /-      -      |-       |level|masked|-

详细代码:https://github.com/nanshuaibo/lmp/tree/develop/eBPF_Supermarket/kvm_watcher

  • 5
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值