结合前面的VT-d学习系列文章,我们知道VT-d上引入的Posted Interrupt机制可以让直通设备的中断直接投递到正在运行的目标vCPU上。那么想一下,普通的模拟的中断是否也可以利用VT-d引入的Posted Interrupt Descriptor
机制投递给正在运行的目标vCPU呢?
答案显然是可以的。如果我们知道中断要投递给哪个目标vCPU,并且vector号也是已知的,那么hypervisor软件完全可以根据当前状态直接去更新目标vCPU的Posted Interrupt Descriptor
里面的PIR(Posted Interrupt Requests,下文也称IRR)
域,然后给正在non-root模式的vCPU发一个Posted Interrupt Notification Vector
的ipi中断。那么目标vCPU也会跟VT-d模式下一样,由硬件自动把PIR同步到vIRR
,并且在non-root模式接受和处理中断。
明白了原理,那一切就很顺理成章了。 具体的代码实现,可以参考一下Zhang Yang
提交的Posted Interrupt Enable
系列Commit:
lore.kernel.org/kvm/1365679...
VT-x Posted Interrupt
技术的关键也就两点:
- 更新目标vCPU的
pi_desc
的IRR
- 给目标vCPU投递中断
这里咱们倒过来看,先看下中断的投递。
1. 给目标vCPU投递Posted Interrupt
大致的调用路径是:
c
kvm_irq_delivery_to_apic
=> kvm_irq_delivery_to_apic_fast
=> kvm_apic_set_irq
=> __apic_accept_irq
/*
 * Excerpt of __apic_accept_irq(): only the APIC_DM_FIXED case is shown, the
 * rest of the function (including the enclosing switch) is elided by the
 * "......" line below.
 *
 * For a fixed-delivery interrupt this case records the target in @dest_map
 * (when one is supplied), makes the vector's TMR bit agree with @trig_mode,
 * and then asks the posted-interrupt delivery hook to deliver @vector. If the
 * hook returns non-zero, the vector is set in the local APIC IRR instead and
 * the vCPU is kicked after a KVM_REQ_EVENT request.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
struct dest_map *dest_map)
{
......
case APIC_DM_FIXED:
/* trig_mode set together with level == 0: nothing is delivered. */
if (unlikely(trig_mode && !level))
break;
/* FIXME add logic for vcpu on reset */
if (unlikely(!apic_enabled(apic)))
break;
result = 1;
/* Optionally record which vCPU receives which vector. */
if (dest_map) {
__set_bit(vcpu->vcpu_id, dest_map->map);
dest_map->vectors[vcpu->vcpu_id] = vector;
}
/* Only touch the TMR bit when it disagrees with the requested trig_mode. */
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
kvm_lapic_set_vector(vector,
apic->regs + APIC_TMR);
else
kvm_lapic_clear_vector(vector,
apic->regs + APIC_TMR);
}
/*
 * Try posted-interrupt delivery first. A non-zero return means the hook
 * did not deliver the vector, so fall back to setting it in the IRR,
 * requesting event processing and kicking the vCPU.
 */
if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
break;
}
其实是在__apic_accept_irq
函数里面,如果判断出中断的投递模式是APIC_DM_FIXED
,那么调用kvm_x86_deliver_posted_interrupt
来投递中断。
c
/*
 * Deliver @vector to @vcpu through the posted-interrupt mechanism.
 *
 * The vector is recorded in the PIR of the vCPU's posted-interrupt
 * descriptor and the descriptor's ON bit is set. The target is then told
 * about it in one of two ways:
 *   - target running in non-root mode: a posted-interrupt notification is
 *     sent and hardware syncs PIR to vIRR atomically;
 *   - target not running (root mode): it is kicked so that it picks the
 *     interrupt up from PIR at the next vmentry.
 *
 * Returns 0 when the interrupt has been taken care of here, and -1 when
 * APICv is not active for @vcpu.
 */
static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* A zero return from the nested path means there is nothing left to do. */
	if (vmx_deliver_nested_posted_interrupt(vcpu, vector) == 0)
		return 0;

	if (!vcpu->arch.apicv_active)
		return -1;

	/*
	 * Stop here if the vector was already set in PIR, or if ON was already
	 * set, i.e. a previous notification has sent the IPI.
	 */
	if (pi_test_and_set_pir(vector, &vmx->pi_desc) ||
	    pi_test_and_set_on(&vmx->pi_desc))
		return 0;

	/* Notification IPI sent: done. Otherwise kick the vCPU instead. */
	if (kvm_vcpu_trigger_posted_interrupt(vcpu, false))
		return 0;

	kvm_vcpu_kick(vcpu);
	return 0;
}
/*
 * Send the posted-interrupt notification IPI to the physical CPU that @vcpu
 * is on, provided @vcpu is currently marked IN_GUEST_MODE.
 *
 * @vcpu:   target vCPU; the vector to deliver must already be set in PIR.
 * @nested: selects POSTED_INTR_NESTED_VECTOR instead of POSTED_INTR_VECTOR
 *          as the notification vector.
 *
 * Returns true if the notification IPI was sent, false otherwise (the vCPU
 * was not IN_GUEST_MODE, or the kernel is built without CONFIG_SMP).
 */
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
						     bool nested)
{
#ifdef CONFIG_SMP
	int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;

	if (vcpu->mode == IN_GUEST_MODE) {
		/*
		 * The vector of interrupt to be delivered to vcpu had
		 * been set in PIR before this function.
		 *
		 * Following cases will be reached in this block, and
		 * we always send a notification event in all cases as
		 * explained below.
		 *
		 * Case 1: vcpu keeps in non-root mode. Sending a
		 * notification event posts the interrupt to vcpu.
		 *
		 * Case 2: vcpu exits to root mode and is still
		 * runnable. PIR will be synced to vIRR before the
		 * next vcpu entry. Sending a notification event in
		 * this case has no effect, as vcpu is no longer in
		 * non-root mode.
		 *
		 * Case 3: vcpu exits to root mode and is blocked.
		 * vcpu_block() has already synced PIR to vIRR and
		 * never blocks vcpu if vIRR is not cleared. Therefore,
		 * a blocked vcpu here does not wait for any requested
		 * interrupts in PIR, and sending a notification event
		 * which has no effect is safe here.
		 */
		/*
		 * Send the notification IPI to the physical CPU the target
		 * vCPU is on, telling it to accept and process the posted
		 * interrupt requests.
		 */
		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
		return true;
	}
#endif
	return false;
}
可以看到vmx_deliver_posted_interrupt
函数就是在投递VT-x Posted Interrupt,最后可以看到其实就是在更新完IRR后,给目标vCPU所在PCPU发Notification IPI中断,通知其接受和处理中断。
根据函数kvm_vcpu_trigger_posted_interrupt
的注释可知,只要检查时目标vCPU的mode是IN_GUEST_MODE,不论它随后仍在non-root模式,还是已经退出到root模式,或者被block住,都可以安全地直接给目标vCPU投递notification IPI中断;如果检查时不是IN_GUEST_MODE,则改为调用kvm_vcpu_kick。
2. 如何更新目标vCPU的IRR
很显然在投递中断之前,需要先获取目标vCPU当前的pending interrupt request
信息,然后按位或上要投递的vector号,算出IRR然后更新pi_desc
的IRR值,最后再发IPI中断通知目标vCPU接受和处理要投递的vector。
c
/*
 * Bring the local APIC IRR of @vcpu up to date with its posted-interrupt
 * descriptor and program RVI with the highest pending vector.
 *
 * When the descriptor's ON bit is set, ON is cleared, the PIR contents are
 * merged into the IRR via kvm_apic_update_irr() and KVM_REQ_EVENT is
 * requested. Otherwise the highest vector is simply looked up in the IRR.
 *
 * Returns the value that was passed to vmx_set_rvi().
 */
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int highest;

	if (!pi_test_on(&vmx->pi_desc)) {
		/* Nothing posted: just report what the IRR already holds. */
		highest = kvm_lapic_find_highest_irr(vcpu);
	} else {
		/* ON is cleared first, then PIR is folded into the IRR. */
		pi_clear_on(&vmx->pi_desc);
		kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &highest);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
	}

	vmx_set_rvi(highest);
	return highest;
}
kvm_apic_update_irr
__kvm_apic_update_irr
/*
 * Merge the posted-interrupt request bits in @pir into the IRR words found
 * in @regs, clearing each PIR word that is consumed.
 *
 * @pir:     eight 32-bit PIR words (one bit per vector, 32 vectors per word).
 * @regs:    base of the APIC register area; IRR word i is read and written
 *           at offset APIC_IRR + i * 0x10.
 * @max_irr: out parameter; set to the highest vector set in the IRR after the
 *           merge, or -1 if no IRR bit is set.
 *
 * Returns true when this call newly set at least one IRR bit and the highest
 * newly set vector is also the highest vector now set in the IRR.
 */
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
u32 i, vec;
u32 pir_val, irr_val, prev_irr_val;
int max_updated_irr;
max_updated_irr = -1;
*max_irr = -1;
/* Walk the 8 words from low to high; vec is the first vector of word i. */
for (i = vec = 0; i <= 7; i++, vec += 32) {
/* Cheap peek first, so the exchange below is skipped for empty words. */
pir_val = READ_ONCE(pir[i]);
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
if (pir_val) {
prev_irr_val = irr_val;
/* Take the PIR word and zero it in one exchange, then OR it into IRR. */
irr_val |= xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
/* Highest bit that changed in this word; later words overwrite it. */
if (prev_irr_val != irr_val) {
max_updated_irr =
__fls(irr_val ^ prev_irr_val) + vec;
}
}
/* Words are visited in ascending order, so the last hit is the maximum. */
if (irr_val)
*max_irr = __fls(irr_val) + vec;
}
return ((max_updated_irr != -1) &&
(max_updated_irr == *max_irr));
}