Now that GICv5 has arrived, the arch timer requires some TLC to address
some of the key differences introduced with GICv5. For PPIs on GICv5,
the set_pending_state and queue_irq_unlock irq_ops are used, as AP
lists are not required at all for GICv5. The arch timer also provides
the get_input_level irq_op. Extend the arch-timer-provided irq_ops to
include the two PPI ops for vgic_v5 guests.

When possible, DVI (Direct Virtual Interrupt) is set for PPIs when
using a vgic_v5, which directly injects the pending state into the
guest. This means that the host never sees these interrupts when they
fire for the guest. This has two impacts:

* First of all, the kvm_cpu_has_pending_timer check is updated to
  explicitly check if the timers are expected to fire.

* Secondly, mapped timers (which use DVI) must be masked on the host
  prior to entering a GICv5 guest, and unmasked on the return path.
  This is handled in set_timer_irq_phys_masked.

The final, but rather important, change is that the architected PPIs
for the timers are made mandatory for a GICv5 guest. Attempts to set
them to anything else are actively rejected. Once a vgic_v5 is
initialised, the arch timer PPIs are also explicitly reinitialised to
ensure the correct GICv5-compatible PPIs are used - this also adds the
GICv5 PPI type to the INTID.

Signed-off-by: Sascha Bischoff
---
 arch/arm64/kvm/arch_timer.c     | 114 +++++++++++++++++++++++++++-----
 arch/arm64/kvm/vgic/vgic-init.c |   9 +++
 arch/arm64/kvm/vgic/vgic-v5.c   |   6 +-
 include/kvm/arm_arch_timer.h    |   7 +-
 include/kvm/arm_vgic.h          |   5 ++
 5 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6f033f6644219..b0a5a6c6bf8da 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -56,6 +56,17 @@ static struct irq_ops arch_timer_irq_ops = {
 	.get_input_level = kvm_arch_timer_get_input_level,
 };
 
+static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
+	.get_input_level = kvm_arch_timer_get_input_level,
+	.set_pending_state = vgic_v5_ppi_set_pending_state,
+	.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
+};
+
+static bool vgic_is_v5(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5;
+}
+
 static int nr_timers(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu_has_nv(vcpu))
@@ -396,7 +407,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+	return kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer) ||
+	       (vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0);
 }
 
 /*
@@ -657,6 +672,24 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo
 	WARN_ON(r);
 }
 
+/*
+ * On GICv5 we use DVI for the arch timer PPIs. This is restored later
+ * on as part of vgic_load. Therefore, in order to avoid the guest's
+ * interrupt making it to the host, we mask it before entering the
+ * guest and unmask it again when we return.
+ */
+static inline void set_timer_irq_phys_masked(struct arch_timer_context *ctx, bool masked)
+{
+	if (masked) {
+		disable_percpu_irq(ctx->host_timer_irq);
+	} else {
+		if (ctx->host_timer_irq == host_vtimer_irq)
+			enable_percpu_irq(ctx->host_timer_irq, host_vtimer_irq_flags);
+		else
+			enable_percpu_irq(ctx->host_timer_irq, host_ptimer_irq_flags);
+	}
+}
+
 static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 {
 	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
@@ -675,7 +708,10 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 
 	phys_active |= ctx->irq.level;
 
-	set_timer_irq_phys_active(ctx, phys_active);
+	if (!vgic_is_v5(vcpu))
+		set_timer_irq_phys_active(ctx, phys_active);
+	else
+		set_timer_irq_phys_masked(ctx, true);
 }
 
 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -719,10 +755,14 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
 					      struct timer_map *map)
 {
 	int hw, ret;
+	struct irq_ops *ops;
 
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return;
 
+	ops = vgic_is_v5(vcpu) ? &arch_timer_irq_ops_vgic_v5 :
+				 &arch_timer_irq_ops;
+
 	/*
 	 * We only ever unmap the vtimer irq on a VHE system that runs nested
 	 * virtualization, in which case we have both a valid emul_vtimer,
@@ -741,12 +781,12 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map->direct_vtimer->host_timer_irq,
 					    timer_irq(map->direct_vtimer),
-					    &arch_timer_irq_ops);
+					    ops);
 		WARN_ON_ONCE(ret);
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map->direct_ptimer->host_timer_irq,
 					    timer_irq(map->direct_ptimer),
-					    &arch_timer_irq_ops);
+					    ops);
 		WARN_ON_ONCE(ret);
 	}
 }
@@ -864,7 +904,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 	get_timer_map(vcpu, &map);
 
 	if (static_branch_likely(&has_gic_active_state)) {
-		if (vcpu_has_nv(vcpu))
+		/* We don't do NV on GICv5, yet */
+		if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu))
 			kvm_timer_vcpu_load_nested_switch(vcpu, &map);
 
 		kvm_timer_vcpu_load_gic(map.direct_vtimer);
@@ -934,6 +975,15 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 
 	if (kvm_vcpu_is_blocking(vcpu))
 		kvm_timer_blocking(vcpu);
+
+	/* Unmask again on GICv5 */
+	if (vgic_is_v5(vcpu)) {
+		set_timer_irq_phys_masked(map.direct_vtimer, false);
+
+		if (map.direct_ptimer)
+			set_timer_irq_phys_masked(map.direct_ptimer, false);
+
+	}
 }
 
 void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
@@ -1034,12 +1084,15 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 	if (timer->enabled) {
 		for (int i = 0; i < nr_timers(vcpu); i++)
 			kvm_timer_update_irq(vcpu, false,
-					vcpu_get_timer(vcpu, i));
+					     vcpu_get_timer(vcpu, i));
 
 		if (irqchip_in_kernel(vcpu->kvm)) {
-			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
+			kvm_vgic_reset_mapped_irq(
+				vcpu, timer_irq(map.direct_vtimer));
 			if (map.direct_ptimer)
-				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
+				kvm_vgic_reset_mapped_irq(
+					vcpu,
+					timer_irq(map.direct_ptimer));
 		}
 	}
 
@@ -1092,10 +1145,19 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 		      HRTIMER_MODE_ABS_HARD);
 }
 
+/*
+ * This is always called during kvm_arch_init_vm, but will also be
+ * called from kvm_vgic_create if we have a vGICv5.
+ */
 void kvm_timer_init_vm(struct kvm *kvm)
 {
+	/*
+	 * Set up the default PPIs - note that we adjust them based on
+	 * the model of the GIC, as GICv5 uses a different way of
+	 * describing interrupts.
+	 */
 	for (int i = 0; i < NR_KVM_TIMERS; i++)
-		kvm->arch.timer_data.ppi[i] = default_ppi[i];
+		kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]);
 }
 
 void kvm_timer_cpu_up(void)
@@ -1347,6 +1409,7 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
 	}
 
 		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+		arch_timer_irq_ops_vgic_v5.flags |= VGIC_IRQ_SW_RESAMPLE;
 		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
 					    (void *)TIMER_VTIMER));
 	}
@@ -1497,10 +1560,12 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
 			break;
 
 		/*
-		 * We know by construction that we only have PPIs, so
-		 * all values are less than 32.
+		 * We know by construction that we only have PPIs, so all values
+		 * are less than 32. However, we mask off most of the bits as we
+		 * might be presented with a GICv5-style PPI where the type is
+		 * encoded in the top bits.
 		 */
-		ppis |= BIT(irq);
+		ppis |= BIT(irq & 0x1f);
 	}
 
 	valid = hweight32(ppis) == nr_timers(vcpu);
@@ -1538,7 +1603,9 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct timer_map map;
+	struct irq_ops *ops;
 	int ret;
+	int irq;
 
 	if (timer->enabled)
 		return 0;
@@ -1556,20 +1623,22 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}
 
+	ops = vgic_is_v5(vcpu) ? &arch_timer_irq_ops_vgic_v5 :
+				 &arch_timer_irq_ops;
+
 	get_timer_map(vcpu, &map);
 
-	ret = kvm_vgic_map_phys_irq(vcpu,
-				    map.direct_vtimer->host_timer_irq,
-				    timer_irq(map.direct_vtimer),
-				    &arch_timer_irq_ops);
+	irq = timer_irq(map.direct_vtimer);
+	ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq,
+				    irq, ops);
 	if (ret)
 		return ret;
 
 	if (map.direct_ptimer) {
+		irq = timer_irq(map.direct_ptimer);
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map.direct_ptimer->host_timer_irq,
-					    timer_irq(map.direct_ptimer),
-					    &arch_timer_irq_ops);
+					    irq, ops);
 	}
 
 	if (ret)
@@ -1627,6 +1696,15 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		goto out;
 	}
 
+	/*
+	 * The PPIs for the Arch Timers are architecturally defined for
+	 * GICv5. Reject anything that changes them from the specified value.
+	 */
+	if (vgic_is_v5(vcpu) && vcpu->kvm->arch.timer_data.ppi[idx] != irq) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/*
 	 * We cannot validate the IRQ unicity before we run, so take it at
 	 * face value. The verdict will be given on first vcpu run, for each
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 120f28b329738..5955dcbfd051f 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -177,6 +177,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 		pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
 	} else {
 		aa64pfr2 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
+
+		/*
+		 * We now know that we have a GICv5. The Arch Timer PPI
+		 * interrupts may already have been initialised at this
+		 * stage, but assuming an older GIC, meaning that the
+		 * INTIDs won't be correct. We initialise them again, and
+		 * this time they will be correct.
+		 */
+		kvm_timer_init_vm(kvm);
 	}
 
 	kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index a3d52ce066869..1a6c9fc86ed07 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -166,7 +166,7 @@ static void vgic_v5_construct_hmrs(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool vgic_v5_ppi_set_pending_state(struct kvm_vcpu *vcpu,
+bool vgic_v5_ppi_set_pending_state(struct kvm_vcpu *vcpu,
 					  struct vgic_irq *irq)
 {
 	struct vgic_v5_cpu_if *cpu_if;
@@ -196,8 +196,8 @@ static bool vgic_v5_ppi_set_pending_state(struct kvm_vcpu *vcpu,
  * save/restore, but don't need the PPIs to be queued on a per-VCPU AP
  * list. Therefore, sanity check the state, unlock, and return.
  */
-static bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
-					 unsigned long flags)
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+				  unsigned long flags)
 	__releases(&irq->irq_lock)
 {
 	struct kvm_vcpu *vcpu;
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 7310841f45121..6cb9c20f9db65 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -10,6 +10,8 @@
 #include <linux/clocksource.h>
 #include <linux/hrtimer.h>
 
+#include <linux/irqchip/arm-gic-v5.h>
+
 enum kvm_arch_timers {
 	TIMER_PTIMER,
 	TIMER_VTIMER,
@@ -47,7 +49,7 @@ struct arch_timer_vm_data {
 	u64	poffset;
 
 	/* The PPI for each timer, global to the VM */
-	u8	ppi[NR_KVM_TIMERS];
+	u32	ppi[NR_KVM_TIMERS];
 };
 
 struct arch_timer_context {
@@ -130,6 +132,9 @@ void kvm_timer_init_vhe(void);
 #define timer_vm_data(ctx)	(&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
 #define timer_irq(ctx)		(timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
 
+#define get_vgic_ppi(k, i)	(((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? \
+				 (i) : ((i) | FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI)))
+
 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
 			      enum kvm_arch_timer_regs treg);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 099b8ac02999e..6863e19d6eeb7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -533,6 +533,11 @@ int vgic_v4_load(struct kvm_vcpu *vcpu);
 void vgic_v4_commit(struct kvm_vcpu *vcpu);
 int vgic_v4_put(struct kvm_vcpu *vcpu);
 
+bool vgic_v5_ppi_set_pending_state(struct kvm_vcpu *vcpu,
+				   struct vgic_irq *irq);
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+				  unsigned long flags);
+
 bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
 
 /* CPU HP callbacks */
-- 
2.34.1
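
For readers unfamiliar with the GICv5 INTID scheme, here is a minimal
standalone sketch (not part of the patch) of what get_vgic_ppi() and
the "irq & 0x1f" masking in timer_irqs_are_valid() achieve: a GICv5
guest carries the PPI type in the top bits of the INTID while the low
bits still hold the classic PPI number. The field position and type
value below are assumptions made purely for this example; the patch
itself takes GICV5_HWIRQ_TYPE and GICV5_HWIRQ_TYPE_PPI from the GICv5
header.

/*
 * Illustrative sketch only. EXAMPLE_HWIRQ_TYPE_SHIFT and
 * EXAMPLE_HWIRQ_TYPE_PPI are assumed values, standing in for the
 * GICV5_HWIRQ_* definitions used by the real code.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_HWIRQ_TYPE_SHIFT	29	/* assumed field position */
#define EXAMPLE_HWIRQ_TYPE_PPI		0x1u	/* assumed type value */

static uint32_t example_get_vgic_ppi(int vgic_is_v5, uint32_t ppi)
{
	/* Pre-GICv5 guests keep the bare PPI number (e.g. 27). */
	if (!vgic_is_v5)
		return ppi;

	/* GICv5 guests get the PPI type encoded into the INTID. */
	return ppi | (EXAMPLE_HWIRQ_TYPE_PPI << EXAMPLE_HWIRQ_TYPE_SHIFT);
}

int main(void)
{
	uint32_t intid = example_get_vgic_ppi(1, 27);

	/* As in timer_irqs_are_valid(): mask the type back off. */
	printf("intid=0x%08x ppi=%u\n", (unsigned)intid,
	       (unsigned)(intid & 0x1f));
	return 0;
}

Either way, masking with 0x1f recovers the same PPI number, which is
why the validity check works for both GIC models.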