From: Melody Wang Restricted injection is a feature which enforces additional interrupt and event injection security protections for a SEV-SNP guest. It disables all hypervisor-based interrupt queuing and event injection of all vectors except a new exception vector, #HV (28), which is reserved for SNP guest use, but never generated by hardware. #HV is only allowed to be injected into VMSAs that execute with Restricted Injection. The guests running with the SNP restricted injection feature active limit the host to ringing a doorbell with a #HV exception. Define two fields in the #HV doorbell page: a pending event field, and an EOI assist. Create the structure definition for the #HV doorbell page as per GHCB specification. Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/include/asm/svm.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index bcfeb5e7c0ed..9822b0b346ae 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -252,6 +252,39 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL #define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL +/* + * Hypervisor doorbell page: + * + * Used when Restricted Injection is enabled for a VM. One page in size that + * is shared between the guest and hypervisor to communicate exception and + * interrupt events. 
+ */ +struct hvdb_events { + /* First 64 bytes of HV doorbell page defined in GHCB specification */ + union { + struct { + /* Non-maskable event indicators */ + u16 vector: 8, + nmi: 1, + mce: 1, + reserved2: 5, + no_further_signal: 1; + }; + + u16 pending_events; + }; + + u8 no_eoi_required; + + u8 reserved3[61]; +}; + +struct hvdb { + struct hvdb_events events; + + /* Remainder of the page is for software use */ + u8 reserved[PAGE_SIZE - sizeof(struct hvdb_events)]; +}; /* AVIC */ #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) -- 2.53.0 From: Melody Wang To support Restricted Injection, the SEV-SNP guest must register a doorbell page for use with #HV. This is done using the #HV doorbell page NAE event. This event consists of four actions: GET_PREFERRED, SET, QUERY, CLEAR. Implement it per the GHCB specification. Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/include/uapi/asm/svm.h | 5 +++ arch/x86/kvm/svm/sev.c | 71 +++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 3 ++ arch/x86/kvm/svm/svm.h | 2 + 4 files changed, 81 insertions(+) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 010a45c9f614..d84a13ac4627 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -117,6 +117,11 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_HVDB_PAGE 0x80000014ull +#define SVM_VMGEXIT_HVDB_GET_PREFERRED 0 +#define SVM_VMGEXIT_HVDB_SET 1 +#define SVM_VMGEXIT_HVDB_QUERY 2 +#define SVM_VMGEXIT_HVDB_CLEAR 3 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull #define SVM_VMGEXIT_SAVIC 0x8000001aull #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6c6a6d663e29..b9ad1169cb2c 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3522,6 +3522,10 @@ static int 
sev_es_validate_vmgexit(struct vcpu_svm *svm) control->exit_info_1 == control->exit_info_2) goto vmgexit_err; break; + case SVM_VMGEXIT_HVDB_PAGE: + if (!is_sev_snp_guest(vcpu)) + goto vmgexit_err; + break; default: reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; @@ -4341,6 +4345,65 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r return 1; /* resume guest */ } +static int sev_snp_hv_doorbell_page(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_host_map hvdb_map; + gpa_t hvdb_gpa; + u64 request; + + if (!is_sev_snp_guest(vcpu)) + return -EINVAL; + + request = svm->vmcb->control.exit_info_1; + hvdb_gpa = svm->vmcb->control.exit_info_2; + + switch (request) { + case SVM_VMGEXIT_HVDB_GET_PREFERRED: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, ~0ULL); + break; + case SVM_VMGEXIT_HVDB_SET: + svm->sev_es.hvdb_gpa = INVALID_PAGE; + + if (!PAGE_ALIGNED(hvdb_gpa)) { + vcpu_unimpl(vcpu, "vmgexit: unaligned #HV doorbell page address [%#llx] from guest\n", + hvdb_gpa); + return -EINVAL; + } + + if (!page_address_valid(vcpu, hvdb_gpa)) { + vcpu_unimpl(vcpu, "vmgexit: invalid #HV doorbell page address [%#llx] from guest\n", + hvdb_gpa); + return -EINVAL; + } + + /* Map and unmap the GPA just to be sure the GPA is valid */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(hvdb_gpa), &hvdb_map)) { + vcpu_unimpl(vcpu, "vmgexit: error mapping #HV doorbell page [%#llx] from guest\n", + hvdb_gpa); + return -EINVAL; + } + kvm_vcpu_unmap(vcpu, &hvdb_map); + + svm->sev_es.hvdb_gpa = hvdb_gpa; + fallthrough; + case SVM_VMGEXIT_HVDB_QUERY: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, svm->sev_es.hvdb_gpa); + break; + case SVM_VMGEXIT_HVDB_CLEAR: + svm->sev_es.hvdb_gpa = INVALID_PAGE; + break; + default: + svm->sev_es.hvdb_gpa = INVALID_PAGE; + + vcpu_unimpl(vcpu, "vmgexit: invalid #HV doorbell page request [%#llx] from guest\n", + request); + return -EINVAL; + } + + return 0; +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm 
*svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4617,6 +4680,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) case SVM_VMGEXIT_EXT_GUEST_REQUEST: ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2); break; + case SVM_VMGEXIT_HVDB_PAGE: + if (sev_snp_hv_doorbell_page(svm)) { + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); + } + + ret = 1; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e02a38da5296..7981e7583384 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1277,6 +1277,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu) svm->nmi_masked = false; svm->awaiting_iret_completion = false; + + if (is_sev_es_guest(vcpu)) + svm->sev_es.hvdb_gpa = INVALID_PAGE; } static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5137416be593..fb956c37c941 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -270,6 +270,8 @@ struct vcpu_sev_es_state { gpa_t snp_vmsa_gpa; bool snp_ap_waiting_for_reset; bool snp_has_guest_vmsa; + + gpa_t hvdb_gpa; }; struct vcpu_svm { -- 2.53.0 From: Melody Wang When Restricted Injection is active, only #HV exceptions can be injected into the SEV-SNP guest. Detect that, and then follow the #HV doorbell communication from the GHCB specification to inject the interrupt or exception. 
Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 14 +++- arch/x86/kvm/svm/svm.h | 21 ++++++ 3 files changed, 197 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b9ad1169cb2c..f2f40f81ba86 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -5380,3 +5380,167 @@ void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) free_page((unsigned long)vmsa); } + +static void prepare_hv_injection(struct vcpu_svm *svm, struct hvdb *hvdb) +{ + if (hvdb->events.no_further_signal) + return; + + svm->vmcb->control.event_inj = HV_VECTOR | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID; + svm->vmcb->control.event_inj_err = 0; + + hvdb->events.no_further_signal = 1; +} + +static void unmap_hvdb(struct kvm_vcpu *vcpu, struct kvm_host_map *map) +{ + kvm_vcpu_unmap(vcpu, map); +} + +static struct hvdb *map_hvdb(struct kvm_vcpu *vcpu, struct kvm_host_map *map) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (!VALID_PAGE(svm->sev_es.hvdb_gpa)) + return NULL; + + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->sev_es.hvdb_gpa), map)) { + vcpu_unimpl(vcpu, "snp: error mapping #HV doorbell page [%#llx] from guest\n", + svm->sev_es.hvdb_gpa); + + return NULL; + } + + return map->hva; +} + +static void __sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map hvdb_map; + struct hvdb *hvdb; + + hvdb = map_hvdb(vcpu, &hvdb_map); + if (!hvdb) { + WARN_ONCE(1, "Restricted Injection enabled, hvdb page mapping failed\n"); + return; + } + + hvdb->events.vector = vcpu->arch.interrupt.nr; + + prepare_hv_injection(svm, hvdb); + + unmap_hvdb(vcpu, &hvdb_map); +} + +bool sev_snp_queue_exception(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if 
(!sev_snp_is_rinj_active(vcpu)) + return false; + + /* + * Restricted Injection is enabled, only #HV is supported. + * If the vector is not HV_VECTOR, do not inject the exception, + * then return true to skip the original injection path. + */ + if (WARN_ONCE(vcpu->arch.exception.vector != HV_VECTOR, + "Restricted Injection enabled, exception vector %u injection not supported\n", + vcpu->arch.exception.vector)) + return true; + + /* + * An intercept likely occurred during #HV delivery, so re-inject it + * using the current HVDB pending event values. + */ + svm->vmcb->control.event_inj = HV_VECTOR | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID; + svm->vmcb->control.event_inj_err = 0; + + return true; +} + +bool sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu) +{ + if (!sev_snp_is_rinj_active(vcpu)) + return false; + + __sev_snp_inject(type, vcpu); + + return true; +} + +void sev_snp_cancel_injection(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map hvdb_map; + struct hvdb *hvdb; + + if (!sev_snp_is_rinj_active(vcpu)) + return; + + if (!svm->vmcb->control.event_inj) + return; + + if (WARN_ONCE((svm->vmcb->control.event_inj & SVM_EVTINJ_VEC_MASK) != HV_VECTOR, + "Restricted Injection enabled, %u vector not supported\n", + svm->vmcb->control.event_inj & SVM_EVTINJ_VEC_MASK)) + return; + + /* + * Copy the information in the doorbell page into the event injection + * fields to complete the cancellation flow. 
+ */ + hvdb = map_hvdb(vcpu, &hvdb_map); + if (!hvdb) + return; + + if (!hvdb->events.pending_events) { + /* No pending events, then event_inj field should be 0 */ + WARN_ON_ONCE(svm->vmcb->control.event_inj); + goto out; + } + + /* Copy info back into event_inj field (replaces #HV) */ + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID; + + if (hvdb->events.vector) + svm->vmcb->control.event_inj |= hvdb->events.vector | + SVM_EVTINJ_TYPE_INTR; + + hvdb->events.pending_events = 0; + +out: + unmap_hvdb(vcpu, &hvdb_map); +} + +/* + * sev_snp_blocked() is called for each event type: interrupt, NMI and MCE. + * It checks whether the guest is still handling a previously signaled + * event when another one is pending, so hvdb->events.vector is used for + * that check. no_further_signal, on the other hand, tells the guest that + * a #HV has been presented by the hypervisor, so it is only checked when + * a #HV needs to be presented to the guest. + */ +bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu) +{ + struct kvm_host_map hvdb_map; + struct hvdb *hvdb; + bool blocked; + + /* Indicate interrupts are blocked if doorbell page can't be mapped */ + hvdb = map_hvdb(vcpu, &hvdb_map); + if (!hvdb) + return true; + + /* Indicate interrupts blocked based on guest acknowledgment */ + blocked = !!hvdb->events.vector; + + unmap_hvdb(vcpu, &hvdb_map); + + return blocked; +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7981e7583384..7253936c460c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -392,6 +392,9 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu) svm_update_soft_interrupt_rip(vcpu, ex->vector)) return; + if (sev_snp_queue_exception(vcpu)) + return; + svm->vmcb->control.event_inj = ex->vector | SVM_EVTINJ_VALID | (ex->has_error_code ? 
SVM_EVTINJ_VALID_ERR : 0) @@ -3818,9 +3821,11 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) } trace_kvm_inj_virq(intr->nr, intr->soft, reinjected); - ++vcpu->stat.irq_injections; - svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; + if (!sev_snp_inject(INJECT_IRQ, vcpu)) + svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; + + ++vcpu->stat.irq_injections; } static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) @@ -3995,6 +4000,9 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; + if (sev_snp_is_rinj_active(vcpu)) + return sev_snp_blocked(INJECT_IRQ, vcpu); + if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) @@ -4345,6 +4353,8 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; + sev_snp_cancel_injection(vcpu); + control->exit_int_info = control->event_inj; control->exit_int_info_err = control->event_inj_err; control->event_inj = 0; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fb956c37c941..a22ad5de03ea 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -55,6 +55,10 @@ extern int tsc_aux_uret_slot __ro_after_init; extern struct kvm_x86_ops svm_x86_ops __initdata; +enum inject_type { + INJECT_IRQ, +}; + /* * Clean bits in VMCB. 
* VMCB_ALL_CLEAN_MASK might also need to @@ -971,6 +975,17 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu); void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa); +bool sev_snp_queue_exception(struct kvm_vcpu *vcpu); +bool sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu); +void sev_snp_cancel_injection(struct kvm_vcpu *vcpu); +bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu); +static inline bool sev_snp_is_rinj_active(struct kvm_vcpu *vcpu) +{ + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + + return is_sev_snp_guest(vcpu) && + (sev->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION); +}; #else static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp) { @@ -1008,6 +1023,12 @@ static inline struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu) return NULL; } static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) {} + +static inline bool sev_snp_queue_exception(struct kvm_vcpu *vcpu) { return false; } +static inline bool sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu) { return false; } +static inline void sev_snp_cancel_injection(struct kvm_vcpu *vcpu) {} +static inline bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu) { return false; } +static inline bool sev_snp_is_rinj_active(struct kvm_vcpu *vcpu) { return false; } #endif /* vmenter.S */ -- 2.53.0 From: Melody Wang When Restricted Injection is active, only #HV exceptions can be injected into the SEV-SNP guest. Detect that, and then follow the #HV doorbell communication from the GHCB specification to inject NMIs. 
Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 19 ++++++++++++++++--- arch/x86/kvm/svm/svm.c | 8 ++++++++ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index f2f40f81ba86..b48745fad8c5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -5428,7 +5428,10 @@ static void __sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu) return; } - hvdb->events.vector = vcpu->arch.interrupt.nr; + if (type == INJECT_NMI) + hvdb->events.nmi = 1; + else + hvdb->events.vector = vcpu->arch.interrupt.nr; prepare_hv_injection(svm, hvdb); @@ -5508,10 +5511,17 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu) /* Copy info back into event_inj field (replaces #HV) */ svm->vmcb->control.event_inj = SVM_EVTINJ_VALID; + /* + * KVM only injects a single event each time (prepare_hv_injection), + * so when events.nmi is true, the vector will be zero + */ if (hvdb->events.vector) svm->vmcb->control.event_inj |= hvdb->events.vector | SVM_EVTINJ_TYPE_INTR; + if (hvdb->events.nmi) + svm->vmcb->control.event_inj |= SVM_EVTINJ_TYPE_NMI; + hvdb->events.pending_events = 0; out: @@ -5537,8 +5547,11 @@ bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu) if (!hvdb) return true; - /* Indicate interrupts blocked based on guest acknowledgment */ - blocked = !!hvdb->events.vector; + /* Indicate NMIs and interrupts blocked based on guest acknowledgment */ + if (type == INJECT_NMI) + blocked = hvdb->events.nmi; + else + blocked = !!hvdb->events.vector; unmap_hvdb(vcpu, &hvdb_map); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7253936c460c..5255393986cc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3738,6 +3738,9 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (sev_snp_inject(INJECT_NMI, 
vcpu)) + goto status; + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; if (svm->nmi_l1_to_l2) @@ -3752,6 +3755,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->nmi_masked = true; svm_set_iret_intercept(svm); } + +status: ++vcpu->stat.nmi_injections; } @@ -3968,6 +3973,9 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; + if (sev_snp_is_rinj_active(vcpu)) + return sev_snp_blocked(INJECT_NMI, vcpu); + if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) return false; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index a22ad5de03ea..bb0e5bfdb9a6 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -57,6 +57,7 @@ extern struct kvm_x86_ops svm_x86_ops __initdata; enum inject_type { INJECT_IRQ, + INJECT_NMI, }; /* -- 2.53.0 From: Melody Wang When Restricted Injection is active, only #HV exceptions can be injected into the SEV-SNP guest. Detect that, and then follow the #HV doorbell communication from the GHCB specification to inject the MCEs. 
Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/sev.c | 16 ++++++++++++++-- arch/x86/kvm/svm/svm.c | 17 +++++++++++++++++ arch/x86/kvm/svm/svm.h | 2 ++ arch/x86/kvm/vmx/main.c | 10 ++++++++++ arch/x86/kvm/vmx/vmx.c | 5 +++++ arch/x86/kvm/vmx/x86_ops.h | 1 + arch/x86/kvm/x86.c | 7 +++++++ 9 files changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 3776cf5382a2..c8bff1e9325e 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -79,6 +79,7 @@ KVM_X86_OP(inject_exception) KVM_X86_OP(cancel_injection) KVM_X86_OP(interrupt_allowed) KVM_X86_OP(nmi_allowed) +KVM_X86_OP_OPTIONAL(mce_allowed) KVM_X86_OP(get_nmi_mask) KVM_X86_OP(set_nmi_mask) KVM_X86_OP(enable_nmi_window) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f14009f25a3b..43c92f0ada1e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1861,6 +1861,7 @@ struct kvm_x86_ops { void (*cancel_injection)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*mce_allowed)(struct kvm_vcpu *vcpu); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); /* Whether or not a virtual NMI is pending in hardware. 
*/ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b48745fad8c5..6d5d66563b0d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -5430,6 +5430,8 @@ static void __sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu) if (type == INJECT_NMI) hvdb->events.nmi = 1; + else if (type == INJECT_MCE) + hvdb->events.mce = 1; else hvdb->events.vector = vcpu->arch.interrupt.nr; @@ -5445,6 +5447,11 @@ bool sev_snp_queue_exception(struct kvm_vcpu *vcpu) if (!sev_snp_is_rinj_active(vcpu)) return false; + if (vcpu->arch.exception.vector == MC_VECTOR) { + __sev_snp_inject(INJECT_MCE, vcpu); + return true; + } + /* * Restricted Injection is enabled, only #HV is supported. * If the vector is not HV_VECTOR, do not inject the exception, @@ -5513,7 +5520,7 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu) /* * KVM only injects a single event each time (prepare_hv_injection), - * so when events.nmi is true, the vector will be zero + * so when events.nmi is true, the MCE and vector will be zero. 
*/ if (hvdb->events.vector) svm->vmcb->control.event_inj |= hvdb->events.vector | @@ -5522,6 +5529,9 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu) if (hvdb->events.nmi) svm->vmcb->control.event_inj |= SVM_EVTINJ_TYPE_NMI; + if (hvdb->events.mce) + svm->vmcb->control.event_inj |= MC_VECTOR | SVM_EVTINJ_TYPE_EXEPT; + hvdb->events.pending_events = 0; out: @@ -5547,9 +5557,11 @@ bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu) if (!hvdb) return true; - /* Indicate NMIs and interrupts blocked based on guest acknowledgment */ + /* Indicate NMIs, MCEs and interrupts blocked based on guest acknowledgment */ if (type == INJECT_NMI) blocked = hvdb->events.nmi; + else if (type == INJECT_MCE) + blocked = hvdb->events.mce; else blocked = !!hvdb->events.vector; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5255393986cc..295e02c17b9b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4049,6 +4049,22 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) return 1; } +bool svm_mce_blocked(struct kvm_vcpu *vcpu) +{ + if (sev_snp_is_rinj_active(vcpu)) + return sev_snp_blocked(INJECT_MCE, vcpu); + + return false; +} + +static int svm_mce_allowed(struct kvm_vcpu *vcpu) +{ + if (svm_mce_blocked(vcpu)) + return 0; + + return 1; +} + static void svm_enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5362,6 +5378,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .cancel_injection = svm_cancel_injection, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .mce_allowed = svm_mce_allowed, .get_nmi_mask = svm_get_nmi_mask, .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = svm_enable_nmi_window, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bb0e5bfdb9a6..7d27ed7099a8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -58,6 +58,7 @@ extern struct kvm_x86_ops svm_x86_ops __initdata; enum inject_type { 
INJECT_IRQ, INJECT_NMI, + INJECT_MCE, }; /* @@ -801,6 +802,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void disable_nmi_singlestep(struct vcpu_svm *svm); bool svm_smi_blocked(struct kvm_vcpu *vcpu); bool svm_nmi_blocked(struct kvm_vcpu *vcpu); +bool svm_mce_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index dbebddf648be..f9c4703dda54 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -812,6 +812,15 @@ static void vt_cancel_hv_timer(struct kvm_vcpu *vcpu) } #endif +static int vt_mce_allowed(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) + return 0; + + return vmx_mce_allowed(vcpu); +} + + static void vt_setup_mce(struct kvm_vcpu *vcpu) { if (is_td_vcpu(vcpu)) @@ -945,6 +954,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .cancel_injection = vt_op(cancel_injection), .interrupt_allowed = vt_op(interrupt_allowed), .nmi_allowed = vt_op(nmi_allowed), + .mce_allowed = vt_op(mce_allowed), .get_nmi_mask = vt_op(get_nmi_mask), .set_nmi_mask = vt_op(set_nmi_mask), .enable_nmi_window = vt_op(enable_nmi_window), diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b9103de01428..a82a4197d18a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5220,6 +5220,11 @@ int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !vmx_interrupt_blocked(vcpu); } +int vmx_mce_allowed(struct kvm_vcpu *vcpu) +{ + return 1; +} + int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { void __user *ret; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index d09abeac2b56..b75dfe7f039d 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -92,6 +92,7 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu); void vmx_cancel_injection(struct kvm_vcpu 
*vcpu); int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection); int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection); +int vmx_mce_allowed(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_enable_nmi_window(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0550359ed798..4b6b628efa21 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10794,6 +10794,12 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, kvm_update_dr7(vcpu); } + if (vcpu->arch.exception.vector == MC_VECTOR) { + r = static_call(kvm_x86_mce_allowed)(vcpu); + if (!r) + goto out_except; + } + kvm_inject_exception(vcpu); vcpu->arch.exception.pending = false; @@ -10801,6 +10807,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, can_inject = false; } +out_except: /* Don't inject interrupts if the user asked to avoid doing so */ if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) -- 2.53.0 From: Melody Wang Enable Restricted Injection in an SEV-SNP guest by setting the corresponding bit in the VMSA SEV features field (SEV_FEATURES[3]) from QEMU. Add Restricted Injection to the supported hypervisor features. 
Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/sev-common.h | 1 + arch/x86/kvm/svm/sev.c | 26 +++++++++++++++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1d506e5d6f46..41af7bd2473c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -453,6 +453,7 @@ #define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_RESTRICTED_INJECTION (19*32+12) /* Restricted Injection */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ #define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 01a6e4dbe423..ee17a3541b55 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -136,6 +136,7 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_RINJ (BIT_ULL(2) | GHCB_HV_FT_SNP_AP_CREATION) #define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) /* diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6d5d66563b0d..369fb1e36f58 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -39,7 +39,9 @@ #define GHCB_VERSION_MAX 2ULL #define GHCB_VERSION_MIN 1ULL -#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION) +#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \ + GHCB_HV_FT_SNP_AP_CREATION | \ + GHCB_HV_FT_SNP_RINJ) /* * The GHCB spec essentially states that all non-zero error codes other 
than @@ -63,6 +65,10 @@ module_param_named(sev_es, sev_es_enabled, bool, 0444); static bool __ro_after_init sev_snp_enabled = true; module_param_named(sev_snp, sev_snp_enabled, bool, 0444); +/* enable/disable SEV-SNP Restricted Injection support */ +static bool sev_snp_restricted_injection_enabled = true; +module_param_named(restricted_injection, sev_snp_restricted_injection_enabled, bool, 0444); + static unsigned int __ro_after_init nr_ciphertext_hiding_asids; module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444); @@ -3223,6 +3229,12 @@ void __init sev_hardware_setup(void) if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC)) sev_supported_vmsa_features |= SVM_SEV_FEAT_SECURE_TSC; + + if (!sev_snp_enabled || !cpu_feature_enabled(X86_FEATURE_RESTRICTED_INJECTION)) + sev_snp_restricted_injection_enabled = false; + + if (sev_snp_restricted_injection_enabled) + sev_supported_vmsa_features |= SVM_SEV_FEAT_RESTRICTED_INJECTION; } void sev_hardware_unsetup(void) @@ -4773,10 +4785,20 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); } +static void sev_snp_init_vmcb(struct vcpu_svm *svm) +{ + struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + /* V_NMI is not supported when Restricted Injection is enabled */ + if (sev->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION) + svm->vmcb->control.int_ctl &= ~V_NMI_ENABLE_MASK; +} + static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) { struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm); struct vmcb *vmcb = svm->vmcb01.ptr; + struct kvm_vcpu *vcpu = &svm->vcpu; svm->vmcb->control.misc_ctl |= SVM_MISC_ENABLE_SEV_ES; @@ -4843,6 +4865,8 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN, sev_enc_bit)); + if (is_sev_snp_guest(vcpu)) + sev_snp_init_vmcb(svm); } void 
sev_init_vmcb(struct vcpu_svm *svm, bool init_event) -- 2.53.0 From: Melody Wang The #HV IPI NAE event allows the guest to send an IPI to other vCPUs in the guest when the Restricted Injection feature is enabled. Implement the NAE event as per GHCB specification. Co-developed-by: Thomas Lendacky Signed-off-by: Thomas Lendacky Signed-off-by: Melody Wang Signed-off-by: Joerg Roedel --- arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/lapic.c | 24 +++++++++++++++++++++++- arch/x86/kvm/lapic.h | 2 ++ arch/x86/kvm/svm/sev.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index d84a13ac4627..d281dd21c540 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -122,6 +122,7 @@ #define SVM_VMGEXIT_HVDB_SET 1 #define SVM_VMGEXIT_HVDB_QUERY 2 #define SVM_VMGEXIT_HVDB_CLEAR 3 +#define SVM_VMGEXIT_HV_IPI 0x80000015ull #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull #define SVM_VMGEXIT_SAVIC 0x8000001aull #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4078e624ca66..ab40a2e4ab9d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2558,7 +2558,7 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) { - struct kvm_lapic *apic = to_lapic(this); + struct kvm_lapic *apic = this ? 
to_lapic(this) : vcpu->arch.apic; unsigned int offset = address - apic->base_address; u32 val; @@ -3583,3 +3583,25 @@ void kvm_lapic_exit(void) static_key_deferred_flush(&apic_sw_disabled); WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); } + +/* Send IPI by writing ICR with MSR write when X2APIC enabled, with mmio write when XAPIC enabled */ +int kvm_xapic_x2apic_send_ipi(struct kvm_vcpu *vcpu, u64 data) +{ + u32 icr_msr_addr = APIC_BASE_MSR + (APIC_ICR >> 4); + struct kvm_lapic *apic = vcpu->arch.apic; + gpa_t gpa = apic->base_address + APIC_ICR; + + if (!kvm_lapic_enabled(vcpu)) + return 1; + + if (vcpu->arch.apic_base & X2APIC_ENABLE) { + if (!kvm_x2apic_msr_write(vcpu, icr_msr_addr, data)) + return 0; + } else { + if (!apic_mmio_write(vcpu, NULL, gpa, 4, &data)) + return 0; + } + + return 1; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_xapic_x2apic_send_ipi); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 274885af4ebc..afd440c88981 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -156,6 +156,8 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); void kvm_lapic_exit(void); +int kvm_xapic_x2apic_send_ipi(struct kvm_vcpu *vcpu, u64 data); + u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic); static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 369fb1e36f58..d04f71836ef7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -35,6 +35,7 @@ #include "svm_ops.h" #include "cpuid.h" #include "trace.h" +#include "lapic.h" #define GHCB_VERSION_MAX 2ULL #define GHCB_VERSION_MIN 1ULL @@ -3538,6 +3539,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!is_sev_snp_guest(vcpu)) goto vmgexit_err; break; + case SVM_VMGEXIT_HV_IPI: + if (!sev_snp_guest(vcpu->kvm)) + goto vmgexit_err; + break; default: reason = GHCB_ERR_INVALID_EVENT; 
goto vmgexit_err; @@ -4416,6 +4421,22 @@ static int sev_snp_hv_doorbell_page(struct vcpu_svm *svm) return 0; } +static int sev_snp_hv_ipi(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u64 icr_info; + + if (!sev_snp_guest(vcpu->kvm)) + return -EINVAL; + + icr_info = svm->vmcb->control.exit_info_1; + + if (kvm_xapic_x2apic_send_ipi(vcpu, icr_info)) + return -EINVAL; + + return 0; +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4698,6 +4719,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); } + ret = 1; + break; + case SVM_VMGEXIT_HV_IPI: + if (sev_snp_hv_ipi(svm)) { + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); + } ret = 1; break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: -- 2.53.0 From: Paolo Bonzini There have been multiple occurrences of processors introducing a virtual privilege level concept for guests, where the hypervisor hosts multiple copies of a vCPU's register state (or at least of most of it) and provides hypercalls or instructions to switch between them. These include AMD VMPLs, Intel TDX partitions, Microsoft Hyper-V VTLs, and ARM CCA planes. Include documentation on how the feature will be exposed to userspace. In the past, two main solutions were attempted, mostly in the context of Hyper-V VTLs and SEV-SNP VMPLs: - use a single vCPU file descriptor, and store multiple copies of the state in a single struct kvm_vcpu. This requires a lot of changes to provide multiple copies of affected fields, especially MMUs and APICs; and complex uAPI extensions to direct existing ioctls to a specific privilege level. This solution looked marginally okay for SEV-SNP VMPLs, but only because the copies of the register state were hidden in the VMSA (KVM does not manage it); it showed all its problems when applied to Hyper-V VTLs.
- use multiple VM and vCPU file descriptors, and handle the switch entirely in userspace. This got gnarly pretty fast for even more reasons than the previous case, for example because VMs could no longer share memslots, including dirty bitmaps and private/shared attributes (a substantial problem for SEV-SNP since VMPLs share their ASID). Another problem was the need to share _some_ register state across VTLs and to control that vCPUs did not run in parallel; a lot of logic needed to be added in userspace to ensure that a higher-privileged VTL properly interrupted a lower-privileged one. This solution also complicates in-kernel implementation of privilege level switch, or even makes it impossible, because there is no kernel knowledge of the relationship between vCPUs that have the same id but belong to different privilege levels. Especially given the need to accelerate switches in kernel, it is clear that KVM needs some level of knowledge of the relationship between vCPUs that have the same id but belong to different privilege levels. For this reason, I proposed a design that only gives the initial set of VM and vCPU file descriptors the full set of ioctls + struct kvm_run; other privilege levels instead only support a small part of the KVM API. In fact for the vm file descriptor it is only three ioctls: KVM_CHECK_EXTENSION, KVM_SIGNAL_MSI, KVM_SET_MEMORY_ATTRIBUTES. For vCPUs it is basically KVM_GET/SET_*. This solves a lot of the problems in the multiple-file-descriptors solution, namely it gets for free the ability to avoid parallel execution of the same vCPUs in different privilege levels. Changes to the userspace API of course exist, but they are relatively small and more easily backwards compatible, because they boil down to the introduction of new file descriptor kinds instead of having to change the inputs to all affected ioctls.
It does share some of the code churn issues in the single-file-descriptor solution; on the other hand a prototype multi-fd VMPL implementation[1] also needed large scale changes which therefore seem unavoidable when privilege levels are provided by hardware, and not a software concept only as is the case for VTLs. [1] https://lore.kernel.org/lkml/cover.1726506534.git.roy.hopkins@suse.com/ Acknowledgements: thanks to everyone who participated in the discussions, you are too many to mention in a small margin. Thanks to Roy Hopkins, Tom Lendacky, Anel Orazgaliyeva, Nicolas Saenz-Julienne for experimenting with implementations of VTLs and VMPLs. Ah, and because x86 has three names for it and Arm has one, choose the Arm name for all architectures to avoid bikeshedding and to displease everyone---including the KVM/arm64 folks, probably. Co-developed-by: Joerg Roedel Signed-off-by: Paolo Bonzini Signed-off-by: Joerg Roedel --- Documentation/virt/kvm/api.rst | 102 +++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 30 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 52bbbb553ce1..d90b4a406454 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -56,6 +56,18 @@ be checked with :ref:`KVM_CHECK_EXTENSION `. Some capabilities also need to be enabled for VMs or VCPUs where their functionality is desired (see :ref:`cap_enable` and :ref:`cap_enable_vm`). +On some architectures, a "virtual privilege level" concept may be present +apart from the usual separation between user and supervisor mode, or +between hypervisor and guest mode. When this is the case, a single vCPU +can have multiple copies of its register state (or at least most of it), +and will switch between them through a special processor instruction, +or through some kind of hypercall. + +KVM calls these privilege levels "planes".
Planes other than the +initially-created one (called "plane 0") have a file descriptor each, +and so do the planes of each vCPU. Ioctls for vCPU planes should also +be issued from a single thread, unless specially marked as asynchronous +in the documentation. 2. Restrictions =============== @@ -119,6 +131,11 @@ description: Type: system, vm, or vcpu. + File descriptors for planes other than plane 0 provide a subset + of vm and vcpu ioctls. Those that *are* supported in extra + planes are marked specially in the documentation (for example, + `vcpu (all planes)`). + Parameters: what parameters are accepted by the ioctl. @@ -309,7 +326,7 @@ the VCPU file descriptor can be mmap-ed, including: :Capability: basic :Architectures: all -:Type: vm ioctl +:Type: vm ioctl (all planes) :Parameters: vcpu id (apic id on x86) :Returns: vcpu fd on success, -1 on error @@ -350,6 +367,8 @@ machines, the resulting vcpu fd can be memory mapped at page offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual cpu's hardware control block. +VCPUs for non-zero planes can only be created if the VCPU with the same index +has already been created for plane zero. 4.8 KVM_GET_DIRTY_LOG --------------------- @@ -421,7 +440,7 @@ kvm_run' (see below). :Capability: basic :Architectures: all except arm64 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_regs (out) :Returns: 0 on success, -1 on error @@ -461,7 +480,7 @@ Reads the general purpose registers from the vcpu. :Capability: basic :Architectures: all except arm64 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_regs (in) :Returns: 0 on success, -1 on error @@ -475,7 +494,7 @@ See KVM_GET_REGS for the data structure. :Capability: basic :Architectures: x86, ppc -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_sregs (out) :Returns: 0 on success, -1 on error @@ -506,7 +525,7 @@ but not yet injected into the cpu core.
:Capability: basic :Architectures: x86, ppc -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_sregs (in) :Returns: 0 on success, -1 on error @@ -519,7 +538,7 @@ data structures. :Capability: basic :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_translation (in/out) :Returns: 0 on success, -1 on error @@ -645,7 +664,7 @@ This is an asynchronous vcpu ioctl and can be invoked from any thread. :Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system) :Architectures: x86 -:Type: system ioctl, vcpu ioctl +:Type: system ioctl, vcpu ioctl (all planes) :Parameters: struct kvm_msrs (in/out) :Returns: number of msrs successfully returned; -1 on error @@ -685,7 +704,7 @@ kvm will fill in the 'data' member. :Capability: basic :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_msrs (in) :Returns: number of msrs successfully set (see below), -1 on error @@ -773,7 +792,7 @@ signal mask. :Capability: basic :Architectures: x86, loongarch -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_fpu (out) :Returns: 0 on success, -1 on error @@ -811,7 +830,7 @@ Reads the floating point state from the vcpu. :Capability: basic :Architectures: x86, loongarch -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_fpu (in) :Returns: 0 on success, -1 on error @@ -1128,7 +1147,7 @@ Other flags returned by ``KVM_GET_CLOCK`` are accepted but ignored. 
:Capability: KVM_CAP_VCPU_EVENTS :Extended by: KVM_CAP_INTR_SHADOW :Architectures: x86, arm64 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_vcpu_events (out) :Returns: 0 on success, -1 on error @@ -1254,7 +1273,7 @@ Calling this ioctl on a vCPU that hasn't been initialized will return :Capability: KVM_CAP_VCPU_EVENTS :Extended by: KVM_CAP_INTR_SHADOW :Architectures: x86, arm64 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_vcpu_events (in) :Returns: 0 on success, -1 on error @@ -1323,7 +1342,7 @@ Calling this ioctl on a vCPU that hasn't been initialized will return :Capability: KVM_CAP_DEBUGREGS :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_debugregs (out) :Returns: 0 on success, -1 on error @@ -1345,7 +1364,7 @@ Reads debug registers from the vcpu. :Capability: KVM_CAP_DEBUGREGS :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_debugregs (in) :Returns: 0 on success, -1 on error @@ -1662,7 +1681,7 @@ otherwise it will return EBUSY error. :Capability: KVM_CAP_XSAVE :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_xsave (out) :Returns: 0 on success, -1 on error @@ -1682,7 +1701,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace. :Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2 :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_xsave (in) :Returns: 0 on success, -1 on error @@ -1710,7 +1729,7 @@ contents of CPUID leaf 0xD on the host. :Capability: KVM_CAP_XCRS :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_xcrs (out) :Returns: 0 on success, -1 on error @@ -1737,7 +1756,7 @@ This ioctl would copy current vcpu's xcrs to the userspace. 
:Capability: KVM_CAP_XCRS :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_xcrs (in) :Returns: 0 on success, -1 on error @@ -1886,11 +1905,14 @@ The flags bitmap is defined as:: :Capability: KVM_CAP_IRQ_ROUTING :Architectures: x86 s390 arm64 -:Type: vm ioctl +:Type: vm ioctl (all planes) :Parameters: struct kvm_irq_routing (in) :Returns: 0 on success, -1 on error Sets the GSI routing table entries, overwriting any previously set entries. +Note that the kernel maintains only one GSI routing table for all planes. The +plane for which the GSI routing table was set last will receive all interrupts +signaled through GSI pins. On arm64, GSI routing has the following limitation: @@ -2040,7 +2062,7 @@ error. :Capability: KVM_CAP_IRQCHIP :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_lapic_state (out) :Returns: 0 on success, -1 on error @@ -2071,7 +2093,7 @@ always uses xAPIC format. :Capability: KVM_CAP_IRQCHIP :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_lapic_state (in) :Returns: 0 on success, -1 on error @@ -2305,7 +2327,7 @@ prior to calling the KVM_RUN ioctl. :Capability: KVM_CAP_ONE_REG :Architectures: all -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_one_reg (in) :Returns: 0 on success, negative value on failure @@ -2930,7 +2952,7 @@ Following are the KVM-defined registers for x86: :Capability: KVM_CAP_ONE_REG :Architectures: all -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_one_reg (in and out) :Returns: 0 on success, negative value on failure @@ -2984,7 +3006,7 @@ after pausing the vcpu, but before it is resumed. 
:Capability: KVM_CAP_SIGNAL_MSI :Architectures: x86 arm64 -:Type: vm ioctl +:Type: vm ioctl (all planes) :Parameters: struct kvm_msi (in) :Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error @@ -3605,7 +3627,7 @@ VCPU matching underlying host. :Capability: basic :Architectures: arm64, mips, riscv, x86 (if KVM_CAP_ONE_REG) -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_reg_list (in/out) :Returns: 0 on success; -1 on error @@ -4904,7 +4926,7 @@ The acceptable values for the flags field are:: :Capability: KVM_CAP_NESTED_STATE :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_nested_state (in/out) :Returns: 0 on success, -1 on error @@ -4978,7 +5000,7 @@ to the KVM_CHECK_EXTENSION ioctl(). :Capability: KVM_CAP_NESTED_STATE :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_nested_state (in) :Returns: 0 on success, -1 on error @@ -5859,7 +5881,7 @@ then ``length`` is returned. :Capability: KVM_CAP_SREGS2 :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_sregs2 (out) :Returns: 0 on success, -1 on error @@ -5892,7 +5914,7 @@ flags values for ``kvm_sregs2``: :Capability: KVM_CAP_SREGS2 :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_sregs2 (in) :Returns: 0 on success, -1 on error @@ -6108,7 +6130,7 @@ as the descriptors in Descriptors block. :Capability: KVM_CAP_XSAVE2 :Architectures: x86 -:Type: vcpu ioctl +:Type: vcpu ioctl (all planes) :Parameters: struct kvm_xsave (out) :Returns: 0 on success, -1 on error @@ -6555,6 +6577,26 @@ KVM_S390_KEYOP_SSKE .. _kvm_run: +.. _KVM_CREATE_PLANE: + +4.145 KVM_CREATE_PLANE +---------------------- + +:Capability: KVM_CAP_PLANES +:Architectures: none +:Type: vm ioctl +:Parameters: plane id +:Returns: a VM fd that can be used to control the new plane. + +Creates a new *plane*, i.e. 
a separate privilege level for the virtual machine. + +Each plane has a numeric id that is used when communicating with KVM. While +KVM is currently agnostic to whether low ids are more or less privileged, it is +expected that this will not always be the case in the future. For example KVM +in the future may use the plane id when planes are supported by hardware (as is +the case for VMPLs in AMD), or if KVM supports accelerated plane switch +operations (as might be the case for Hyper-V VTLs). + 5. The kvm_run structure ======================== -- 2.53.0 From: Paolo Bonzini Introduce a data structure to keep VM-wide per-plane state. Initialize the structure with a back-pointer to struct kvm and the plane level the structure represents. Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 7 +++++++ include/uapi/linux/kvm.h | 6 ++++++ virt/kvm/kvm_main.c | 41 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c14aee1fb06..5be4c9f118b4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -767,6 +767,11 @@ struct kvm_memslots { int node_idx; }; +struct kvm_plane { + struct kvm *kvm; + unsigned level; +}; + struct kvm { #ifdef KVM_HAVE_MMU_RWLOCK rwlock_t mmu_lock; @@ -806,6 +811,8 @@ struct kvm { spinlock_t gpc_lock; struct list_head gpc_list; + struct kvm_plane *planes[KVM_MAX_PLANES]; + /* * created_vcpus is protected by kvm->lock, and is incremented * at the beginning of KVM_CREATE_VCPU. 
online_vcpus is only diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6c8afa2047bf..813f964a6dc1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -730,6 +730,11 @@ struct kvm_enable_cap { #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) +/* + * Maximum number of supported planes + */ +#define KVM_MAX_PLANES 16 + /* * Extension capability list. */ @@ -996,6 +1001,7 @@ struct kvm_enable_cap { #define KVM_CAP_S390_USER_OPEREXEC 246 #define KVM_CAP_S390_KEYOP 247 #define KVM_CAP_S390_VSIE_ESAMODE 248 +#define KVM_CAP_PLANES 249 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 881f92d7a469..a68469c6d12e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1095,6 +1095,38 @@ static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm, static int kvm_enable_virtualization(void); static void kvm_disable_virtualization(void); +static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) +{ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + if (!plane) + return NULL; + + plane->kvm = kvm; + plane->level = plane_level; + + kvm->planes[plane_level] = plane; + + return plane; +} + +static void kvm_destroy_one_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + +static void kvm_destroy_planes(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_PLANES; ++i) { + if (kvm->planes[i] == NULL) + continue; + kvm_destroy_one_plane(kvm->planes[i]); + kvm->planes[i] = NULL; + } +} + static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) { struct kvm *kvm = kvm_arch_alloc_vm(); @@ -1127,6 +1159,12 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + /* Initialize planes array and allocate plane 0 */ + if 
(kvm_create_plane(kvm, 0) == NULL) { + r = -ENOMEM; + goto out_no_planes; + } + /* * Force subsequent debugfs file creations to fail if the VM directory * is not created (by kvm_create_vm_debugfs()). @@ -1225,6 +1263,8 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); out_err_no_srcu: + kvm_destroy_planes(kvm); +out_no_planes: kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -1304,6 +1344,7 @@ static void kvm_destroy_vm(struct kvm *kvm) xa_destroy(&kvm->mem_attr_array); #endif kvm_arch_free_vm(kvm); + kvm_destroy_planes(kvm); preempt_notifier_dec(); kvm_disable_virtualization(); mmdrop(mm); -- 2.53.0 From: Paolo Bonzini Each plane will have its own set of VCPUs, so move the vcpu_array to the plane structure. Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 30 +++++++++++++++++++++--------- virt/kvm/kvm_main.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5be4c9f118b4..5a72f73a2f31 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -324,12 +324,14 @@ struct kvm_mmio_fragment { struct kvm_vcpu { struct kvm *kvm; + struct kvm_plane *plane; + #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif int cpu; int vcpu_id; /* id given by userspace at creation */ - int vcpu_idx; /* index into kvm->vcpu_array */ + int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ int ____srcu_idx; /* Don't use this directly. You've been warned. 
*/ #ifdef CONFIG_PROVE_RCU int srcu_depth; @@ -770,6 +772,9 @@ struct kvm_memslots { struct kvm_plane { struct kvm *kvm; unsigned level; + + /* Per-Plane VCPU array */ + struct xarray vcpu_array; }; struct kvm { @@ -795,7 +800,6 @@ struct kvm { struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; - struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an * incorrect answer is acceptable. @@ -996,9 +1000,9 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) lockdep_is_held(&kvm->slots_lock)); } -static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) +static inline struct kvm_vcpu *plane_get_vcpu(struct kvm_plane *plane, int i) { - int num_vcpus = atomic_read(&kvm->online_vcpus); + int num_vcpus = atomic_read(&plane->kvm->online_vcpus); /* * Explicitly verify the target vCPU is online, as the anti-speculation @@ -1012,13 +1016,21 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. 
*/ smp_rmb(); - return xa_load(&kvm->vcpu_array, i); + return xa_load(&plane->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - if (atomic_read(&kvm->online_vcpus)) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) +{ + return plane_get_vcpu(kvm->planes[0], i); +} + +#define plane_for_each_vcpu(idx, vcpup, plane) \ + if (atomic_read(&plane->kvm->online_vcpus)) \ + xa_for_each_range(&plane->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&plane->kvm->online_vcpus) - 1)) + +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + plane_for_each_vcpu(idx, vcpup, kvm->planes[0]) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a68469c6d12e..668645dd3945 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -443,6 +443,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) mutex_init(&vcpu->mutex); vcpu->cpu = -1; vcpu->kvm = kvm; + vcpu->plane = kvm->planes[0]; vcpu->vcpu_id = id; vcpu->pid = NULL; rwlock_init(&vcpu->pid_lock); @@ -479,14 +480,14 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu); } -void kvm_destroy_vcpus(struct kvm *kvm) +static void plane_destroy_vcpus(struct kvm_plane *plane) { unsigned long i; struct kvm_vcpu *vcpu; - kvm_for_each_vcpu(i, vcpu, kvm) { + plane_for_each_vcpu(i, vcpu, plane) { kvm_vcpu_destroy(vcpu); - xa_erase(&kvm->vcpu_array, i); + xa_erase(&plane->vcpu_array, i); /* * Assert that the vCPU isn't visible in any way, to ensure KVM @@ -494,7 +495,22 @@ void kvm_destroy_vcpus(struct kvm *kvm) * in VM-wide request, e.g. to flush remote TLBs when tearing * down MMUs, or to mark the VM dead if a KVM_BUG_ON() fires. 
*/ - WARN_ON_ONCE(xa_load(&kvm->vcpu_array, i) || kvm_get_vcpu(kvm, i)); + WARN_ON_ONCE(xa_load(&plane->vcpu_array, i) || plane_get_vcpu(plane, i)); + } + +} + +void kvm_destroy_vcpus(struct kvm *kvm) +{ + unsigned lvl; + + for (lvl = KVM_MAX_PLANES; lvl > 0; lvl--) { + struct kvm_plane *plane = kvm->planes[lvl - 1]; + + if (plane == NULL) + continue; + + plane_destroy_vcpus(plane); } atomic_set(&kvm->online_vcpus, 0); @@ -1105,6 +1121,8 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) plane->kvm = kvm; plane->level = plane_level; + xa_init(&plane->vcpu_array); + kvm->planes[plane_level] = plane; return plane; @@ -1146,7 +1164,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) mutex_init(&kvm->slots_arch_lock); spin_lock_init(&kvm->mn_invalidate_lock); rcuwait_init(&kvm->mn_memslots_update_rcuwait); - xa_init(&kvm->vcpu_array); #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES xa_init(&kvm->mem_attr_array); #endif @@ -4039,7 +4056,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) if (idx == me->vcpu_idx) continue; - vcpu = xa_load(&kvm->vcpu_array, idx); + vcpu = xa_load(&kvm->planes[0]->vcpu_array, idx); if (!READ_ONCE(vcpu->ready)) continue; if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu)) @@ -4258,7 +4275,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) } vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); - r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); + r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); WARN_ON_ONCE(r == -EBUSY); if (r) goto unlock_vcpu_destroy; @@ -4293,7 +4310,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) kvm_put_xa_erase: mutex_unlock(&vcpu->mutex); kvm_put_kvm_no_destroy(kvm); - xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx); + xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx); unlock_vcpu_destroy: mutex_unlock(&kvm->lock); 
kvm_dirty_ring_free(&vcpu->dirty_ring); -- 2.53.0 From: Joerg Roedel When creating one VCPU object per plane there is still a lot of VCPU state which needs to be shared across all planes. Create struct kvm_vcpu_common as a container for this shared state. Co-developed-by: Carlos López Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/kvm_main.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5a72f73a2f31..c4c4922df965 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -322,6 +322,13 @@ struct kvm_mmio_fragment { unsigned int len; }; +struct kvm_vcpu_common { + struct kvm *kvm; + + /* Currently active VCPU */ + struct kvm_vcpu *current_vcpu; +}; + struct kvm_vcpu { struct kvm *kvm; struct kvm_plane *plane; @@ -400,6 +407,9 @@ struct kvm_vcpu { */ struct kvm_memory_slot *last_used_slot; u64 last_used_slot_gen; + + struct kvm_vcpu_common *common; + unsigned plane_level; }; /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 668645dd3945..fb840d029c56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -438,6 +438,20 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) } #endif +static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm) +{ + struct kvm_vcpu_common *common = kzalloc(sizeof(*common), GFP_KERNEL_ACCOUNT); + + if (common == NULL) + return -ENOMEM; + + common->kvm = kvm; + common->current_vcpu = vcpu; + vcpu->common = common; + + return 0; +} + static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { mutex_init(&vcpu->mutex); @@ -459,14 +473,26 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); vcpu->last_used_slot = NULL; + vcpu->plane_level = 0; + /* Fill the stats id string for the vcpu */ snprintf(vcpu->stats_id,
sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", task_pid_nr(current), id); } +static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) +{ + if (vcpu->plane_level == 0) + kfree(vcpu->common); + + vcpu->common = NULL; +} + static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { kvm_arch_vcpu_destroy(vcpu); + + kvm_vcpu_common_destroy(vcpu); kvm_dirty_ring_free(&vcpu->dirty_ring); /* @@ -1360,8 +1386,8 @@ static void kvm_destroy_vm(struct kvm *kvm) #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES xa_destroy(&kvm->mem_attr_array); #endif - kvm_arch_free_vm(kvm); kvm_destroy_planes(kvm); + kvm_arch_free_vm(kvm); preempt_notifier_dec(); kvm_disable_virtualization(); mmdrop(mm); @@ -4246,11 +4272,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) goto vcpu_decrement; } + r = kvm_vcpu_init_common(vcpu, kvm); + if (r) + goto vcpu_free; + BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) { r = -ENOMEM; - goto vcpu_free; + goto vcpu_free_common; } vcpu->run = page_address(page); @@ -4318,6 +4348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) kvm_arch_vcpu_destroy(vcpu); vcpu_free_run_page: free_page((unsigned long)vcpu->run); +vcpu_free_common: + kvm_vcpu_common_destroy(vcpu); vcpu_free: kmem_cache_free(kvm_vcpu_cache, vcpu); vcpu_decrement: -- 2.53.0 From: Joerg Roedel Do the accounting of created vcpus and the sanity checks only once per plane. 
Co-developed-by: Carlos López Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 2 + virt/kvm/kvm_main.c | 108 ++++++++++++++++++++++++--------------- 2 files changed, 68 insertions(+), 42 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c4c4922df965..47144a83f9c5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -325,6 +325,8 @@ struct kvm_mmio_fragment { struct kvm_vcpu_common { struct kvm *kvm; + int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ + /* Currently active VCPU */ struct kvm_vcpu *current_vcpu; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fb840d029c56..14e74cdc4709 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -438,18 +438,58 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) } #endif -static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm) +static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned long id) { - struct kvm_vcpu_common *common = kzalloc(sizeof(*common), GFP_KERNEL_ACCOUNT); + struct kvm_vcpu_common *common __free(kfree) = kzalloc(sizeof(*common), GFP_KERNEL_ACCOUNT); + int r; - if (common == NULL) - return -ENOMEM; + /* + * KVM tracks vCPU IDs as 'int', be kind to userspace and reject + * too-large values instead of silently truncating. + * + * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first + * changing the storage type (at the very least, IDs should be tracked + * as unsigned ints). 
+ */ + BUILD_BUG_ON(KVM_MAX_VCPU_IDS > INT_MAX); + if (id >= KVM_MAX_VCPU_IDS) + return -EINVAL; + + mutex_lock(&kvm->lock); + kvm->created_vcpus++; + mutex_unlock(&kvm->lock); + + if (common == NULL) { + r = -ENOMEM; + goto out_drop_counter; + } + + common->vcpu_idx = atomic_read(&kvm->online_vcpus); common->kvm = kvm; common->current_vcpu = vcpu; - vcpu->common = common; + vcpu->common = no_free_ptr(common); return 0; + +out_drop_counter: + mutex_lock(&kvm->lock); + kvm->created_vcpus--; + mutex_unlock(&kvm->lock); + + return r; +} + +static void kvm_vcpu_finish_common(struct kvm_vcpu *vcpu) +{ + smp_wmb(); + if (vcpu->plane_level == 0) { + /* + * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu + * pointer before kvm->online_vcpu's incremented value. + */ + atomic_inc(&vcpu->kvm->online_vcpus); + } } static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -482,10 +522,19 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) { - if (vcpu->plane_level == 0) - kfree(vcpu->common); + struct kvm_vcpu_common *common = vcpu->common; + struct kvm *kvm = common->kvm; vcpu->common = NULL; + + if (vcpu->plane_level != 0) + return; + + mutex_lock(&common->kvm->lock); + kvm->created_vcpus--; + mutex_unlock(&common->kvm->lock); + + kfree(common); } static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -4235,22 +4284,10 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) */ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) { - int r; + int r = -EINVAL; struct kvm_vcpu *vcpu; struct page *page; - /* - * KVM tracks vCPU IDs as 'int', be kind to userspace and reject - * too-large values instead of silently truncating. - * - * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first - * changing the storage type (at the very least, IDs should be tracked - * as unsigned ints). 
- */ - BUILD_BUG_ON(KVM_MAX_VCPU_IDS > INT_MAX); - if (id >= KVM_MAX_VCPU_IDS) - return -EINVAL; - mutex_lock(&kvm->lock); if (kvm->created_vcpus >= kvm->max_vcpus) { mutex_unlock(&kvm->lock); @@ -4258,24 +4295,20 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) } r = kvm_arch_vcpu_precreate(kvm, id); - if (r) { - mutex_unlock(&kvm->lock); - return r; - } - - kvm->created_vcpus++; mutex_unlock(&kvm->lock); + if (r) + return r; vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vcpu) { - r = -ENOMEM; - goto vcpu_decrement; - } + if (!vcpu) + return -ENOMEM; - r = kvm_vcpu_init_common(vcpu, kvm); + r = kvm_vcpu_init_common(vcpu, kvm, id); if (r) goto vcpu_free; + vcpu->vcpu_idx = vcpu->common->vcpu_idx; + BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) { @@ -4304,7 +4337,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) goto unlock_vcpu_destroy; } - vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); WARN_ON_ONCE(r == -EBUSY); if (r) @@ -4324,12 +4356,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) if (r < 0) goto kvm_put_xa_erase; - /* - * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu - * pointer before kvm->online_vcpu's incremented value. 
- */ - smp_wmb(); - atomic_inc(&kvm->online_vcpus); + kvm_vcpu_finish_common(vcpu); mutex_unlock(&vcpu->mutex); mutex_unlock(&kvm->lock); @@ -4352,10 +4379,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) kvm_vcpu_common_destroy(vcpu); vcpu_free: kmem_cache_free(kvm_vcpu_cache, vcpu); -vcpu_decrement: - mutex_lock(&kvm->lock); - kvm->created_vcpus--; - mutex_unlock(&kvm->lock); + return r; } -- 2.53.0 From: Joerg Roedel Introduce accessor functions for the scheduling state in struct kvm_vcpu to make it easier to move these fields to struct kvm_vcpu_common. Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/nested.c | 2 +- arch/loongarch/kvm/vcpu.c | 5 +++-- arch/mips/kvm/mips.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/riscv/kvm/vcpu.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/posted_intr.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 12 ++++++------ arch/x86/kvm/xen.h | 2 +- include/linux/kvm_host.h | 20 ++++++++++++++++++++ virt/kvm/kvm_main.c | 6 +++--- 14 files changed, 42 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9453321ef8c6..de00088c9a80 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1253,7 +1253,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu_load(vcpu); - if (!vcpu->wants_to_run) { + if (!kvm_vcpu_wants_to_run(vcpu)) { ret = -EINTR; goto out; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 6f7bc9a9992e..b84b1edb02d8 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -822,7 +822,7 @@ void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) * scheduling out and not in WFI emulation, suggesting it is likely to * reuse the MMU sometime soon. 
*/ - if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI)) + if (kvm_vcpu_scheduled_out(vcpu) && !vcpu_get_flag(vcpu, IN_WFI)) return; if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index e28084c49e68..bde8b68b8273 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1847,7 +1847,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) int cpu, idx; unsigned long flags; - if (vcpu->preempted && kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_PREEMPT)) { + if (kvm_vcpu_preempted(vcpu) && + kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_PREEMPT)) { /* * Take the srcu lock as memslots will be accessed to check * the gfn cache generation against the memslots generation. @@ -1887,7 +1888,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) break; } - if (!vcpu->wants_to_run) + if (!kvm_vcpu_wants_to_run(vcpu)) return r; /* Clear exit_reason */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index a53abbba43ea..f928ba105104 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -433,7 +433,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->mmio_needed = 0; } - if (!vcpu->wants_to_run) + if (!kvm_vcpu_wants_to_run(vcpu)) goto out; lose_fpu(1); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 00302399fc37..800867c164c6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1840,7 +1840,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_sigset_activate(vcpu); - if (!vcpu->wants_to_run) + if (!kvm_vcpu_wants_to_run(vcpu)) r = -EINTR; else r = kvmppc_vcpu_run(vcpu); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index a73690eda84b..8519a5bfbdc4 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -862,7 +862,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return ret; } - if (!vcpu->wants_to_run) { + if (!kvm_vcpu_wants_to_run(vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); 
return -EINTR; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ffb20a64d328..8401bcad1f37 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4954,7 +4954,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.pv.dumping) return -EINVAL; - if (!vcpu->wants_to_run) + if (!kvm_vcpu_wants_to_run(vcpu)) return -EINTR; if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 295e02c17b9b..1524c1bb4f37 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1475,7 +1475,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm)) + if (kvm_vcpu_scheduled_out(vcpu) && !kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); if (kvm_vcpu_apicv_active(vcpu)) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 4a6d9a17da23..cba1e6346fc5 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -239,7 +239,7 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) * the cost of propagating PIR.IRR to PID.ON is negligible compared to * the cost of a spurious IRQ, and vCPU put/load is a slow path. 
*/ - if (!vcpu->preempted && kvm_vcpu_is_blocking(vcpu) && + if (!kvm_vcpu_preempted(vcpu) && kvm_vcpu_is_blocking(vcpu) && ((is_td_vcpu(vcpu) && tdx_interrupt_allowed(vcpu)) || (!is_td_vcpu(vcpu) && !vmx_interrupt_blocked(vcpu)))) pi_enable_wakeup_handler(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a82a4197d18a..20262855bfe8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1552,7 +1552,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) */ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm)) + if (kvm_vcpu_scheduled_out(vcpu) && !kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); vmx_vcpu_load_vmcs(vcpu, cpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4b6b628efa21..6355fe7f546f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5168,7 +5168,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_request_l1tf_flush_l1d(); - if (vcpu->scheduled_out && pmu->version && pmu->event_count) { + if (kvm_vcpu_scheduled_out(vcpu) && pmu->version && pmu->event_count) { pmu->need_cleanup = true; kvm_make_request(KVM_REQ_PMU, vcpu); } @@ -5293,7 +5293,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted) { + if (kvm_vcpu_preempted(vcpu)) { /* * Assume protected guests are in-kernel. 
Inefficient yielding * due to false positives is preferable to never yielding due @@ -10404,7 +10404,7 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) rcu_read_unlock(); - if (!target || !READ_ONCE(target->ready)) + if (!target || !kvm_vcpu_ready(target)) goto no_yield; /* Ignore requests to yield to self */ @@ -12041,7 +12041,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vcpu_srcu_read_lock(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { - if (!vcpu->wants_to_run) { + if (!kvm_vcpu_wants_to_run(vcpu)) { r = -EINTR; goto out; } @@ -12120,7 +12120,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vcpu->mmio_needed); } - if (!vcpu->wants_to_run) { + if (!kvm_vcpu_wants_to_run(vcpu)) { r = -EINTR; goto out; } @@ -13021,7 +13021,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) * only path that can trigger INIT emulation _and_ loads FPU state, and * KVM_RUN should _always_ load FPU state. */ - WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); + WARN_ON_ONCE(kvm_vcpu_wants_to_run(vcpu) != fpstate->in_use); fpu_in_use = fpstate->in_use; if (fpu_in_use) kvm_put_guest_fpu(vcpu); diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 59e6128a7bd3..78793c1ac913 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -206,7 +206,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) * behalf of the vCPU. Only if the VMM does actually block * does it need to enter RUNSTATE_blocked. 
*/ - if (WARN_ON_ONCE(!vcpu->preempted)) + if (WARN_ON_ONCE(!kvm_vcpu_preempted(vcpu))) return; kvm_xen_update_runstate(vcpu, RUNSTATE_runnable); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 47144a83f9c5..b334c15d834e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -414,6 +414,26 @@ struct kvm_vcpu { unsigned plane_level; }; +static inline bool kvm_vcpu_wants_to_run(struct kvm_vcpu *vcpu) +{ + return vcpu->wants_to_run; +} + +static inline bool kvm_vcpu_preempted(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->preempted); +} + +static inline bool kvm_vcpu_ready(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->ready); +} + +static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu) +{ + return vcpu->scheduled_out; +} + /* * Start accounting time towards a guest. * Must be called before entering guest context. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14e74cdc4709..2c16e124a507 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4132,7 +4132,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; vcpu = xa_load(&kvm->planes[0]->vcpu_array, idx); - if (!READ_ONCE(vcpu->ready)) + if (!kvm_vcpu_ready(vcpu)) continue; if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu)) continue; @@ -4143,7 +4143,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) * waiting on IPI delivery, i.e. the target vCPU is in-kernel * for the purposes of directed yield. 
*/ - if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && + if (kvm_vcpu_preempted(vcpu) && yield_to_kernel_mode && !kvm_arch_dy_has_pending_interrupt(vcpu) && !kvm_arch_vcpu_preempted_in_kernel(vcpu)) continue; @@ -6513,7 +6513,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, WRITE_ONCE(vcpu->scheduled_out, true); - if (task_is_runnable(current) && vcpu->wants_to_run) { + if (task_is_runnable(current) && kvm_vcpu_wants_to_run(vcpu)) { WRITE_ONCE(vcpu->preempted, true); WRITE_ONCE(vcpu->ready, true); } -- 2.53.0 From: Joerg Roedel This will remove the need to update kvm_running_vcpu on plane switches. Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arch_timer.c | 3 ++- arch/arm64/kvm/vgic/vgic-init.c | 3 ++- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 18 +++++++++++------- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index cbea4d9ee955..b2c4f422414e 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -204,7 +204,8 @@ static void soft_timer_cancel(struct hrtimer *hrt) static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + struct kvm_vcpu_common *common = *(struct kvm_vcpu_common **)dev_id; + struct kvm_vcpu *vcpu = common->current_vcpu; struct arch_timer_context *ctx; struct timer_map map; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 933983bb2005..a12b89b423d5 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -730,7 +730,8 @@ void kvm_vgic_cpu_down(void) static irqreturn_t vgic_maintenance_handler(int irq, void *data) { - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data; + struct kvm_vcpu_common *common = *(struct kvm_vcpu_common **)data; + struct kvm_vcpu *vcpu = common->current_vcpu; /* * We cannot rely on the vgic maintenance interrupt to be diff --git a/include/linux/kvm_host.h 
b/include/linux/kvm_host.h index b334c15d834e..d54f299218a4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2462,7 +2462,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) } struct kvm_vcpu *kvm_get_running_vcpu(void); -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); +struct kvm_vcpu_common * __percpu *kvm_get_running_vcpus(void); #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) struct kvm_kernel_irqfd; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2c16e124a507..9c07321e30f4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -113,7 +113,7 @@ LIST_HEAD(vm_list); static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; -static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); +static DEFINE_PER_CPU(struct kvm_vcpu_common *, kvm_running_vcpu); static struct dentry *kvm_debugfs_dir; @@ -165,7 +165,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) { int cpu = get_cpu(); - __this_cpu_write(kvm_running_vcpu, vcpu); + __this_cpu_write(kvm_running_vcpu, vcpu->common); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); @@ -3954,7 +3954,7 @@ void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait) * kick" check does not need atomic operations if kvm_vcpu_kick is used * within the vCPU thread itself. 
*/ - if (vcpu == __this_cpu_read(kvm_running_vcpu)) { + if (vcpu == kvm_get_running_vcpu()) { if (vcpu->mode == IN_GUEST_MODE) WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); goto out; @@ -6500,7 +6500,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) WRITE_ONCE(vcpu->preempted, false); WRITE_ONCE(vcpu->ready, false); - __this_cpu_write(kvm_running_vcpu, vcpu); + __this_cpu_write(kvm_running_vcpu, vcpu->common); kvm_arch_vcpu_load(vcpu, cpu); WRITE_ONCE(vcpu->scheduled_out, false); @@ -6532,12 +6532,16 @@ static void kvm_sched_out(struct preempt_notifier *pn, */ struct kvm_vcpu *kvm_get_running_vcpu(void) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu_common *common; + struct kvm_vcpu *vcpu = NULL; preempt_disable(); - vcpu = __this_cpu_read(kvm_running_vcpu); + common = __this_cpu_read(kvm_running_vcpu); preempt_enable(); + if (common) + vcpu = common->current_vcpu; + return vcpu; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_running_vcpu); @@ -6545,7 +6549,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_running_vcpu); /** * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. */ -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) +struct kvm_vcpu_common * __percpu *kvm_get_running_vcpus(void) { return &kvm_running_vcpu; } -- 2.53.0 From: Joerg Roedel The scheduling state of the KVM VCPU is shared between all per-plane VCPU objects. Move it to struct kvm_vcpu_common. Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/svm.c | 2 +- include/linux/kvm_host.h | 24 ++++++++++---------- virt/kvm/kvm_main.c | 47 +++++++++++++++++++++------------------- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1524c1bb4f37..f5cc30a6732f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -229,7 +229,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) * and only if the vCPU is actively running, e.g. to * avoid positives if userspace is stuffing state. 
*/ - if (is_guest_mode(vcpu) && vcpu->wants_to_run) + if (is_guest_mode(vcpu) && vcpu->common->wants_to_run) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); svm_leave_nested(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d54f299218a4..a6aacd507c02 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -329,15 +329,21 @@ struct kvm_vcpu_common { /* Currently active VCPU */ struct kvm_vcpu *current_vcpu; + + /* Scheduling state */ +#ifdef CONFIG_PREEMPT_NOTIFIERS + struct preempt_notifier preempt_notifier; +#endif + bool wants_to_run; + bool preempted; + bool ready; + bool scheduled_out; }; struct kvm_vcpu { struct kvm *kvm; struct kvm_plane *plane; -#ifdef CONFIG_PREEMPT_NOTIFIERS - struct preempt_notifier preempt_notifier; -#endif int cpu; int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ @@ -392,10 +398,6 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif - bool wants_to_run; - bool preempted; - bool ready; - bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; @@ -416,22 +418,22 @@ struct kvm_vcpu { static inline bool kvm_vcpu_wants_to_run(struct kvm_vcpu *vcpu) { - return vcpu->wants_to_run; + return vcpu->common->wants_to_run; } static inline bool kvm_vcpu_preempted(struct kvm_vcpu *vcpu) { - return READ_ONCE(vcpu->preempted); + return READ_ONCE(vcpu->common->preempted); } static inline bool kvm_vcpu_ready(struct kvm_vcpu *vcpu) { - return READ_ONCE(vcpu->ready); + return READ_ONCE(vcpu->common->ready); } static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu) { - return vcpu->scheduled_out; + return vcpu->common->scheduled_out; } /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9c07321e30f4..a44f8dc8418a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -166,7 +166,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) int cpu = get_cpu(); 
__this_cpu_write(kvm_running_vcpu, vcpu->common); - preempt_notifier_register(&vcpu->preempt_notifier); + preempt_notifier_register(&vcpu->common->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); } @@ -176,7 +176,7 @@ void vcpu_put(struct kvm_vcpu *vcpu) { preempt_disable(); kvm_arch_vcpu_put(vcpu); - preempt_notifier_unregister(&vcpu->preempt_notifier); + preempt_notifier_unregister(&vcpu->common->preempt_notifier); __this_cpu_write(kvm_running_vcpu, NULL); preempt_enable(); } @@ -468,6 +468,12 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned common->kvm = kvm; common->current_vcpu = vcpu; + + common->wants_to_run = false; + common->preempted = false; + common->ready = false; + preempt_notifier_init(&common->preempt_notifier, &kvm_preempt_ops); + vcpu->common = no_free_ptr(common); return 0; @@ -508,9 +514,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); - vcpu->preempted = false; - vcpu->ready = false; - preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); vcpu->last_used_slot = NULL; vcpu->plane_level = 0; @@ -3927,7 +3930,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_halt); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { if (__kvm_vcpu_wake_up(vcpu)) { - WRITE_ONCE(vcpu->ready, true); + WRITE_ONCE(vcpu->common->ready, true); ++vcpu->stat.generic.halt_wakeup; return true; } @@ -4580,9 +4583,9 @@ static long kvm_vcpu_ioctl(struct file *filp, put_pid(oldpid); } - vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); + vcpu->common->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); r = kvm_arch_vcpu_ioctl_run(vcpu); - vcpu->wants_to_run = false; + vcpu->common->wants_to_run = false; /* * FIXME: Remove this hack once all KVM architectures @@ -6488,36 +6491,36 @@ static void kvm_init_debug(void) } static inline -struct kvm_vcpu *preempt_notifier_to_vcpu(struct 
preempt_notifier *pn) +struct kvm_vcpu_common *preempt_notifier_to_vcpu_common(struct preempt_notifier *pn) { - return container_of(pn, struct kvm_vcpu, preempt_notifier); + return container_of(pn, struct kvm_vcpu_common, preempt_notifier); } static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { - struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + struct kvm_vcpu_common *common = preempt_notifier_to_vcpu_common(pn); - WRITE_ONCE(vcpu->preempted, false); - WRITE_ONCE(vcpu->ready, false); + WRITE_ONCE(common->preempted, false); + WRITE_ONCE(common->ready, false); - __this_cpu_write(kvm_running_vcpu, vcpu->common); - kvm_arch_vcpu_load(vcpu, cpu); + __this_cpu_write(kvm_running_vcpu, common); + kvm_arch_vcpu_load(common->current_vcpu, cpu); - WRITE_ONCE(vcpu->scheduled_out, false); + WRITE_ONCE(common->scheduled_out, false); } static void kvm_sched_out(struct preempt_notifier *pn, struct task_struct *next) { - struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + struct kvm_vcpu_common *common = preempt_notifier_to_vcpu_common(pn); - WRITE_ONCE(vcpu->scheduled_out, true); + WRITE_ONCE(common->scheduled_out, true); - if (task_is_runnable(current) && kvm_vcpu_wants_to_run(vcpu)) { - WRITE_ONCE(vcpu->preempted, true); - WRITE_ONCE(vcpu->ready, true); + if (task_is_runnable(current) && common->wants_to_run) { + WRITE_ONCE(common->preempted, true); + WRITE_ONCE(common->ready, true); } - kvm_arch_vcpu_put(vcpu); + kvm_arch_vcpu_put(common->current_vcpu); __this_cpu_write(kvm_running_vcpu, NULL); } -- 2.53.0 From: Joerg Roedel Use accessors to manage the mutex so it is easier to move it to another struct. 
Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/inject_fault.c | 4 ++-- arch/powerpc/kvm/book3s_xics.c | 4 ++-- arch/powerpc/kvm/book3s_xive.c | 4 ++-- arch/powerpc/kvm/book3s_xive_native.c | 4 ++-- arch/riscv/kvm/aia_device.c | 4 ++-- arch/s390/kvm/interrupt.c | 8 ++++---- arch/s390/kvm/kvm-s390.c | 8 ++++---- arch/s390/kvm/pv.c | 2 +- arch/x86/kvm/svm/sev.c | 2 +- arch/x86/kvm/vmx/nested.h | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 15 +++++++++++++++ virt/kvm/kvm_main.c | 24 ++++++++++++------------ 14 files changed, 53 insertions(+), 38 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index de00088c9a80..295d7f19e4de 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -527,10 +527,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) #ifdef CONFIG_LOCKDEP /* Inform lockdep that the config_lock is acquired after vcpu->mutex */ - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); mutex_lock(&vcpu->kvm->arch.config_lock); mutex_unlock(&vcpu->kvm->arch.config_lock); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); #endif /* Force users to call KVM_ARM_VCPU_INIT */ diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 89982bd3345f..000d94ed7948 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -248,7 +248,7 @@ static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu) int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) { - lockdep_assert_held(&vcpu->mutex); + lockdep_assert_held(kvm_vcpu_mutex(vcpu)); if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu)) return kvm_inject_nested_sea(vcpu, iabt, addr); @@ -367,7 +367,7 @@ static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu) int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr) { - lockdep_assert_held(&vcpu->mutex); + lockdep_assert_held(kvm_vcpu_mutex(vcpu)); if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu)) return 
kvm_inject_nested_serror(vcpu, esr); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 74a44fa702b0..a9afd9df2690 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1361,9 +1361,9 @@ static void kvmppc_xics_release(struct kvm_device *dev) * have been cleared and the vcpu will not be going into the * XICS code anymore. */ - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); kvmppc_xics_free_icp(vcpu); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } if (kvm) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1d67237783b7..e0c68e86f951 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2668,9 +2668,9 @@ static void kvmppc_xive_release(struct kvm_device *dev) * be executing the XIVE push or pull code or accessing * the XIVE MMIO regions. */ - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); kvmppc_xive_cleanup_vcpu(vcpu); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } /* diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 728b5606dd14..40e93ac5fc2f 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1052,9 +1052,9 @@ static void kvmppc_xive_native_release(struct kvm_device *dev) * be executing the XIVE push or pull code or accessing * the XIVE MMIO regions. 
*/ - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); kvmppc_xive_native_cleanup_vcpu(vcpu); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } /* diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 3d1e81e2a36b..d98c8fddc89d 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -181,12 +181,12 @@ static int aia_imsic_addr(struct kvm *kvm, u64 *addr, return -EINVAL; } - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); if (write) vcpu_aia->imsic_addr = *addr; else *addr = vcpu_aia->imsic_addr; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 3bcdbbbb6891..1d66ef9f7527 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3176,12 +3176,12 @@ void kvm_s390_gisa_enable(struct kvm *kvm) if (!gisa_desc) return; kvm_for_each_vcpu(i, vcpu, kvm) { - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); vcpu->arch.sie_block->gd = gisa_desc; vcpu->arch.sie_block->eca |= ECA_AIV; VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } } @@ -3212,10 +3212,10 @@ void kvm_s390_gisa_disable(struct kvm *kvm) if (!gi->origin) return; kvm_for_each_vcpu(i, vcpu, kvm) { - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); vcpu->arch.sie_block->eca &= ~ECA_AIV; vcpu->arch.sie_block->gd = 0U; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); VCPU_EVENT(vcpu, 3, "AIV disabled for cpu %03u", vcpu->vcpu_id); } kvm_s390_gisa_destroy(kvm); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8401bcad1f37..e6fe83da172f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2360,13 +2360,13 @@ int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc) * We want to return the first failure rc and rrc, though. 
*/ kvm_for_each_vcpu(i, vcpu, kvm) { - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) { *rc = _rc; *rrc = _rrc; ret = -EIO; } - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ if (use_gisa) @@ -2398,9 +2398,9 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) kvm_s390_gisa_disable(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); if (r) break; } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 4b865e75351c..4661cbf28199 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected); bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) { - lockdep_assert_held(&vcpu->mutex); + lockdep_assert_held(kvm_vcpu_mutex(vcpu)); return !!kvm_s390_pv_cpu_get_handle(vcpu); } EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index d04f71836ef7..a23dcb081751 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -938,7 +938,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) u8 *d; int i; - lockdep_assert_held(&vcpu->mutex); + lockdep_assert_held(kvm_vcpu_mutex(vcpu)); if (vcpu->arch.guest_state_protected) return -EINVAL; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 213a448104af..7677dff127f1 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -57,7 +57,7 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { - lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + lockdep_assert_once(lockdep_is_held(kvm_vcpu_mutex(vcpu)) || !refcount_read(&vcpu->kvm->users_count)); return 
to_vmx(vcpu)->nested.cached_vmcs12; @@ -65,7 +65,7 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) { - lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + lockdep_assert_once(lockdep_is_held(kvm_vcpu_mutex(vcpu)) || !refcount_read(&vcpu->kvm->users_count)); return to_vmx(vcpu)->nested.cached_shadow_vmcs12; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6355fe7f546f..2a87359cf42f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12941,7 +12941,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { - if (mutex_lock_killable(&vcpu->mutex)) + if (mutex_lock_killable(kvm_vcpu_mutex(vcpu))) return; vcpu_load(vcpu); kvm_synchronize_tsc(vcpu, NULL); @@ -12950,7 +12950,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) /* poll control enabled by default */ vcpu->arch.msr_kvm_poll_control = 1; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a6aacd507c02..611bba515ac0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -999,6 +999,21 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +static inline void kvm_vcpu_lock(struct kvm_vcpu *vcpu) +{ + mutex_lock(&vcpu->mutex); +} + +static inline void kvm_vcpu_unlock(struct kvm_vcpu *vcpu) +{ + mutex_unlock(&vcpu->mutex); +} + +static inline struct mutex *kvm_vcpu_mutex(struct kvm_vcpu *vcpu) +{ + return &vcpu->mutex; +} + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a44f8dc8418a..d6975a5c60b4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1499,7 +1499,7 @@ int kvm_trylock_all_vcpus(struct kvm *kvm) lockdep_assert_held(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) - if 
(!mutex_trylock_nest_lock(&vcpu->mutex, &kvm->lock)) + if (!mutex_trylock_nest_lock(kvm_vcpu_mutex(vcpu), &kvm->lock)) goto out_unlock; return 0; @@ -1507,7 +1507,7 @@ int kvm_trylock_all_vcpus(struct kvm *kvm) kvm_for_each_vcpu(j, vcpu, kvm) { if (i == j) break; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } return -EINTR; } @@ -1522,7 +1522,7 @@ int kvm_lock_all_vcpus(struct kvm *kvm) lockdep_assert_held(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) { - r = mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock); + r = mutex_lock_killable_nest_lock(kvm_vcpu_mutex(vcpu), &kvm->lock); if (r) goto out_unlock; } @@ -1532,7 +1532,7 @@ int kvm_lock_all_vcpus(struct kvm *kvm) kvm_for_each_vcpu(j, vcpu, kvm) { if (i == j) break; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } return r; } @@ -1546,7 +1546,7 @@ void kvm_unlock_all_vcpus(struct kvm *kvm) lockdep_assert_held(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_unlock_all_vcpus); @@ -4353,14 +4353,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) * vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep * knows it's taken *inside* kvm->lock. */ - mutex_lock(&vcpu->mutex); + kvm_vcpu_lock(vcpu); kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); if (r < 0) goto kvm_put_xa_erase; kvm_vcpu_finish_common(vcpu); - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); mutex_unlock(&kvm->lock); kvm_arch_vcpu_postcreate(vcpu); @@ -4368,7 +4368,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) return r; kvm_put_xa_erase: - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); kvm_put_kvm_no_destroy(kvm); xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx); unlock_vcpu_destroy: @@ -4509,10 +4509,10 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu) * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU * is fully online). 
*/ - if (mutex_lock_killable(&vcpu->mutex)) + if (mutex_lock_killable(kvm_vcpu_mutex(vcpu))) return -EINTR; - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx))) return -EIO; @@ -4552,7 +4552,7 @@ static long kvm_vcpu_ioctl(struct file *filp, if (r != -ENOIOCTLCMD) return r; - if (mutex_lock_killable(&vcpu->mutex)) + if (mutex_lock_killable(kvm_vcpu_mutex(vcpu))) return -EINTR; switch (ioctl) { case KVM_RUN: { @@ -4764,7 +4764,7 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } out: - mutex_unlock(&vcpu->mutex); + kvm_vcpu_unlock(vcpu); kfree(fpu); kfree(kvm_sregs); return r; -- 2.53.0 From: Joerg Roedel Fields in struct kvm_vcpu which are protected by these locks is going to move to struct kvm_vcpu_common. So move the locks as well. Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 31 +++++++++++++++++-------------- virt/kvm/kvm_main.c | 3 ++- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 611bba515ac0..c8085c23e18e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,6 +330,13 @@ struct kvm_vcpu_common { /* Currently active VCPU */ struct kvm_vcpu *current_vcpu; + /* Locks */ + int ____srcu_idx; /* Don't use this directly. You've been warned. */ +#ifdef CONFIG_PROVE_RCU + int srcu_depth; +#endif + struct mutex mutex; + /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; @@ -347,15 +354,11 @@ struct kvm_vcpu { int cpu; int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ - int ____srcu_idx; /* Don't use this directly. You've been warned. 
*/ -#ifdef CONFIG_PROVE_RCU - int srcu_depth; -#endif + int mode; u64 requests; unsigned long guest_debug; - struct mutex mutex; struct kvm_run *run; #ifndef __KVM_HAVE_ARCH_WQP @@ -1001,35 +1004,35 @@ static inline void kvm_vm_bugged(struct kvm *kvm) static inline void kvm_vcpu_lock(struct kvm_vcpu *vcpu) { - mutex_lock(&vcpu->mutex); + mutex_lock(&vcpu->common->mutex); } static inline void kvm_vcpu_unlock(struct kvm_vcpu *vcpu) { - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->common->mutex); } static inline struct mutex *kvm_vcpu_mutex(struct kvm_vcpu *vcpu) { - return &vcpu->mutex; + return &vcpu->common->mutex; } static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU - WARN_ONCE(vcpu->srcu_depth++, - "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1); + WARN_ONCE(vcpu->common->srcu_depth++, + "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->common->srcu_depth - 1); #endif - vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + vcpu->common->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu) { - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->common->____srcu_idx); #ifdef CONFIG_PROVE_RCU - WARN_ONCE(--vcpu->srcu_depth, - "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth); + WARN_ONCE(--vcpu->common->srcu_depth, + "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->common->srcu_depth); #endif } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6975a5c60b4..9accca10c249 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -466,6 +466,8 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned common->vcpu_idx = atomic_read(&kvm->online_vcpus); + mutex_init(&common->mutex); + common->kvm = kvm; common->current_vcpu = vcpu; @@ -500,7 +502,6 @@ static void kvm_vcpu_finish_common(struct kvm_vcpu *vcpu) static void 
kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { - mutex_init(&vcpu->mutex); vcpu->cpu = -1; vcpu->kvm = kvm; vcpu->plane = kvm->planes[0]; -- 2.53.0 From: Joerg Roedel The rcuwait member is used to block and wake a VCPU thread. Since all plane VCPUs share a thread, there must only be a single wait object. Signed-off-by: Joerg Roedel --- arch/loongarch/kvm/timer.c | 2 +- arch/mips/kvm/mips.c | 4 ++-- include/linux/kvm_host.h | 9 +++++---- virt/kvm/kvm_main.c | 7 ++++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 8356fce0043f..9da10aa90558 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -31,7 +31,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer) vcpu = container_of(timer, struct kvm_vcpu, arch.swtimer); kvm_queue_irq(vcpu, INT_TI); - rcuwait_wake_up(&vcpu->wait); + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); return HRTIMER_NORESTART; } diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f928ba105104..6469ec246dd6 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -265,7 +265,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - rcuwait_wake_up(&vcpu->wait); + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); return kvm_mips_count_timeout(vcpu); } @@ -507,7 +507,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; - rcuwait_wake_up(&dvcpu->wait); + rcuwait_wake_up(kvm_arch_vcpu_get_wait(dvcpu)); return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c8085c23e18e..c08ede1cefd2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -337,6 +337,10 @@ struct kvm_vcpu_common { #endif struct mutex mutex; +#ifndef __KVM_HAVE_ARCH_WQP + struct rcuwait wait; +#endif + /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS struct 
preempt_notifier preempt_notifier; @@ -361,9 +365,6 @@ struct kvm_vcpu { struct kvm_run *run; -#ifndef __KVM_HAVE_ARCH_WQP - struct rcuwait wait; -#endif struct pid *pid; rwlock_t pid_lock; int sigset_active; @@ -1806,7 +1807,7 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) #ifdef __KVM_HAVE_ARCH_WQP return vcpu->arch.waitp; #else - return &vcpu->wait; + return &vcpu->common->wait; #endif } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9accca10c249..11e0d4af82df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -468,6 +468,10 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned mutex_init(&common->mutex); +#ifndef __KVM_HAVE_ARCH_WQP + rcuwait_init(&common->wait); +#endif + common->kvm = kvm; common->current_vcpu = vcpu; @@ -508,9 +512,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->vcpu_id = id; vcpu->pid = NULL; rwlock_init(&vcpu->pid_lock); -#ifndef __KVM_HAVE_ARCH_WQP - rcuwait_init(&vcpu->wait); -#endif kvm_async_pf_vcpu_init(vcpu); kvm_vcpu_set_in_spin_loop(vcpu, false); -- 2.53.0 From: Joerg Roedel Introduce accessors to make it easier to move this member of struct kvm_vcpu. 
Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 6 +++--- arch/loongarch/kvm/vcpu.c | 6 +++--- arch/mips/kvm/mips.c | 6 +++--- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/riscv/kvm/vcpu.c | 6 +++--- arch/x86/kvm/lapic.c | 3 ++- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/common.h | 2 +- arch/x86/kvm/x86.c | 8 ++++---- include/linux/kvm_host.h | 25 +++++++++++++++++++++++++ virt/kvm/kvm_main.c | 4 ++-- 14 files changed, 52 insertions(+), 26 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 295d7f19e4de..001f83f737ea 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1298,10 +1298,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * See the comment in kvm_vcpu_exiting_guest_mode() and * Documentation/virt/kvm/vcpu-requests.rst */ - smp_store_mb(vcpu->mode, IN_GUEST_MODE); + kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE); if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) { - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); isb(); /* Ensure work in x_flush_hwstate is committed */ if (kvm_vcpu_has_pmu(vcpu)) kvm_pmu_sync_hwstate(vcpu); @@ -1323,7 +1323,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = kvm_arm_vcpu_enter_exit(vcpu); - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); vcpu->stat.exits++; /* * Back from guest diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index bde8b68b8273..bab3c66ae58d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) kvm_deliver_intr(vcpu); kvm_deliver_exception(vcpu); /* Make sure the vcpu mode has been written */ - smp_store_mb(vcpu->mode, IN_GUEST_MODE); + kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE); kvm_check_vpid(vcpu); /* @@ -329,7 +329,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) 
kvm_make_request(KVM_REQ_PMU, vcpu); } /* make sure the vcpu mode has been written */ - smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); + kvm_vcpu_set_mode_mb(vcpu, OUTSIDE_GUEST_MODE); local_irq_enable(); ret = -EAGAIN; } @@ -348,7 +348,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); /* Set a default exit reason */ run->exit_reason = KVM_EXIT_UNKNOWN; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 6469ec246dd6..776aba0af096 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -448,7 +448,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * flush request while the requester sees the VCPU as outside of guest * mode and not needing an IPI. */ - smp_store_mb(vcpu->mode, IN_GUEST_MODE); + kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE); r = kvm_mips_vcpu_enter_exit(vcpu); @@ -1175,7 +1175,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) u32 inst; int ret = RESUME_GUEST; - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); /* Set a default exit reason */ run->exit_reason = KVM_EXIT_UNKNOWN; @@ -1329,7 +1329,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) * or we could miss a TLB flush request while the requester sees * the VCPU as outside of guest mode and not needing an IPI. 
*/ - smp_store_mb(vcpu->mode, IN_GUEST_MODE); + kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE); kvm_mips_callbacks->vcpu_reenter(vcpu); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 2ba2dd26a7ea..0a14870f1d33 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1852,7 +1852,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) srr_regs_clobbered(); out: - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); return ret; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f3ddb24ece74..08b3180adc83 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -823,7 +823,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) #endif out: - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); return ret; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 800867c164c6..5d94e0f676ec 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -98,7 +98,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } - vcpu->mode = IN_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, IN_GUEST_MODE); /* * Reading vcpu->requests must happen after setting vcpu->mode, diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8519a5bfbdc4..66cde226eb87 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -903,7 +903,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * See the comment in kvm_vcpu_exiting_guest_mode() and * Documentation/virt/kvm/vcpu-requests.rst */ - vcpu->mode = IN_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, IN_GUEST_MODE); kvm_vcpu_srcu_read_unlock(vcpu); smp_mb__after_srcu_read_unlock(); @@ -920,7 +920,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); 
local_irq_enable(); preempt_enable(); kvm_vcpu_srcu_read_lock(vcpu); @@ -941,7 +941,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_enter_exit(vcpu, &trap); - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); vcpu->stat.exits++; /* Syncup interrupts state with HW */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ab40a2e4ab9d..1b763f164951 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -200,7 +200,8 @@ static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { - return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; + return kvm_can_post_timer_interrupt(vcpu) && + kvm_vcpu_mode(vcpu) == IN_GUEST_MODE; } static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f0144ae8d891..0cec559f59b1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -574,7 +574,7 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) * Make sure a following spte read is not reordered ahead of the write * to vcpu->mode. */ - smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + kvm_vcpu_set_mode_mb(vcpu, READING_SHADOW_PAGE_TABLES); } } @@ -588,7 +588,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. */ - smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + kvm_vcpu_set_mode_release(vcpu, OUTSIDE_GUEST_MODE); local_irq_enable(); } } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f5cc30a6732f..e8ad880a4266 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3870,7 +3870,7 @@ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, * apic->apicv_active must be read after vcpu->mode. 
* Pairs with smp_store_release in vcpu_enter_guest. */ - bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE); + bool in_guest_mode = (kvm_vcpu_mode_acquire(vcpu) == IN_GUEST_MODE); /* Note, this is called iff the local APIC is in-kernel. */ if (!READ_ONCE(vcpu->arch.apic->apicv_active)) { diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index 412d0829d7a2..fe480f7cf55e 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -112,7 +112,7 @@ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, int pi_vec) { #ifdef CONFIG_SMP - if (vcpu->mode == IN_GUEST_MODE) { + if (kvm_vcpu_mode(vcpu) == IN_GUEST_MODE) { /* * The vector of the virtual has already been set in the PIR. * Send a notification event to deliver the virtual interrupt diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a87359cf42f..50601ac2828f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2272,7 +2272,7 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { xfer_to_guest_mode_prepare(); - return READ_ONCE(vcpu->mode) == EXITING_GUEST_MODE || + return kvm_vcpu_mode(vcpu) == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } @@ -11391,7 +11391,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) local_irq_disable(); /* Store vcpu->apicv_active before vcpu->mode. 
*/ - smp_store_release(&vcpu->mode, IN_GUEST_MODE); + kvm_vcpu_set_mode_release(vcpu, IN_GUEST_MODE); kvm_vcpu_srcu_read_unlock(vcpu); @@ -11420,7 +11420,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_call(sync_pir_to_irr)(vcpu); if (kvm_vcpu_exit_request(vcpu)) { - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); smp_wmb(); local_irq_enable(); preempt_enable(); @@ -11539,7 +11539,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.last_vmentry_cpu = vcpu->cpu; vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE); smp_wmb(); kvm_load_xfeatures(vcpu, false); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c08ede1cefd2..45286b3b35c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -440,6 +440,31 @@ static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu) return vcpu->common->scheduled_out; } +static inline int kvm_vcpu_mode(struct kvm_vcpu *vcpu) +{ + return vcpu->mode; +} + +static inline int kvm_vcpu_mode_acquire(struct kvm_vcpu *vcpu) +{ + return smp_load_acquire(&vcpu->mode); +} + +static inline void kvm_vcpu_set_mode(struct kvm_vcpu *vcpu, int mode) +{ + vcpu->mode = mode; +} + +static inline void kvm_vcpu_set_mode_mb(struct kvm_vcpu *vcpu, int mode) +{ + smp_store_mb(vcpu->mode, mode); +} + +static inline void kvm_vcpu_set_mode_release(struct kvm_vcpu *vcpu, int mode) +{ + smp_store_release(&vcpu->mode, mode); +} + /* * Start accounting time towards a guest. * Must be called before entering guest context. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 11e0d4af82df..7ea20d96bc89 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3960,8 +3960,8 @@ void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait) * within the vCPU thread itself. 
*/ if (vcpu == kvm_get_running_vcpu()) { - if (vcpu->mode == IN_GUEST_MODE) - WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); + if (kvm_vcpu_mode(vcpu) == IN_GUEST_MODE) + kvm_vcpu_set_mode(vcpu, EXITING_GUEST_MODE); goto out; } -- 2.53.0 From: Joerg Roedel These fields must be shared across all planes of a given VCPU. Signed-off-by: Joerg Roedel --- arch/powerpc/kvm/trace.h | 2 +- arch/x86/kvm/trace.h | 2 +- include/linux/kvm_host.h | 27 ++++++++++++++------------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index ea1d7c808319..35c000d918bb 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -108,7 +108,7 @@ TRACE_EVENT(kvm_check_requests, TP_fast_assign( __entry->cpu_nr = vcpu->vcpu_id; - __entry->requests = vcpu->requests; + __entry->requests = vcpu->common->requests; ), TP_printk("vcpu=%x requests=%x", diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0db25bba17f6..0d2dd25bed12 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -409,7 +409,7 @@ TRACE_EVENT(name, \ __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ - __entry->requests = READ_ONCE(vcpu->requests); \ + __entry->requests = READ_ONCE(vcpu->common->requests); \ kvm_x86_call(get_exit_info)(vcpu, \ &__entry->exit_reason, \ &__entry->info1, \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45286b3b35c9..7704820986da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -180,7 +180,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) #define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ - BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ + BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu_common, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ 
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) @@ -341,6 +341,9 @@ struct kvm_vcpu_common { struct rcuwait wait; #endif + int mode; + u64 requests; + /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; @@ -359,8 +362,6 @@ struct kvm_vcpu { int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ - int mode; - u64 requests; unsigned long guest_debug; struct kvm_run *run; @@ -442,27 +443,27 @@ static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu) static inline int kvm_vcpu_mode(struct kvm_vcpu *vcpu) { - return vcpu->mode; + return vcpu->common->mode; } static inline int kvm_vcpu_mode_acquire(struct kvm_vcpu *vcpu) { - return smp_load_acquire(&vcpu->mode); + return smp_load_acquire(&vcpu->common->mode); } static inline void kvm_vcpu_set_mode(struct kvm_vcpu *vcpu, int mode) { - vcpu->mode = mode; + vcpu->common->mode = mode; } static inline void kvm_vcpu_set_mode_mb(struct kvm_vcpu *vcpu, int mode) { - smp_store_mb(vcpu->mode, mode); + smp_store_mb(vcpu->common->mode, mode); } static inline void kvm_vcpu_set_mode_release(struct kvm_vcpu *vcpu, int mode) { - smp_store_release(&vcpu->mode, mode); + smp_store_release(&vcpu->common->mode, mode); } /* @@ -630,7 +631,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) * memory barrier following the write of vcpu->mode in VCPU RUN. */ smp_mb__before_atomic(); - return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); + return cmpxchg(&vcpu->common->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } /* @@ -2355,7 +2356,7 @@ static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) * caller. Paired with the smp_mb__after_atomic in kvm_check_request. 
*/ smp_wmb(); - set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); + set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) @@ -2381,17 +2382,17 @@ static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { - return READ_ONCE(vcpu->requests); + return READ_ONCE(vcpu->common->requests); } static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { - return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); + return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) { - clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); + clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) -- 2.53.0 From: Joerg Roedel The bitfield layout is shared with the global vcpu->common->requests field. A new flag will indicate in which bitmap the request will be set. Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7704820986da..3c72a462ccfa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -159,6 +159,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) #define KVM_REQUEST_NO_ACTION BIT(10) +#define KVM_REQUEST_PER_PLANE BIT(11) /* * Architecture-independent vcpu->requests bit members * Bits 3-7 are reserved for more arch-independent bits. 
@@ -184,6 +185,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) +#define KVM_ARCH_PLANE_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, KVM_REQUEST_PER_PLANE) bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); @@ -371,6 +373,10 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; + + u64 plane_requests; + + /* S390 only */ bool valid_wakeup; #ifdef CONFIG_HAS_IOMEM @@ -2356,7 +2362,10 @@ static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) * caller. Paired with the smp_mb__after_atomic in kvm_check_request. */ smp_wmb(); - set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); + if (req & KVM_REQUEST_PER_PLANE) + set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->plane_requests); + else + set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) @@ -2382,17 +2391,23 @@ static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { - return READ_ONCE(vcpu->common->requests); + return READ_ONCE(vcpu->common->requests) || READ_ONCE(vcpu->plane_requests); } static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { - return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); + if (req & KVM_REQUEST_PER_PLANE) + return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->plane_requests); + else + return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) { - clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); + if (req & KVM_REQUEST_PER_PLANE) + clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->plane_requests); + else + clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests); } static 
inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) -- 2.53.0 From: Joerg Roedel These are the same across all planes for one VCPU, so make then shared. Signed-off-by: Joerg Roedel --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 2 +- include/linux/kvm_host.h | 5 ++-- virt/kvm/kvm_main.c | 44 ++++++++++++++++--------------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a49042bfa801..32dc484781f0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1270,7 +1270,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid)) +#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->common->pid)) #ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 001f83f737ea..1e2f42134b74 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -726,7 +726,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) vcpu_set_on_unsupported_cpu(vcpu); - vcpu->arch.pid = pid_nr(vcpu->pid); + vcpu->arch.pid = pid_nr(vcpu->common->pid); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3c72a462ccfa..73786712495d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -346,6 +346,9 @@ struct kvm_vcpu_common { int mode; u64 requests; + struct pid *pid; + rwlock_t pid_lock; + /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; @@ -368,8 +371,6 @@ struct kvm_vcpu { struct kvm_run *run; - struct pid *pid; - rwlock_t pid_lock; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; diff --git a/virt/kvm/kvm_main.c 
b/virt/kvm/kvm_main.c index 7ea20d96bc89..af3c4e0081b8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -475,6 +475,9 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned common->kvm = kvm; common->current_vcpu = vcpu; + common->pid = NULL; + rwlock_init(&common->pid_lock); + common->wants_to_run = false; common->preempted = false; common->ready = false; @@ -510,8 +513,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->kvm = kvm; vcpu->plane = kvm->planes[0]; vcpu->vcpu_id = id; - vcpu->pid = NULL; - rwlock_init(&vcpu->pid_lock); kvm_async_pf_vcpu_init(vcpu); kvm_vcpu_set_in_spin_loop(vcpu, false); @@ -539,6 +540,12 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) kvm->created_vcpus--; mutex_unlock(&common->kvm->lock); + /* + * No need for rcu_read_lock as VCPU_RUN is the only place that changes + * the common->pid pointer, and at destruction time all file descriptors + * are already gone. + */ + put_pid(common->pid); kfree(common); } @@ -549,13 +556,6 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_vcpu_common_destroy(vcpu); kvm_dirty_ring_free(&vcpu->dirty_ring); - /* - * No need for rcu_read_lock as VCPU_RUN is the only place that changes - * the vcpu->pid pointer, and at destruction time all file descriptors - * are already gone. 
- */ - put_pid(vcpu->pid); - free_page((unsigned long)vcpu->run); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -3996,16 +3996,17 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_kick); int kvm_vcpu_yield_to(struct kvm_vcpu *target) { + struct kvm_vcpu_common *common = target->common; struct task_struct *task = NULL; int ret; - if (!read_trylock(&target->pid_lock)) + if (!read_trylock(&common->pid_lock)) return 0; - if (target->pid) - task = get_pid_task(target->pid, PIDTYPE_PID); + if (common->pid) + task = get_pid_task(common->pid, PIDTYPE_PID); - read_unlock(&target->pid_lock); + read_unlock(&common->pid_lock); if (!task) return 0; @@ -4258,9 +4259,9 @@ static int vcpu_get_pid(void *data, u64 *val) { struct kvm_vcpu *vcpu = data; - read_lock(&vcpu->pid_lock); - *val = pid_nr(vcpu->pid); - read_unlock(&vcpu->pid_lock); + read_lock(&vcpu->common->pid_lock); + *val = pid_nr(vcpu->common->pid); + read_unlock(&vcpu->common->pid_lock); return 0; } @@ -4558,6 +4559,7 @@ static long kvm_vcpu_ioctl(struct file *filp, return -EINTR; switch (ioctl) { case KVM_RUN: { + struct kvm_vcpu_common *common = vcpu->common; struct pid *oldpid; r = -EINVAL; if (arg) @@ -4569,7 +4571,7 @@ static long kvm_vcpu_ioctl(struct file *filp, * read vcpu->pid while this vCPU is in KVM_RUN, e.g. to yield * directly to this vCPU */ - oldpid = vcpu->pid; + oldpid = common->pid; if (unlikely(oldpid != task_pid(current))) { /* The thread running this VCPU changed. 
*/ struct pid *newpid; @@ -4579,15 +4581,15 @@ static long kvm_vcpu_ioctl(struct file *filp, break; newpid = get_task_pid(current, PIDTYPE_PID); - write_lock(&vcpu->pid_lock); - vcpu->pid = newpid; - write_unlock(&vcpu->pid_lock); + write_lock(&common->pid_lock); + common->pid = newpid; + write_unlock(&common->pid_lock); put_pid(oldpid); } vcpu->common->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); r = kvm_arch_vcpu_ioctl_run(vcpu); - vcpu->common->wants_to_run = false; + common->wants_to_run = false; /* * FIXME: Remove this hack once all KVM architectures -- 2.53.0 From: Joerg Roedel These are the same across all planes for one VCPU, so make them shared. Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73786712495d..9220c452aa3a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -348,6 +348,8 @@ struct kvm_vcpu_common { struct pid *pid; rwlock_t pid_lock; + int sigset_active; + sigset_t sigset; /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS @@ -371,8 +373,6 @@ struct kvm_vcpu { struct kvm_run *run; - int sigset_active; - sigset_t sigset; unsigned int halt_poll_ns; u64 plane_requests; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index af3c4e0081b8..1858880ee3d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3694,7 +3694,9 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_mark_page_dirty); void kvm_sigset_activate(struct kvm_vcpu *vcpu) { - if (!vcpu->sigset_active) + struct kvm_vcpu_common *common = vcpu->common; + + if (!common->sigset_active) return; /* @@ -3703,12 +3705,14 @@ void kvm_sigset_activate(struct kvm_vcpu *vcpu) * ->real_blocked don't care as long ->real_blocked is always a subset * of ->blocked. 
*/ - sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); + sigprocmask(SIG_SETMASK, &common->sigset, &current->real_blocked); } void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) { - if (!vcpu->sigset_active) + struct kvm_vcpu_common *common = vcpu->common; + + if (!common->sigset_active) return; sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); @@ -4391,12 +4395,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) { + struct kvm_vcpu_common *common = vcpu->common; + if (sigset) { sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - vcpu->sigset_active = 1; - vcpu->sigset = *sigset; + common->sigset_active = 1; + common->sigset = *sigset; } else - vcpu->sigset_active = 0; + common->sigset_active = 0; return 0; } -- 2.53.0 From: Joerg Roedel Only one struct kvm_vcpu across all planes can be in a spin-loop. Move the state to struct kvm_vcpu_common to make detection independent of the active struct kvm_vcpu. Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 32 +++++++++++++++---------------- virt/kvm/kvm_main.c | 41 ++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9220c452aa3a..f6e8a0b653b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -350,6 +350,20 @@ struct kvm_vcpu_common { rwlock_t pid_lock; int sigset_active; sigset_t sigset; + unsigned int halt_poll_ns; + +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT + /* + * Cpu relax intercept or pause loop exit optimization + * in_spin_loop: set when a vcpu does a pause loop exit + * or cpu relax intercepted. + * dy_eligible: indicates whether vcpu is eligible for directed yield. 
+ */ + struct { + bool in_spin_loop; + bool dy_eligible; + } spin_loop; +#endif /* Scheduling state */ #ifdef CONFIG_PREEMPT_NOTIFIERS @@ -373,8 +387,6 @@ struct kvm_vcpu { struct kvm_run *run; - unsigned int halt_poll_ns; - u64 plane_requests; /* S390 only */ @@ -398,18 +410,6 @@ struct kvm_vcpu { } async_pf; #endif -#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT - /* - * Cpu relax intercept or pause loop exit optimization - * in_spin_loop: set when a vcpu does a pause loop exit - * or cpu relax intercepted. - * dy_eligible: indicates whether vcpu is eligible for directed yield. - */ - struct { - bool in_spin_loop; - bool dy_eligible; - } spin_loop; -#endif struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; @@ -2500,11 +2500,11 @@ extern struct kvm_device_ops kvm_arm_vgic_v5_ops; static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) { - vcpu->spin_loop.in_spin_loop = val; + vcpu->common->spin_loop.in_spin_loop = val; } static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { - vcpu->spin_loop.dy_eligible = val; + vcpu->common->spin_loop.dy_eligible = val; } #else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1858880ee3d3..24ff8748a317 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -485,6 +485,9 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned vcpu->common = no_free_ptr(common); + kvm_vcpu_set_in_spin_loop(vcpu, false); + kvm_vcpu_set_dy_eligible(vcpu, false); + return 0; out_drop_counter: @@ -515,8 +518,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->vcpu_id = id; kvm_async_pf_vcpu_init(vcpu); - kvm_vcpu_set_in_spin_loop(vcpu, false); - kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->last_used_slot = NULL; vcpu->plane_level = 0; @@ -3721,9 +3722,10 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) static void grow_halt_poll_ns(struct 
kvm_vcpu *vcpu) { + struct kvm_vcpu_common *common = vcpu->common; unsigned int old, val, grow, grow_start; - old = val = vcpu->halt_poll_ns; + old = val = common->halt_poll_ns; grow_start = READ_ONCE(halt_poll_ns_grow_start); grow = READ_ONCE(halt_poll_ns_grow); if (!grow) @@ -3733,16 +3735,17 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - vcpu->halt_poll_ns = val; + common->halt_poll_ns = val; out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); } static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_common *common = vcpu->common; unsigned int old, val, shrink, grow_start; - old = val = vcpu->halt_poll_ns; + old = val = common->halt_poll_ns; shrink = READ_ONCE(halt_poll_ns_shrink); grow_start = READ_ONCE(halt_poll_ns_grow_start); if (shrink == 0) @@ -3753,7 +3756,7 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = 0; - vcpu->halt_poll_ns = val; + common->halt_poll_ns = val; trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); } @@ -3864,19 +3867,20 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); + struct kvm_vcpu_common *common = vcpu->common; ktime_t start, cur, poll_end; bool waited = false; bool do_halt_poll; u64 halt_ns; - if (vcpu->halt_poll_ns > max_halt_poll_ns) - vcpu->halt_poll_ns = max_halt_poll_ns; + if (common->halt_poll_ns > max_halt_poll_ns) + common->halt_poll_ns = max_halt_poll_ns; - do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; + do_halt_poll = halt_poll_allowed && common->halt_poll_ns; start = cur = poll_end = ktime_get(); if (do_halt_poll) { - ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); + ktime_t stop = ktime_add_ns(start, common->halt_poll_ns); do { if (kvm_vcpu_check_block(vcpu) < 0) @@ -3914,18 +3918,18 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) if (!vcpu_valid_wakeup(vcpu)) { 
shrink_halt_poll_ns(vcpu); } else if (max_halt_poll_ns) { - if (halt_ns <= vcpu->halt_poll_ns) + if (halt_ns <= common->halt_poll_ns) ; /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && + else if (common->halt_poll_ns && halt_ns > max_halt_poll_ns) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < max_halt_poll_ns && + else if (common->halt_poll_ns < max_halt_poll_ns && halt_ns < max_halt_poll_ns) grow_halt_poll_ns(vcpu); } else { - vcpu->halt_poll_ns = 0; + common->halt_poll_ns = 0; } } @@ -4046,13 +4050,14 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_yield_to); static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT + struct kvm_vcpu_common *common = vcpu->common; bool eligible; - eligible = !vcpu->spin_loop.in_spin_loop || - vcpu->spin_loop.dy_eligible; + eligible = !common->spin_loop.in_spin_loop || + common->spin_loop.dy_eligible; - if (vcpu->spin_loop.in_spin_loop) - kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); + if (common->spin_loop.in_spin_loop) + kvm_vcpu_set_dy_eligible(vcpu, !common->spin_loop.dy_eligible); return eligible; #else -- 2.53.0 From: Joerg Roedel The dirty tracking should happen across all planes of a given VCPU, so move the dirty_ring to struct kvm_vcpu_common. 
Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 3 ++- virt/kvm/dirty_ring.c | 4 ++-- virt/kvm/kvm_main.c | 22 ++++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6e8a0b653b3..7d06459a06f3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -373,6 +373,8 @@ struct kvm_vcpu_common { bool preempted; bool ready; bool scheduled_out; + + struct kvm_dirty_ring dirty_ring; }; struct kvm_vcpu { @@ -413,7 +415,6 @@ struct kvm_vcpu { struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; - struct kvm_dirty_ring dirty_ring; /* * The most recently used memslot by this vCPU and the slots generation diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index 572b854edf74..c6f46b93bddb 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -218,7 +218,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring, void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset) { - struct kvm_dirty_ring *ring = &vcpu->dirty_ring; + struct kvm_dirty_ring *ring = &vcpu->common->dirty_ring; struct kvm_dirty_gfn *entry; /* It should never get full */ @@ -250,7 +250,7 @@ bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu) * the dirty ring is reset by userspace. 
*/ if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) && - kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) { + kvm_dirty_ring_soft_full(&vcpu->common->dirty_ring)) { kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu); vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; trace_kvm_dirty_ring_exit(vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 24ff8748a317..f85ddb0fc781 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -483,6 +483,13 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned common->ready = false; preempt_notifier_init(&common->preempt_notifier, &kvm_preempt_ops); + if (kvm->dirty_ring_size) { + r = kvm_dirty_ring_alloc(kvm, &common->dirty_ring, + id, kvm->dirty_ring_size); + if (r) + goto out_drop_counter; + } + vcpu->common = no_free_ptr(common); kvm_vcpu_set_in_spin_loop(vcpu, false); @@ -547,6 +554,7 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) * are already gone. */ put_pid(common->pid); + kvm_dirty_ring_free(&common->dirty_ring); kfree(common); } @@ -555,7 +563,6 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_arch_vcpu_destroy(vcpu); kvm_vcpu_common_destroy(vcpu); - kvm_dirty_ring_free(&vcpu->dirty_ring); free_page((unsigned long)vcpu->run); kmem_cache_free(kvm_vcpu_cache, vcpu); @@ -4209,7 +4216,7 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) #endif else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) page = kvm_dirty_ring_get_page( - &vcpu->dirty_ring, + &vcpu->common->dirty_ring, vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); else return kvm_arch_vcpu_fault(vcpu, vmf); @@ -4338,13 +4345,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) if (r) goto vcpu_free_run_page; - if (kvm->dirty_ring_size) { - r = kvm_dirty_ring_alloc(kvm, &vcpu->dirty_ring, - id, kvm->dirty_ring_size); - if (r) - goto arch_vcpu_destroy; - } - mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { @@ -4385,8 +4385,6 @@ static int 
kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx); unlock_vcpu_destroy: mutex_unlock(&kvm->lock); - kvm_dirty_ring_free(&vcpu->dirty_ring); -arch_vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); vcpu_free_run_page: free_page((unsigned long)vcpu->run); @@ -5120,7 +5118,7 @@ static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) mutex_lock(&kvm->slots_lock); kvm_for_each_vcpu(i, vcpu, kvm) { - r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared); + r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->common->dirty_ring, &cleared); if (r) break; } -- 2.53.0 From: Joerg Roedel Introduce struct kvm_arch_plane which is per architecture and will be used to store architecture-specific per-plane state. Signed-off-by: Joerg Roedel --- arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++ arch/loongarch/include/asm/kvm_host.h | 12 ++++++++++++ arch/mips/include/asm/kvm_host.h | 12 ++++++++++++ arch/powerpc/include/asm/kvm_host.h | 12 ++++++++++++ arch/riscv/include/asm/kvm_host.h | 12 ++++++++++++ arch/s390/include/asm/kvm_host.h | 12 ++++++++++++ arch/x86/include/asm/kvm_host.h | 12 ++++++++++++ include/linux/kvm_host.h | 2 ++ include/linux/kvm_types.h | 1 + virt/kvm/kvm_main.c | 9 +++++++++ 10 files changed, 96 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32dc484781f0..e9cca2adb371 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -306,6 +306,18 @@ enum fgt_group_id { __NR_FGT_GROUP_IDS__ }; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { struct kvm_s2_mmu mmu; diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 776bc487a705..225aa87ebbdd 100644 --- 
a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -116,6 +116,18 @@ struct kvm_phyid_map { struct kvm_phyid_info phys_map[KVM_MAX_PHYID]; }; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { /* Guest physical mm */ kvm_pte_t *pgd; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index c14b10821817..b01911eb9064 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -172,6 +172,18 @@ struct loongson_kvm_ipi { }; #endif +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { /* Guest physical mm */ struct mm_struct gpa_mm; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2d139c807577..c5b9fbaf34f3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -269,6 +269,18 @@ struct kvm_hpt_info { struct kvm_resize_hpt; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + /* Flag values for kvm_arch.secure_guest */ #define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */ #define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */ diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 75b0a951c1bc..bcbf487d4cb7 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ 
b/arch/riscv/include/asm/kvm_host.h @@ -76,6 +76,18 @@ struct kvm_vcpu_stat { struct kvm_arch_memory_slot { }; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { /* G-stage vmid */ struct kvm_vmid vmid; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8a4f4a39f7a2..bb3bfbfd35d8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -638,6 +638,18 @@ struct kvm_s390_pv { struct kvm_s390_mmu_cache; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { struct esca_block *sca; debug_info_t *dbf; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 43c92f0ada1e..dd95c70bfdba 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1412,6 +1412,18 @@ enum kvm_mmu_type { KVM_NR_MMU_TYPES, }; +/* Per-plane state of VM */ +struct kvm_arch_plane {}; + +static inline int kvm_arch_plane_init(struct kvm *kvm, + struct kvm_plane *plane, + unsigned plane_level) +{ + return 0; +} + +static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} + struct kvm_arch { unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7d06459a06f3..4a0eaa1de479 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -847,6 +847,8 @@ struct kvm_plane { /* Per-Plane VCPU array */ struct xarray vcpu_array; + + struct kvm_arch_plane arch; }; struct kvm { diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 
a568d8e6f4e8..07e82928c948 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -42,6 +42,7 @@ struct kvm_interrupt; struct kvm_irq_routing_table; struct kvm_memory_slot; struct kvm_one_reg; +struct kvm_plane; struct kvm_run; struct kvm_userspace_memory_region; struct kvm_vcpu; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f85ddb0fc781..91fb9abf9b31 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1211,13 +1211,22 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) xa_init(&plane->vcpu_array); + if (kvm_arch_plane_init(kvm, plane, plane_level)) + goto out_free_plane; + kvm->planes[plane_level] = plane; return plane; + +out_free_plane: + kfree(plane); + + return NULL; } static void kvm_destroy_one_plane(struct kvm_plane *plane) { + kvm_arch_plane_destroy(plane); kfree(plane); } -- 2.53.0 From: Joerg Roedel Give architectures a place to store their VCPU state which is shared across all planes. Signed-off-by: Joerg Roedel --- arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/loongarch/include/asm/kvm_host.h | 5 +++++ arch/mips/include/asm/kvm_host.h | 5 +++++ arch/powerpc/include/asm/kvm_host.h | 5 +++++ arch/riscv/include/asm/kvm_host.h | 5 +++++ arch/s390/include/asm/kvm_host.h | 5 +++++ arch/x86/include/asm/kvm_host.h | 5 +++++ include/linux/kvm_host.h | 2 ++ include/linux/kvm_types.h | 1 + virt/kvm/kvm_main.c | 8 ++++++++ 10 files changed, 46 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e9cca2adb371..de9ca00ce4f4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -869,6 +869,11 @@ struct vcpu_reset_state { struct vncr_tlb; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; diff 
--git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 225aa87ebbdd..7317dceda6b4 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -150,6 +150,11 @@ struct kvm_arch { struct loongarch_pch_pic *pch_pic; }; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + #define CSR_MAX_NUMS 0x800 struct loongarch_csrs { diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b01911eb9064..c48bca79207b 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -194,6 +194,11 @@ struct kvm_arch { #endif }; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + #define N_MIPS_COPROC_REGS 32 #define N_MIPS_COPROC_SEL 8 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c5b9fbaf34f3..47d9900c4f85 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -349,6 +349,11 @@ struct kvm_arch { #endif }; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + #define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff) #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index bcbf487d4cb7..397491587f5b 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -107,6 +107,11 @@ struct kvm_arch { bool mp_state_reset; }; +struct 
kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + struct kvm_cpu_trap { unsigned long sepc; unsigned long scause; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bb3bfbfd35d8..90fd8c0f1a2b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -413,6 +413,11 @@ struct kvm_s390_pv_vcpu { unsigned long stor_base; }; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; /* if vsie is active, currently executed shadow sie control block */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd95c70bfdba..1393566741a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -794,6 +794,11 @@ enum kvm_only_cpuid_leafs { NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +struct kvm_vcpu_arch_common {}; + +static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } +static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} + struct kvm_vcpu_arch { /* * rip and regs accesses must go through diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4a0eaa1de479..291bccce9b74 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -375,6 +375,8 @@ struct kvm_vcpu_common { bool scheduled_out; struct kvm_dirty_ring dirty_ring; + + struct kvm_vcpu_arch_common arch; }; struct kvm_vcpu { diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 07e82928c948..06799efe6a12 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -46,6 +46,7 @@ struct kvm_plane; struct kvm_run; 
struct kvm_userspace_memory_region; struct kvm_vcpu; +struct kvm_vcpu_common; struct kvm_vcpu_init; struct kvm_memslots; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 91fb9abf9b31..7a0b632e3ac0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -490,6 +490,10 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned goto out_drop_counter; } + r = kvm_arch_vcpu_common_init(common); + if (r) + goto out_free_dirty_ring; + vcpu->common = no_free_ptr(common); kvm_vcpu_set_in_spin_loop(vcpu, false); @@ -497,6 +501,8 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned return 0; +out_free_dirty_ring: + kvm_dirty_ring_free(&common->dirty_ring); out_drop_counter: mutex_lock(&kvm->lock); kvm->created_vcpus--; @@ -548,6 +554,8 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) kvm->created_vcpus--; mutex_unlock(&common->kvm->lock); + kvm_arch_vcpu_common_destroy(common); + /* * No need for rcu_read_lock as VCPU_RUN is the only place that changes * the common->pid pointer, and at destruction time all file descriptors -- 2.53.0 From: Paolo Bonzini Introduce an architecture call-back to request the VM-specific maximum number of supported planes. Use that to implement the KVM_CAP_PLANES capability check. Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 5 +++++ arch/loongarch/kvm/vm.c | 5 +++++ arch/mips/kvm/mips.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/riscv/kvm/main.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 4 ++++ 9 files changed, 41 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1e2f42134b74..7e6d2773fd39 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -200,6 +200,11 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? 
kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 1317c718f896..14f1232c6e0c 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -109,6 +109,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->arch.phyid_map = NULL; } +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 776aba0af096..60870452119d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -115,6 +115,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return !!(vcpu->arch.pending_exceptions); } +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5d94e0f676ec..cfa40be20e00 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -499,6 +499,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) module_put(kvm->arch.kvm_ops->owner); } +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index cb8a65273c1f..5adba3a455a3 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -17,6 +17,11 @@ DEFINE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa); +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + static void kvm_riscv_setup_vendor_features(void) { /* Andes AX66: split two-stage TLBs */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e6fe83da172f..24f24ea95f86 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3186,6 +3186,11 @@ static void 
sca_dispose(struct kvm *kvm) kvm->arch.sca = NULL; } +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + void kvm_arch_free_vm(struct kvm *kvm) { if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 50601ac2828f..25299c8c28e3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -482,6 +482,11 @@ static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; static unsigned int num_msr_based_features; +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return 1; +} + /* * All feature MSRs except uCode revID, which tracks the currently loaded uCode * patch, are immutable once the vCPU model is defined. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 291bccce9b74..3ecd472c7cfa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1156,6 +1156,8 @@ void kvm_unlock_all_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +unsigned kvm_arch_max_planes(struct kvm *kvm); + #ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); #else diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7a0b632e3ac0..5a0277e2ac7c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5079,6 +5079,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_GUEST_MEMFD_FLAGS: return kvm_gmem_get_supported_flags(kvm); #endif + case KVM_CAP_PLANES: + if (kvm) + return kvm_arch_max_planes(kvm); + return 1; default: break; } -- 2.53.0 From: Paolo Bonzini Add a new VM ioctl to create a new plane. It returns a new file descriptor which supports per-plane ioctls. 
Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/uapi/linux/kvm.h | 2 ++ virt/kvm/kvm_main.c | 75 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 813f964a6dc1..24e34b8e4819 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1355,6 +1355,8 @@ struct kvm_s390_keyop { #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) +#define KVM_CREATE_PLANE _IO(KVMIO, 0xe4) + /* * ioctls for vcpu fds */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0277e2ac7c..03a44ff62f0f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4843,6 +4843,34 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, } #endif +static long kvm_plane_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_plane *plane = filp->private_data; + + if (plane->kvm->mm != current->mm || plane->kvm->vm_dead) + return -EIO; + + switch (ioctl) { + default: + return -ENOTTY; + } +} + +static int kvm_plane_release(struct inode *inode, struct file *filp) +{ + struct kvm_plane *plane = filp->private_data; + + kvm_put_kvm(plane->kvm); + return 0; +} + +static struct file_operations kvm_plane_fops = { + .unlocked_ioctl = kvm_plane_ioctl, + .release = kvm_plane_release, + KVM_COMPAT(kvm_plane_ioctl), +}; + static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) { struct kvm_device *dev = filp->private_data; @@ -5288,6 +5316,49 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm) return fd; } +static int kvm_vm_ioctl_create_plane(struct kvm *kvm, unsigned id) +{ + struct kvm_plane *plane; + struct file *file; + int r, fd; + + if (id >= kvm_arch_max_planes(kvm) || + WARN_ON_ONCE(id >= KVM_MAX_PLANES)) + return -EINVAL; + + guard(mutex)(&kvm->lock); + if (kvm->planes[id]) + return -EEXIST; + + fd 
= get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + plane = kvm_create_plane(kvm, id); + if (!plane) { + r = -ENOMEM; + goto put_fd; + } + + kvm_get_kvm(kvm); + file = anon_inode_getfile("kvm-plane", &kvm_plane_fops, plane, O_RDWR); + if (IS_ERR(file)) { + r = PTR_ERR(file); + goto put_kvm; + } + + fd_install(fd, file); + return fd; + +put_kvm: + kvm_put_kvm(kvm); + kvm_destroy_one_plane(plane); +put_fd: + put_unused_fd(fd); + return r; +} + + #define SANITY_CHECK_MEM_REGION_FIELD(field) \ do { \ BUILD_BUG_ON(offsetof(struct kvm_userspace_memory_region, field) != \ @@ -5306,6 +5377,9 @@ static long kvm_vm_ioctl(struct file *filp, if (kvm->mm != current->mm || kvm->vm_dead) return -EIO; switch (ioctl) { + case KVM_CREATE_PLANE: + r = kvm_vm_ioctl_create_plane(kvm, arg); + break; case KVM_CREATE_VCPU: r = kvm_vm_ioctl_create_vcpu(kvm, arg); break; @@ -6676,6 +6750,7 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) kvm_chardev_ops.owner = module; kvm_vm_fops.owner = module; kvm_vcpu_fops.owner = module; + kvm_plane_fops.owner = module; kvm_device_fops.owner = module; kvm_preempt_ops.sched_in = kvm_sched_in; -- 2.53.0 From: Paolo Bonzini Add a new exit-type to KVM for telling user-space that a plane-vcpu is missing. Create a helper which fills out the kvm_run exit structure. 
Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 2 ++ include/uapi/linux/kvm.h | 10 ++++++++++ virt/kvm/kvm_main.c | 12 ++++++++++++ 3 files changed, 24 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3ecd472c7cfa..90b97137840e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1649,6 +1649,8 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); +int kvm_request_create_plane(struct kvm_vcpu *vcpu, unsigned plane, u64 apic_id); + #ifndef CONFIG_S390 void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 24e34b8e4819..a88d987c7882 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -146,6 +146,13 @@ struct kvm_exit_snp_req_certs { __u64 ret; }; +struct kvm_plane_event_exit { +#define KVM_PLANE_EVENT_CREATE_VCPU 1 + __u32 cause; + __u32 plane; + __u64 extra[8]; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -193,6 +200,7 @@ struct kvm_exit_snp_req_certs { #define KVM_EXIT_ARM_SEA 41 #define KVM_EXIT_ARM_LDST64B 42 #define KVM_EXIT_SNP_REQ_CERTS 43 +#define KVM_EXIT_PLANE_EVENT 44 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -497,6 +505,8 @@ struct kvm_run { } arm_sea; /* KVM_EXIT_SNP_REQ_CERTS */ struct kvm_exit_snp_req_certs snp_req_certs; + /* KVM_EXIT_PLANE_EVENT */ + struct kvm_plane_event_exit plane_event; /* Fix the size of the union. 
*/ char padding[256]; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 03a44ff62f0f..f0f78bb74e51 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3973,6 +3973,18 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_wake_up); +int kvm_request_create_plane(struct kvm_vcpu *vcpu, unsigned plane, u64 apic_id) +{ + vcpu->run->exit_reason = KVM_EXIT_PLANE_EVENT; + memset(&vcpu->run->plane_event, 0, sizeof(vcpu->run->plane_event)); + vcpu->run->plane_event.cause = KVM_PLANE_EVENT_CREATE_VCPU; + vcpu->run->plane_event.plane = plane; + vcpu->run->plane_event.extra[0] = apic_id; + + return 0; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_request_create_plane); + #ifndef CONFIG_S390 /* * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. -- 2.53.0 From: Joerg Roedel Move plane allocation to architecture code so that per-arch implementations can embed the structure into another one for keeping additional data. Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 13 +++++++++++++ arch/loongarch/kvm/vm.c | 13 +++++++++++++ arch/mips/kvm/mips.c | 13 +++++++++++++ arch/powerpc/kvm/powerpc.c | 13 +++++++++++++ arch/riscv/kvm/main.c | 13 +++++++++++++ arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 13 +++++++++++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 6 +++--- 9 files changed, 96 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7e6d2773fd39..d7a4b9b239dc 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -205,6 +205,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + /** * kvm_arch_init_vm - initializes a VM data 
structure * @kvm: pointer to the KVM struct diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 14f1232c6e0c..e4d2814b717d 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -114,6 +114,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 60870452119d..e22d2a267e03 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -120,6 +120,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cfa40be20e00..35658cded0cb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -504,6 +504,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 5adba3a455a3..46834b2ddfae 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,6 +22,19 @@ 
unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + static void kvm_riscv_setup_vendor_features(void) { /* Andes AX66: split two-stage TLBs */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 24f24ea95f86..94c40b2aa759 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3191,6 +3191,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + void kvm_arch_free_vm(struct kvm *kvm) { if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 25299c8c28e3..d6bf0425525c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -487,6 +487,19 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } +struct kvm_plane *kvm_alloc_plane(void) +{ + /* For better type checking, do not return kzalloc() value directly */ + struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + + return plane; +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kfree(plane); +} + /* * All feature MSRs except uCode revID, which tracks the currently loaded uCode * patch, are immutable once the vCPU model is defined. 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 90b97137840e..55e3e9046975 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1157,6 +1157,8 @@ void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); unsigned kvm_arch_max_planes(struct kvm *kvm); +struct kvm_plane *kvm_alloc_plane(void); +void kvm_free_plane(struct kvm_plane *plane); #ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0f78bb74e51..4f2c8f46a0d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1209,7 +1209,7 @@ static void kvm_disable_virtualization(void); static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) { - struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); + struct kvm_plane *plane = kvm_alloc_plane(); if (!plane) return NULL; @@ -1227,7 +1227,7 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) return plane; out_free_plane: - kfree(plane); + kvm_free_plane(plane); return NULL; } @@ -1235,7 +1235,7 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level) static void kvm_destroy_one_plane(struct kvm_plane *plane) { kvm_arch_plane_destroy(plane); - kfree(plane); + kvm_free_plane(plane); } static void kvm_destroy_planes(struct kvm *kvm) -- 2.53.0 From: Paolo Bonzini Share the struct kvm_run across all planes for one VCPU id. 
Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 3 +++ virt/kvm/kvm_main.c | 29 ++++++++++++++++------------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 55e3e9046975..385e1ee8fd3a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -327,6 +327,9 @@ struct kvm_mmio_fragment { struct kvm_vcpu_common { struct kvm *kvm; + /* kvm_run struct shared across all planes */ + struct kvm_run *run; + int vcpu_idx; /* index into kvm->planes[]->vcpu_array */ /* Currently active VCPU */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4f2c8f46a0d3..2d0d5f4fd356 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -441,6 +441,7 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned long id) { struct kvm_vcpu_common *common __free(kfree) = kzalloc(sizeof(*common), GFP_KERNEL_ACCOUNT); + struct page *page; int r; /* @@ -466,6 +467,14 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned common->vcpu_idx = atomic_read(&kvm->online_vcpus); + BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto out_drop_counter; + } + common->run = page_address(page); + mutex_init(&common->mutex); #ifndef __KVM_HAVE_ARCH_WQP @@ -487,7 +496,7 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned r = kvm_dirty_ring_alloc(kvm, &common->dirty_ring, id, kvm->dirty_ring_size); if (r) - goto out_drop_counter; + goto out_free_run; } r = kvm_arch_vcpu_common_init(common); @@ -503,6 +512,8 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned out_free_dirty_ring: kvm_dirty_ring_free(&common->dirty_ring); +out_free_run: + free_page((unsigned long)common->run); 
out_drop_counter: mutex_lock(&kvm->lock); kvm->created_vcpus--; @@ -546,6 +557,7 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) struct kvm *kvm = common->kvm; vcpu->common = NULL; + vcpu->run = NULL; if (vcpu->plane_level != 0) return; @@ -563,6 +575,7 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) */ put_pid(common->pid); kvm_dirty_ring_free(&common->dirty_ring); + free_page((unsigned long)common->run); kfree(common); } @@ -4337,7 +4350,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) { int r = -EINVAL; struct kvm_vcpu *vcpu; - struct page *page; mutex_lock(&kvm->lock); if (kvm->created_vcpus >= kvm->max_vcpus) { @@ -4359,20 +4371,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) goto vcpu_free; vcpu->vcpu_idx = vcpu->common->vcpu_idx; - - BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!page) { - r = -ENOMEM; - goto vcpu_free_common; - } - vcpu->run = page_address(page); + vcpu->run = vcpu->common->run; kvm_vcpu_init(vcpu, kvm, id); r = kvm_arch_vcpu_create(vcpu); if (r) - goto vcpu_free_run_page; + goto vcpu_free_common; mutex_lock(&kvm->lock); @@ -4415,8 +4420,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) unlock_vcpu_destroy: mutex_unlock(&kvm->lock); kvm_arch_vcpu_destroy(vcpu); -vcpu_free_run_page: - free_page((unsigned long)vcpu->run); vcpu_free_common: kvm_vcpu_common_destroy(vcpu); vcpu_free: -- 2.53.0 From: Joerg Roedel Implement the KVM_CREATE_VCPU ioctl per plane. Also introduce an empty IOCTL path for the plane-vcpus, including per-architecture call-backs. 
Co-developed-by: Carlos López Signed-off-by: Joerg Roedel --- arch/arm64/kvm/arm.c | 5 ++ arch/loongarch/kvm/vcpu.c | 5 ++ arch/mips/kvm/mips.c | 5 ++ arch/powerpc/kvm/powerpc.c | 5 ++ arch/riscv/kvm/vcpu.c | 5 ++ arch/s390/kvm/kvm-s390.c | 5 ++ arch/x86/kvm/x86.c | 29 ++++++++++++ include/linux/kvm_host.h | 12 +++-- virt/kvm/kvm_main.c | 97 ++++++++++++++++++++++++++++---------- 9 files changed, 141 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d7a4b9b239dc..b2bfea5df7e0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1824,6 +1824,11 @@ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return __kvm_arm_vcpu_set_events(vcpu, events); } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index bab3c66ae58d..0b66b8186923 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1232,6 +1232,11 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, return ret; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index e22d2a267e03..28795bad178b 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -933,6 +933,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, return -ENOIOCTLCMD; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 35658cded0cb..476f7ea02c79 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2055,6 +2055,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int 
ioctl, return -ENOIOCTLCMD; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 66cde226eb87..17680b659bdd 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -263,6 +263,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, return -ENOIOCTLCMD; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 94c40b2aa759..261859cb1bb6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5454,6 +5454,11 @@ static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, return ret; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + return false; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6bf0425525c..623838885753 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6227,6 +6227,35 @@ static int kvm_get_reg_list(struct kvm_vcpu *vcpu, return 0; } +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl) +{ + switch (ioctl) { + case KVM_GET_DEBUGREGS: + case KVM_SET_DEBUGREGS: + case KVM_GET_LAPIC: + case KVM_SET_LAPIC: + case KVM_GET_MSRS: + case KVM_SET_MSRS: + case KVM_GET_NESTED_STATE: + case KVM_SET_NESTED_STATE: + case KVM_GET_ONE_REG: + case KVM_SET_ONE_REG: + case KVM_GET_SREGS2: + case KVM_SET_SREGS2: + case KVM_GET_VCPU_EVENTS: + case KVM_SET_VCPU_EVENTS: + case KVM_GET_XCRS: + case KVM_SET_XCRS: + case KVM_GET_XSAVE: + case KVM_GET_XSAVE2: + case KVM_SET_XSAVE: + case KVM_GET_REG_LIST: + return true; + default: + return false; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git 
a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 385e1ee8fd3a..b8c3f8f11cb4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1126,7 +1126,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) #define kvm_for_each_vcpu(idx, vcpup, kvm) \ plane_for_each_vcpu(idx, vcpup, kvm->planes[0]) -static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +static inline struct kvm_vcpu *plane_get_vcpu_by_id(struct kvm_plane *plane, int id) { struct kvm_vcpu *vcpu = NULL; unsigned long i; @@ -1134,15 +1134,20 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) if (id < 0) return NULL; if (id < KVM_MAX_VCPUS) - vcpu = kvm_get_vcpu(kvm, id); + vcpu = plane_get_vcpu(plane, id); if (vcpu && vcpu->vcpu_id == id) return vcpu; - kvm_for_each_vcpu(i, vcpu, kvm) + plane_for_each_vcpu(i, vcpu, plane) if (vcpu->vcpu_id == id) return vcpu; return NULL; } +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + return plane_get_vcpu_by_id(kvm->planes[0], id); +} + static inline bool kvm_is_vcpu_creation_in_progress(struct kvm *kvm) { lockdep_assert_held(&kvm->lock); @@ -1688,6 +1693,7 @@ bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2d0d5f4fd356..8839f91fd15e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -538,14 +538,11 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { vcpu->cpu = -1; vcpu->kvm = kvm; - vcpu->plane = kvm->planes[0]; vcpu->vcpu_id = id; kvm_async_pf_vcpu_init(vcpu); vcpu->last_used_slot = NULL; - vcpu->plane_level = 0; - /* Fill the stats id string for 
the vcpu */ snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", task_pid_nr(current), id); @@ -4306,9 +4303,13 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - char name[8 + 1 + ITOA_MAX_LEN + 1]; + char name[14 + 1 + (2 * ITOA_MAX_LEN) + 1]; + + if (vcpu->plane_level == 0) + snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); + else + snprintf(name, sizeof(name), "kvm-vcpu-plane%d:%d", vcpu->plane_level, vcpu->vcpu_id); - snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } @@ -4327,13 +4328,17 @@ DEFINE_SIMPLE_ATTRIBUTE(vcpu_get_pid_fops, vcpu_get_pid, NULL, "%llu\n"); static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { + char dir_name[10 + (2 * ITOA_MAX_LEN) + 1]; struct dentry *debugfs_dentry; - char dir_name[ITOA_MAX_LEN * 2]; if (!debugfs_initialized()) return; - snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); + if (vcpu->plane_level == 0) + snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); + else + snprintf(dir_name, sizeof(dir_name), "vcpu%d-plane%d", vcpu->vcpu_id, vcpu->plane_level); + debugfs_dentry = debugfs_create_dir(dir_name, vcpu->kvm->debugfs_dentry); debugfs_create_file("pid", 0444, debugfs_dentry, vcpu, @@ -4346,10 +4351,11 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) /* * Creates some virtual cpus. Good luck creating more than one. 
*/ -static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) +static int kvm_plane_ioctl_create_vcpu(struct kvm_plane *plane, unsigned long id) { - int r = -EINVAL; + struct kvm *kvm = plane->kvm; struct kvm_vcpu *vcpu; + int r; mutex_lock(&kvm->lock); if (kvm->created_vcpus >= kvm->max_vcpus) { @@ -4366,11 +4372,28 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) if (!vcpu) return -ENOMEM; - r = kvm_vcpu_init_common(vcpu, kvm, id); - if (r) + r = -EEXIST; + if (plane_get_vcpu_by_id(plane, id)) goto vcpu_free; + if (plane->level > 0) { + struct kvm_vcpu *vcpu_plane0 = kvm_get_vcpu_by_id(kvm, id); + + /* Plane0 VCPU must exist before creating non-plane0 VCPUs */ + r = -EINVAL; + if (vcpu_plane0 == NULL) + goto vcpu_free; + + vcpu->common = vcpu_plane0->common; + } else { + r = kvm_vcpu_init_common(vcpu, kvm, id); + if (r) + goto vcpu_free; + } + vcpu->vcpu_idx = vcpu->common->vcpu_idx; + vcpu->plane = plane; + vcpu->plane_level = plane->level; vcpu->run = vcpu->common->run; kvm_vcpu_init(vcpu, kvm, id); @@ -4381,12 +4404,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) mutex_lock(&kvm->lock); - if (kvm_get_vcpu_by_id(kvm, id)) { - r = -EEXIST; - goto unlock_vcpu_destroy; - } - - r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); + r = xa_insert(&plane->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); WARN_ON_ONCE(r == -EBUSY); if (r) goto unlock_vcpu_destroy; @@ -4416,7 +4434,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) kvm_put_xa_erase: kvm_vcpu_unlock(vcpu); kvm_put_kvm_no_destroy(kvm); - xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx); + xa_erase(&plane->vcpu_array, vcpu->vcpu_idx); unlock_vcpu_destroy: mutex_unlock(&kvm->lock); kvm_arch_vcpu_destroy(vcpu); @@ -4550,7 +4568,7 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu) /* * Acquire and release the vCPU's mutex to wait for vCPU creation to - * complete 
(kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU + * complete (kvm_plane_ioctl_create_vcpu() holds the mutex until the vCPU * is fully online). */ if (mutex_lock_killable(kvm_vcpu_mutex(vcpu))) @@ -4564,6 +4582,22 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu) return 0; } +static inline bool kvm_is_vcpu_plane_ioctl(unsigned ioctl) +{ + switch (ioctl) { + case KVM_GET_FPU: + case KVM_SET_FPU: + case KVM_GET_REGS: + case KVM_SET_REGS: + case KVM_GET_SREGS: + case KVM_SET_SREGS: + case KVM_TRANSLATE: + return true; + default: + return kvm_arch_is_vcpu_plane_ioctl(ioctl); + } +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4576,6 +4610,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) return -EIO; + if (vcpu->plane_level > 0 && !kvm_is_vcpu_plane_ioctl(ioctl)) + return -EINVAL; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL; @@ -4858,6 +4895,21 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, } #endif +static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl, unsigned long arg) +{ + long r; + + switch (ioctl) { + case KVM_CREATE_VCPU: + r = kvm_plane_ioctl_create_vcpu(plane, arg); + break; + default: + r = -ENOTTY; + } + + return r; +} + static long kvm_plane_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4866,10 +4918,7 @@ static long kvm_plane_ioctl(struct file *filp, unsigned int ioctl, if (plane->kvm->mm != current->mm || plane->kvm->vm_dead) return -EIO; - switch (ioctl) { - default: - return -ENOTTY; - } + return __kvm_plane_ioctl(plane, ioctl, arg); } static int kvm_plane_release(struct inode *inode, struct file *filp) @@ -5396,7 +5445,7 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_plane(kvm, arg); break; case KVM_CREATE_VCPU: - r = kvm_vm_ioctl_create_vcpu(kvm, arg); + r = __kvm_plane_ioctl(kvm->planes[0], ioctl, arg); break; case KVM_ENABLE_CAP: { 
struct kvm_enable_cap cap; -- 2.53.0 From: Joerg Roedel Introduce an array which keeps track of per-plane VCPU instances for a single VCPU index. This is a short-cut to not always traverse the xarrays on plane switches. Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 6 ++++++ virt/kvm/kvm_main.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8c3f8f11cb4..5c3f9dfa15ea 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -335,6 +335,8 @@ struct kvm_vcpu_common { /* Currently active VCPU */ struct kvm_vcpu *current_vcpu; + struct kvm_vcpu *vcpus[KVM_MAX_PLANES]; + /* Locks */ int ____srcu_idx; /* Don't use this directly. You've been warned. */ #ifdef CONFIG_PROVE_RCU @@ -382,6 +384,10 @@ struct kvm_vcpu_common { struct kvm_vcpu_arch_common arch; }; +#define vcpu_for_each_plane(common, i, v) \ + for ((i) = 0; (i) < KVM_MAX_PLANES; ++(i)) \ + if (((v) = common->vcpus[(i)]) != NULL) + struct kvm_vcpu { struct kvm *kvm; struct kvm_plane *plane; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8839f91fd15e..9d30fd85ce5f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -524,6 +524,8 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned static void kvm_vcpu_finish_common(struct kvm_vcpu *vcpu) { + WARN_ON(vcpu->common->vcpus[vcpu->plane_level] != NULL); + vcpu->common->vcpus[vcpu->plane_level] = vcpu; smp_wmb(); if (vcpu->plane_level == 0) { /* @@ -555,6 +557,7 @@ static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu) vcpu->common = NULL; vcpu->run = NULL; + common->vcpus[vcpu->plane_level] = NULL; if (vcpu->plane_level != 0) return; -- 2.53.0 From: Joerg Roedel The algorithm is to always run the lowest runnable plane. Plane switches are done by stopping the current plane and setting another runnable. 
Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 16 ++++++++++++++ virt/kvm/kvm_main.c | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f9dfa15ea..e3611e6cc3e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -168,6 +168,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 +#define KVM_REQ_PLANE_RESCHED 4 #define KVM_REQUEST_ARCH_BASE 8 /* @@ -324,6 +325,8 @@ struct kvm_mmio_fragment { unsigned int len; }; + + struct kvm_vcpu_common { struct kvm *kvm; @@ -381,6 +384,8 @@ struct kvm_vcpu_common { struct kvm_dirty_ring dirty_ring; + bool plane_switch; + struct kvm_vcpu_arch_common arch; }; @@ -388,6 +393,12 @@ struct kvm_vcpu_common { for ((i) = 0; (i) < KVM_MAX_PLANES; ++(i)) \ if (((v) = common->vcpus[(i)]) != NULL) +/* Tracked per plane-VCPU - used for deciding which plane-vcpu to run */ +enum kvm_vcpu_state { + STOPPED, + RUNNABLE, +}; + struct kvm_vcpu { struct kvm *kvm; struct kvm_plane *plane; @@ -401,6 +412,7 @@ struct kvm_vcpu { struct kvm_run *run; u64 plane_requests; + enum kvm_vcpu_state plane_state; /* S390 only */ bool valid_wakeup; @@ -440,6 +452,10 @@ struct kvm_vcpu { unsigned plane_level; }; +void kvm_vcpu_set_plane_runnable(struct kvm_vcpu *vcpu); +void kvm_vcpu_set_plane_stopped(struct kvm_vcpu *vcpu); +struct kvm_vcpu *kvm_vcpu_select_plane(struct kvm_vcpu *vcpu); + static inline bool kvm_vcpu_wants_to_run(struct kvm_vcpu *vcpu) { return vcpu->common->wants_to_run; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9d30fd85ce5f..a30123b77112 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4397,6 +4397,7 @@ static int kvm_plane_ioctl_create_vcpu(struct kvm_plane *plane, unsigned long id vcpu->vcpu_idx = vcpu->common->vcpu_idx; vcpu->plane = plane; 
vcpu->plane_level = plane->level; + vcpu->plane_state = STOPPED; vcpu->run = vcpu->common->run; kvm_vcpu_init(vcpu, kvm, id); @@ -4938,6 +4939,50 @@ static struct file_operations kvm_plane_fops = { KVM_COMPAT(kvm_plane_ioctl), }; +void kvm_vcpu_set_plane_runnable(struct kvm_vcpu *vcpu) +{ + vcpu->plane_state = RUNNABLE; + vcpu->common->plane_switch = true; + kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_set_plane_runnable); + +void kvm_vcpu_set_plane_stopped(struct kvm_vcpu *vcpu) +{ + vcpu->plane_state = STOPPED; + kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_set_plane_stopped); + +struct kvm_vcpu *kvm_vcpu_select_plane(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_common *common = vcpu->common; + struct kvm_vcpu *ret = NULL; + unsigned i; + + for (i = 0; i < KVM_MAX_PLANES; i++) { + if (common->vcpus[i] == NULL) + continue; + + if (common->vcpus[i]->plane_state == RUNNABLE) { + ret = common->vcpus[i]; + break; + } + } + + if (ret == NULL) { + ret = common->vcpus[0]; + ret->plane_state = RUNNABLE; + } + + common->current_vcpu = ret; + + common->plane_switch = false; + + return ret; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_select_plane); + static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) { struct kvm_device *dev = filp->private_data; -- 2.53.0 From: Joerg Roedel Track the target plane-level of MSI irqs in struct kvm_kernel_irq_routing_entry. This will be used to send MSI IRQs to the right plane-level once planes are supported. 
Signed-off-by: Joerg Roedel --- arch/arm64/kvm/vgic/vgic-irqfd.c | 1 + arch/loongarch/kvm/irqfd.c | 1 + arch/powerpc/kvm/mpic.c | 1 + arch/riscv/kvm/vm.c | 1 + arch/x86/kvm/irq.c | 1 + include/linux/kvm_host.h | 1 + virt/kvm/irqchip.c | 1 + 7 files changed, 7 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index b9b86e3a6c86..479b896c8954 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -57,6 +57,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.data = ue->u.msi.data; e->msi.flags = ue->flags; e->msi.devid = ue->u.msi.devid; + e->msi.plane_level = 0; break; default: goto out; diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c index f4f953b22419..50f0c32df46c 100644 --- a/arch/loongarch/kvm/irqfd.c +++ b/arch/loongarch/kvm/irqfd.c @@ -60,6 +60,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; + e->msi.plane_level = 0; return 0; default: return -EINVAL; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 3070f36d9fb8..0f568f5fff8b 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1841,6 +1841,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; + e->msi.plane_level = 0; break; default: goto out; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index a9f083feeb76..f518247e699b 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -138,6 +138,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.data = ue->u.msi.data; e->msi.flags = ue->flags; e->msi.devid = ue->u.msi.devid; + e->msi.plane_level = 0; break; default: goto out; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 9519fec09ee6..b7e08eddb765 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -332,6 +332,7 @@ int 
kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; + e->msi.plane_level = 0; if (kvm_msi_route_invalid(kvm, e)) return -EINVAL; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e3611e6cc3e4..16dcca3132d3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -782,6 +782,7 @@ struct kvm_kernel_irq_routing_entry { u32 data; u32 flags; u32 devid; + unsigned plane_level; } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 462c70621247..ae47e56176f1 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -57,6 +57,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) route.msi.data = msi->data; route.msi.flags = msi->flags; route.msi.devid = msi->devid; + route.msi.plane_level = 0; return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } -- 2.53.0 From: Joerg Roedel The plane_level is used to route MSI IRQs to the correct plane. Signed-off-by: Joerg Roedel --- arch/arm64/kvm/vgic/vgic-irqfd.c | 6 ++++-- arch/loongarch/kvm/irqfd.c | 6 ++++-- arch/powerpc/kvm/mpic.c | 5 +++-- arch/riscv/kvm/vm.c | 5 +++-- arch/s390/kvm/interrupt.c | 3 ++- arch/x86/kvm/irq.c | 7 ++++--- include/linux/kvm_host.h | 3 ++- virt/kvm/irqchip.c | 2 +- 8 files changed, 23 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 479b896c8954..53e5fcc591d7 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -33,11 +33,13 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, * @kvm: the VM this entry is applied to * @e: kvm kernel routing entry handle * @ue: user api routing entry handle + * @plane_level: target plane level * return 0 on success, -EINVAL on errors. 
*/ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { int r = -EINVAL; @@ -57,7 +59,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.data = ue->u.msi.data; e->msi.flags = ue->flags; e->msi.devid = ue->u.msi.devid; - e->msi.plane_level = 0; + e->msi.plane_level = plane_level; break; default: goto out; diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c index 50f0c32df46c..a36a8a9d8a66 100644 --- a/arch/loongarch/kvm/irqfd.c +++ b/arch/loongarch/kvm/irqfd.c @@ -39,11 +39,13 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, * @kvm: the VM this entry is applied to * @e: kvm kernel routing entry handle * @ue: user api routing entry handle + * @plane_level: target plane level * return 0 on success, -EINVAL on errors. */ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: @@ -60,7 +62,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; - e->msi.plane_level = 0; + e->msi.plane_level = plane_level; return 0; default: return -EINVAL; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 0f568f5fff8b..6b6eba7fbf75 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1824,7 +1824,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { int r = -EINVAL; @@ -1841,7 +1842,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = 
ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; - e->msi.plane_level = 0; + e->msi.plane_level = plane_level; break; default: goto out; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index f518247e699b..6b3c8a0e74e2 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -118,7 +118,8 @@ bool kvm_arch_can_set_irq_routing(struct kvm *kvm) int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { int r = -EINVAL; @@ -138,7 +139,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.data = ue->u.msi.data; e->msi.flags = ue->flags; e->msi.devid = ue->u.msi.devid; - e->msi.plane_level = 0; + e->msi.plane_level = plane_level; break; default: goto out; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1d66ef9f7527..dbd6029773aa 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2862,7 +2862,8 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { const struct kvm_irq_routing_s390_adapter *adapter; u64 uaddr_s, uaddr_i; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index b7e08eddb765..d2ecfd54d57a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -295,7 +295,8 @@ bool kvm_arch_can_set_irq_routing(struct kvm *kvm) int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { /* We can't check irqchip_in_kernel() here as some callers are * currently initializing the irqchip. 
Other callers should therefore @@ -304,7 +305,7 @@ int kvm_set_routing_entry(struct kvm *kvm, switch (ue->type) { #ifdef CONFIG_KVM_IOAPIC case KVM_IRQ_ROUTING_IRQCHIP: - if (irqchip_split(kvm)) + if (irqchip_split(kvm) || plane_level != 0) return -EINVAL; e->irqchip.pin = ue->u.irqchip.pin; switch (ue->u.irqchip.irqchip) { @@ -332,7 +333,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; - e->msi.plane_level = 0; + e->msi.plane_level = plane_level; if (kvm_msi_route_invalid(kvm, e)) return -EINVAL; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 16dcca3132d3..cfb6911d6771 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2355,7 +2355,8 @@ int kvm_set_irq_routing(struct kvm *kvm, int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue); + const struct kvm_irq_routing_entry *ue, + unsigned plane_level); void kvm_free_irq_routing(struct kvm *kvm); #else diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index ae47e56176f1..14480d1df4f9 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -146,7 +146,7 @@ static int setup_routing_entry(struct kvm *kvm, e->gsi = gsi; e->type = ue->type; - r = kvm_set_routing_entry(kvm, e, ue); + r = kvm_set_routing_entry(kvm, e, ue, 0); if (r) return r; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) -- 2.53.0 From: Paolo Bonzini Allow the KVM_SIGNAL_MSI ioctl for every plane instead of per VM. 
Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- include/linux/kvm_host.h | 2 +- virt/kvm/irqchip.c | 4 ++-- virt/kvm/kvm_main.c | 25 ++++++++++++++----------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cfb6911d6771..05a10836d92d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2370,7 +2370,7 @@ static inline int kvm_init_irq_routing(struct kvm *kvm) #endif -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi, unsigned plane_level); void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 14480d1df4f9..a4fea7d8dde6 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -45,7 +45,7 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) return irq_rt->chip[irqchip][pin]; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi, unsigned plane_level) { struct kvm_kernel_irq_routing_entry route; @@ -57,7 +57,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) route.msi.data = msi->data; route.msi.flags = msi->flags; route.msi.devid = msi->devid; - route.msi.plane_level = 0; + route.msi.plane_level = plane_level; return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a30123b77112..dc59f2f9d405 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4907,6 +4907,17 @@ static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl, unsig case KVM_CREATE_VCPU: r = kvm_plane_ioctl_create_vcpu(plane, arg); break; +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_SIGNAL_MSI: { + void __user *argp = (void __user *)arg; + struct kvm_msi msi; + + if 
(copy_from_user(&msi, argp, sizeof(msi))) + return -EFAULT; + r = kvm_send_userspace_msi(plane->kvm, &msi, plane->level); + break; + } +#endif default: r = -ENOTTY; } @@ -5493,6 +5504,9 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_plane(kvm, arg); break; case KVM_CREATE_VCPU: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_SIGNAL_MSI: +#endif r = __kvm_plane_ioctl(kvm->planes[0], ioctl, arg); break; case KVM_ENABLE_CAP: { @@ -5597,17 +5611,6 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_ioeventfd(kvm, &data); break; } -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_SIGNAL_MSI: { - struct kvm_msi msi; - - r = -EFAULT; - if (copy_from_user(&msi, argp, sizeof(msi))) - goto out; - r = kvm_send_userspace_msi(kvm, &msi); - break; - } -#endif #ifdef __KVM_HAVE_IRQ_LINE case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { -- 2.53.0 From: Joerg Roedel Allow the KVM_SET_GSI_ROUTING ioctl on each plane. There is still only one GSI routing table maintained per VM by the KVM module, so the plane on which this ioctl was last issued will receive all GSI interrupts. 
Signed-off-by: Joerg Roedel --- arch/arm64/kvm/vgic/vgic-irqfd.c | 2 +- arch/loongarch/kvm/intc/pch_pic.c | 2 +- arch/powerpc/kvm/mpic.c | 2 +- arch/riscv/kvm/vm.c | 2 +- arch/x86/kvm/irq.c | 2 +- include/linux/kvm_host.h | 3 +- virt/kvm/irqchip.c | 10 +++-- virt/kvm/kvm_main.c | 62 +++++++++++++++---------------- 8 files changed, 44 insertions(+), 41 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 53e5fcc591d7..96981dd29a6e 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -153,7 +153,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) entries[i].u.irqchip.irqchip = 0; entries[i].u.irqchip.pin = i; } - ret = kvm_set_irq_routing(kvm, entries, nr, 0); + ret = kvm_set_irq_routing(kvm, entries, nr, 0, 0); kfree(entries); return ret; } diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index aa0ed59ae8cf..a0d04ffa8a71 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -423,7 +423,7 @@ static int kvm_setup_default_irq_routing(struct kvm *kvm) entries[i].u.irqchip.irqchip = 0; entries[i].u.irqchip.pin = i; } - ret = kvm_set_irq_routing(kvm, entries, nr, 0); + ret = kvm_set_irq_routing(kvm, entries, nr, 0, 0); kfree(entries); return ret; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 6b6eba7fbf75..1e493179ee4f 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1646,7 +1646,7 @@ static int mpic_set_default_irq_routing(struct openpic *opp) if (!routing) return -ENOMEM; - kvm_set_irq_routing(opp->kvm, routing, 0, 0); + kvm_set_irq_routing(opp->kvm, routing, 0, 0, 0); kfree(routing); return 0; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 6b3c8a0e74e2..bd9ab3240e4f 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -105,7 +105,7 @@ int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines) ents[i].u.irqchip.irqchip = 0; 
ents[i].u.irqchip.pin = i; } - rc = kvm_set_irq_routing(kvm, ents, lines, 0); + rc = kvm_set_irq_routing(kvm, ents, lines, 0, 0); kfree(ents); return rc; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index d2ecfd54d57a..90e2d2db2123 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -576,7 +576,7 @@ static const struct kvm_irq_routing_entry default_routing[] = { int kvm_setup_default_ioapic_and_pic_routing(struct kvm *kvm) { return kvm_set_irq_routing(kvm, default_routing, - ARRAY_SIZE(default_routing), 0); + ARRAY_SIZE(default_routing), 0, 0); } int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 05a10836d92d..3b62fb354267 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2351,7 +2351,8 @@ bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, - unsigned flags); + unsigned flags, + unsigned plane_level); int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index a4fea7d8dde6..e0793ae0c719 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -128,7 +128,8 @@ void kvm_free_irq_routing(struct kvm *kvm) static int setup_routing_entry(struct kvm *kvm, struct kvm_irq_routing_table *rt, struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) + const struct kvm_irq_routing_entry *ue, + unsigned plane_level) { struct kvm_kernel_irq_routing_entry *ei; int r; @@ -146,7 +147,7 @@ static int setup_routing_entry(struct kvm *kvm, e->gsi = gsi; e->type = ue->type; - r = kvm_set_routing_entry(kvm, e, ue, 0); + r = kvm_set_routing_entry(kvm, e, ue, plane_level); if (r) return r; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) @@ -169,7 +170,8 @@ bool __weak kvm_arch_can_set_irq_routing(struct kvm *kvm) int 
kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, unsigned nr, - unsigned flags) + unsigned flags, + unsigned plane_level) { struct kvm_irq_routing_table *new, *old; struct kvm_kernel_irq_routing_entry *e; @@ -210,7 +212,7 @@ int kvm_set_irq_routing(struct kvm *kvm, goto free_entry; break; } - r = setup_routing_entry(kvm, new, e, ue); + r = setup_routing_entry(kvm, new, e, ue, plane_level); if (r) goto free_entry; ++ue; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc59f2f9d405..a6d7601c3412 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4918,6 +4918,34 @@ static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl, unsig break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + void __user *argp = (void __user *)arg; + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries = NULL; + + if (copy_from_user(&routing, argp, sizeof(routing))) + return -EFAULT; + if (!kvm_arch_can_set_irq_routing(plane->kvm) || + routing.nr > KVM_MAX_IRQ_ROUTES || + routing.flags) + return -EINVAL; + if (routing.nr) { + urouting = argp; + entries = vmemdup_array_user(urouting->entries, + routing.nr, sizeof(*entries)); + if (IS_ERR(entries)) { + r = PTR_ERR(entries); + return r; + } + } + r = kvm_set_irq_routing(plane->kvm, entries, routing.nr, + routing.flags, plane->level); + kvfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ default: r = -ENOTTY; } @@ -5506,6 +5534,9 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CREATE_VCPU: #ifdef CONFIG_HAVE_KVM_MSI case KVM_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: #endif r = __kvm_plane_ioctl(kvm->planes[0], ioctl, arg); break; @@ -5635,37 +5666,6 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct 
kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries = NULL; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (!kvm_arch_can_set_irq_routing(kvm)) - goto out; - if (routing.nr > KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - if (routing.nr) { - urouting = argp; - entries = vmemdup_array_user(urouting->entries, - routing.nr, sizeof(*entries)); - if (IS_ERR(entries)) { - r = PTR_ERR(entries); - goto out; - } - } - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - kvfree(entries); - break; - } -#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES case KVM_SET_MEMORY_ATTRIBUTES: { struct kvm_memory_attributes attrs; -- 2.53.0 From: Joerg Roedel Make sure the handling of IOAPIC EOIs is aware of planes. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/irq.c | 3 ++- arch/x86/kvm/x86.c | 10 ++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1393566741a0..134bc02962fd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -109,7 +109,7 @@ #define KVM_REQ_APIC_PAGE_RELOAD \ KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) -#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) +#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_PLANE_REQ(19) #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 90e2d2db2123..bc748a4b7cbd 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -400,7 +400,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, hlist_for_each_entry(entry, &table->map[i], link) { struct kvm_lapic_irq irq; - if (entry->type != KVM_IRQ_ROUTING_MSI) + if (entry->type != 
KVM_IRQ_ROUTING_MSI || + entry->msi.plane_level != vcpu->plane_level) continue; kvm_msi_to_lapic_irq(vcpu->kvm, entry, &irq); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 623838885753..a158740a6fc1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11336,8 +11336,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } } - if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) - vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) { + struct kvm_vcpu *v; + unsigned i; + + vcpu_for_each_plane(vcpu->common, i, v) { + vcpu_scan_ioapic(v); + } + } if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) vcpu_load_eoi_exitmap(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) -- 2.53.0 From: Paolo Bonzini Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 24 +++++------ arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 71 +++++++++++++++++---------------- arch/x86/kvm/x86.c | 18 +++++++-- 4 files changed, 61 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 134bc02962fd..11e52f8bb2c2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1418,16 +1418,17 @@ enum kvm_mmu_type { }; /* Per-plane state of VM */ -struct kvm_arch_plane {}; +struct kvm_arch_plane { + atomic_t vapics_in_nmi_mode; -static inline int kvm_arch_plane_init(struct kvm *kvm, - struct kvm_plane *plane, - unsigned plane_level) -{ - return 0; -} + struct mutex apic_map_lock; + struct kvm_apic_map __rcu *apic_map; + atomic_t apic_map_dirty; +}; -static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {} +int kvm_arch_plane_init(struct kvm *kvm, struct kvm_plane *plane, + unsigned plane_level); +void kvm_arch_plane_destroy(struct kvm_plane *plane); struct kvm_arch { unsigned long n_used_mmu_pages; @@ -1465,11 +1466,6 @@ struct kvm_arch { struct kvm_ioapic 
*vioapic; struct kvm_pit *vpit; #endif - atomic_t vapics_in_nmi_mode; - - struct mutex apic_map_lock; - struct kvm_apic_map __rcu *apic_map; - atomic_t apic_map_dirty; bool apic_access_memslot_enabled; bool apic_access_memslot_inhibited; @@ -2458,7 +2454,7 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, +int kvm_pv_send_ipi(struct kvm_vcpu *kvm_vcpu, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 1982b0077ddd..bfe590378bd2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -260,7 +260,7 @@ static void pit_do_work(struct kthread_work *work) * VCPUs and only when LVT0 is in NMI mode. The interrupt can * also be simultaneously delivered through PIC and IOAPIC. */ - if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) + if (atomic_read(&kvm->planes[0]->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1b763f164951..06a12b49fafa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -403,36 +403,37 @@ enum { DIRTY }; -static void kvm_recalculate_apic_map(struct kvm *kvm) +static void kvm_recalculate_apic_map(struct kvm_plane *plane) { struct kvm_apic_map *new, *old = NULL; + struct kvm *kvm = plane->kvm; struct kvm_vcpu *vcpu; unsigned long i; u32 max_id = 255; /* enough space for any xAPIC ID */ bool xapic_id_mismatch; int r; - /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ - if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) + /* Read plane->arch.apic_map_dirty before plane->arch.apic_map. 
*/ + if (atomic_read_acquire(&plane->arch.apic_map_dirty) == CLEAN) return; - WARN_ONCE(!irqchip_in_kernel(kvm), + WARN_ONCE(!irqchip_in_kernel(plane->kvm), "Dirty APIC map without an in-kernel local APIC"); - mutex_lock(&kvm->arch.apic_map_lock); + mutex_lock(&plane->arch.apic_map_lock); retry: /* - * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean) + * Read plane->arch.apic_map_dirty before plane->arch.apic_map (if clean) * or the APIC registers (if dirty). Note, on retry the map may have * not yet been marked dirty by whatever task changed a vCPU's x2APIC * ID, i.e. the map may still show up as in-progress. In that case * this task still needs to retry and complete its calculation. */ - if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, + if (atomic_cmpxchg_acquire(&plane->arch.apic_map_dirty, DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { /* Someone else has updated the map. */ - mutex_unlock(&kvm->arch.apic_map_lock); + mutex_unlock(&plane->arch.apic_map_lock); return; } @@ -445,7 +446,7 @@ static void kvm_recalculate_apic_map(struct kvm *kvm) */ xapic_id_mismatch = false; - kvm_for_each_vcpu(i, vcpu, kvm) + plane_for_each_vcpu(i, vcpu, plane) if (kvm_apic_present(vcpu)) max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); @@ -459,7 +460,7 @@ static void kvm_recalculate_apic_map(struct kvm *kvm) new->max_apic_id = max_id; new->logical_mode = KVM_APIC_MODE_SW_DISABLED; - kvm_for_each_vcpu(i, vcpu, kvm) { + plane_for_each_vcpu(i, vcpu, plane) { if (!kvm_apic_present(vcpu)) continue; @@ -498,16 +499,16 @@ static void kvm_recalculate_apic_map(struct kvm *kvm) else kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); - old = rcu_dereference_protected(kvm->arch.apic_map, - lockdep_is_held(&kvm->arch.apic_map_lock)); - rcu_assign_pointer(kvm->arch.apic_map, new); + old = rcu_dereference_protected(plane->arch.apic_map, + lockdep_is_held(&plane->arch.apic_map_lock)); + rcu_assign_pointer(plane->arch.apic_map, new); /* - * Write 
kvm->arch.apic_map before clearing apic->apic_map_dirty. + * Write plane->arch.apic_map before clearing plane->arch.apic_map_dirty. * If another update has come in, leave it DIRTY. */ - atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, + atomic_cmpxchg_release(&plane->arch.apic_map_dirty, UPDATE_IN_PROGRESS, CLEAN); - mutex_unlock(&kvm->arch.apic_map_lock); + mutex_unlock(&plane->arch.apic_map_lock); if (old) kvfree_rcu(old, rcu); @@ -528,7 +529,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) else static_branch_inc(&apic_sw_disabled.key); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } /* Check if there are APF page ready requests pending */ @@ -541,19 +542,19 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) { kvm_lapic_set_reg(apic, APIC_LDR, id); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val) { kvm_lapic_set_reg(apic, APIC_DFR, val); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) @@ -564,7 +565,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } static 
inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) @@ -860,7 +861,7 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, return count; } -int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, +int kvm_pv_send_ipi(struct kvm_vcpu *vcpu, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit) { @@ -878,7 +879,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, irq.trig_mode = icr & APIC_INT_LEVELTRIG; rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); + map = rcu_dereference(vcpu->plane->arch.apic_map); count = -EOPNOTSUPP; if (likely(map)) { @@ -1240,7 +1241,7 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s } rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); + map = rcu_dereference(kvm->planes[0]->arch.apic_map); ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); if (ret) { @@ -1290,7 +1291,7 @@ static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, return false; rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); + map = rcu_dereference(kvm->planes[0]->arch.apic_map); if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && hweight16(bitmap) == 1) { @@ -1511,7 +1512,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, bool ret; rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); + map = rcu_dereference(kvm->planes[0]->arch.apic_map); ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, &bitmap); @@ -2389,9 +2390,9 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; if (lvt0_in_nmi_mode) { - atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + atomic_inc(&apic->vcpu->plane->arch.vapics_in_nmi_mode); } else - atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + 
atomic_dec(&apic->vcpu->plane->arch.vapics_in_nmi_mode); } } @@ -2551,7 +2552,7 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) * was toggled, the APIC ID changed, etc... The maps are marked dirty * on relevant changes, i.e. this is a nop for most writes. */ - kvm_recalculate_apic_map(apic->vcpu->kvm); + kvm_recalculate_apic_map(apic->vcpu->plane); return ret; } @@ -2767,7 +2768,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value) kvm_make_request(KVM_REQ_APF_READY, vcpu); } else { static_branch_inc(&apic_hw_disabled.key); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); } } @@ -2814,7 +2815,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated) } __kvm_apic_set_base(vcpu, value); - kvm_recalculate_apic_map(vcpu->kvm); + kvm_recalculate_apic_map(vcpu->plane); return 0; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_base); @@ -2983,7 +2984,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; - kvm_recalculate_apic_map(vcpu->kvm); + kvm_recalculate_apic_map(vcpu->plane); } /* @@ -3271,13 +3272,13 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) r = kvm_apic_state_fixup(vcpu, s, true); if (r) { - kvm_recalculate_apic_map(vcpu->kvm); + kvm_recalculate_apic_map(vcpu->plane); return r; } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); - atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); - kvm_recalculate_apic_map(vcpu->kvm); + atomic_set_release(&apic->vcpu->plane->arch.apic_map_dirty, DIRTY); + kvm_recalculate_apic_map(vcpu->plane); kvm_apic_set_version(vcpu); apic_update_ppr(apic); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a158740a6fc1..070f87ae23eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10441,7 +10441,7 @@ static void kvm_sched_yield(struct kvm_vcpu 
*vcpu, unsigned long dest_id) goto no_yield; rcu_read_lock(); - map = rcu_dereference(vcpu->kvm->arch.apic_map); + map = rcu_dereference(vcpu->plane->arch.apic_map); if (likely(map) && dest_id <= map->max_apic_id) { dest_id = array_index_nospec(dest_id, map->max_apic_id + 1); @@ -10528,7 +10528,7 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl, if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI)) break; - ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); + ret = kvm_pv_send_ipi(vcpu, a0, a1, a2, a3, op_64_bit); break; case KVM_HC_SCHED_YIELD: if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD)) @@ -13397,6 +13397,18 @@ void kvm_arch_free_vm(struct kvm *kvm) __kvm_arch_free_vm(kvm); } +int kvm_arch_plane_init(struct kvm *kvm, struct kvm_plane *plane, + unsigned plane_level) +{ + mutex_init(&plane->arch.apic_map_lock); + + return 0; +} + +void kvm_arch_plane_destroy(struct kvm_plane *plane) +{ + kvfree(rcu_dereference_check(plane->arch.apic_map, 1)); +} int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { @@ -13429,7 +13441,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) atomic_set(&kvm->arch.noncoherent_dma_count, 0); raw_spin_lock_init(&kvm->arch.tsc_write_lock); - mutex_init(&kvm->arch.apic_map_lock); seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock); ratelimit_state_init(&kvm->arch.kvmclock_update_rs, HZ, 10); ratelimit_set_flags(&kvm->arch.kvmclock_update_rs, RATELIMIT_MSG_ON_RELEASE); @@ -13587,7 +13598,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); #endif - kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); kvm_page_track_cleanup(kvm); -- 2.53.0 From: Paolo Bonzini Make the local apic code aware of planes and only operate on APICs within the same plane level. 
Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/ioapic.c | 8 +++---- arch/x86/kvm/irq.c | 8 ++++--- arch/x86/kvm/lapic.c | 50 +++++++++++++++++++++---------------------- arch/x86/kvm/lapic.h | 12 +++++------ arch/x86/kvm/x86.c | 6 +++--- arch/x86/kvm/xen.c | 2 +- 7 files changed, 45 insertions(+), 43 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4438ecac9a89..0a5d8e302f32 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -492,7 +492,7 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) irq.vector = vector; irq.level = 1; - ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq); + ret = kvm_irq_delivery_to_apic(vcpu->plane, vcpu->arch.apic, &irq); trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); return ret; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index eed96ff6e722..539edee73047 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -429,7 +429,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) irq.dest_id = e->fields.dest_id; irq.msi_redir_hint = false; bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS); - kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + kvm_bitmap_or_dest_vcpus(ioapic->kvm->planes[0], &irq, vcpu_bitmap); if (old_dest_mode != e->fields.dest_mode || old_dest_id != e->fields.dest_id) { @@ -442,7 +442,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) irq.dest_mode = kvm_lapic_irq_dest_mode( !!e->fields.dest_mode); - kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + kvm_bitmap_or_dest_vcpus(ioapic->kvm->planes[0], &irq, vcpu_bitmap); } kvm_make_scan_ioapic_request_mask(ioapic->kvm, @@ -485,11 +485,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) * if rtc_irq_check_coalesced returns false). 
*/ BUG_ON(ioapic->rtc_status.pending_eoi != 0); - ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ret = __kvm_irq_delivery_to_apic(ioapic->kvm->planes[0], NULL, &irqe, &ioapic->rtc_status); ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); } else - ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); + ret = kvm_irq_delivery_to_apic(ioapic->kvm->planes[0], NULL, &irqe); if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) entry->fields.remote_irr = 1; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index bc748a4b7cbd..3bf2ecfd9cb4 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -226,6 +226,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { struct kvm_lapic_irq irq; + struct kvm_plane *plane; if (kvm_msi_route_invalid(kvm, e)) return -EINVAL; @@ -234,8 +235,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return -1; kvm_msi_to_lapic_irq(kvm, e, &irq); + plane = kvm->planes[e->msi.plane_level]; - return kvm_irq_delivery_to_apic(kvm, NULL, &irq); + return kvm_irq_delivery_to_apic(plane, NULL, &irq); } int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, @@ -258,7 +260,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, kvm_msi_to_lapic_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm->planes[e->msi.plane_level], NULL, &irq, &r)) return r; break; @@ -453,7 +455,7 @@ static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd, * if they have a single CPU as the destination, e.g. only if * the guest has affined the interrupt to a single vCPU. 
*/ - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || + if (!kvm_intr_is_single_vcpu(kvm->planes[0], &irq, &vcpu) || !kvm_irq_is_postable(&irq)) vcpu = NULL; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 06a12b49fafa..cac076445472 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1153,7 +1153,7 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) * means that the interrupt should be dropped. In this case, *bitmap would be * zero and *dst undefined. */ -static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, +static inline bool kvm_apic_map_get_dest_lapic(struct kvm_plane *plane, struct kvm_lapic **src, struct kvm_lapic_irq *irq, struct kvm_apic_map *map, struct kvm_lapic ***dst, unsigned long *bitmap) @@ -1167,7 +1167,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, } else if (irq->shorthand) return false; - if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) + if (!map || kvm_apic_is_broadcast_dest(plane->kvm, src, irq, map)) return false; if (irq->dest_mode == APIC_DEST_PHYSICAL) { @@ -1208,7 +1208,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, bitmap, 16); if (!(*dst)[lowest]) { - kvm_apic_disabled_lapic_found(kvm); + kvm_apic_disabled_lapic_found(plane->kvm); *bitmap = 0; return true; } @@ -1219,7 +1219,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, return true; } -static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, +static bool __kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct rtc_status *rtc_status) { @@ -1232,7 +1232,7 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - if (KVM_BUG_ON(!src, kvm)) { + if (KVM_BUG_ON(!src, plane->kvm)) { *r = 0; return true; } @@ -1241,9 +1241,9 @@ static bool __kvm_irq_delivery_to_apic_fast(struct 
kvm *kvm, struct kvm_lapic *s } rcu_read_lock(); - map = rcu_dereference(kvm->planes[0]->arch.apic_map); + map = rcu_dereference(plane->arch.apic_map); - ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); + ret = kvm_apic_map_get_dest_lapic(plane, &src, irq, map, &dst, &bitmap); if (ret) { *r = 0; for_each_set_bit(i, &bitmap, 16) { @@ -1258,10 +1258,10 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s } -bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, +bool kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r) { - return __kvm_irq_delivery_to_apic_fast(kvm, src, irq, r, NULL); + return __kvm_irq_delivery_to_apic_fast(plane, src, irq, r, NULL); } /* @@ -1278,7 +1278,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, * interrupt. * - Otherwise, use remapped mode to inject the interrupt. */ -static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, +static bool kvm_intr_is_single_vcpu_fast(struct kvm_plane *plane, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { @@ -1291,9 +1291,9 @@ static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, return false; rcu_read_lock(); - map = rcu_dereference(kvm->planes[0]->arch.apic_map); + map = rcu_dereference(plane->arch.apic_map); - if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && + if (kvm_apic_map_get_dest_lapic(plane, NULL, irq, map, &dst, &bitmap) && hweight16(bitmap) == 1) { unsigned long i = find_first_bit(&bitmap, 16); @@ -1307,17 +1307,17 @@ static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, return ret; } -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, +bool kvm_intr_is_single_vcpu(struct kvm_plane *plane, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { int r = 0; unsigned long i; struct kvm_vcpu *vcpu; - if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) + if 
(kvm_intr_is_single_vcpu_fast(plane, irq, dest_vcpu)) return true; - kvm_for_each_vcpu(i, vcpu, kvm) { + plane_for_each_vcpu(i, vcpu, plane) { if (!kvm_apic_present(vcpu)) continue; @@ -1335,7 +1335,7 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu); -int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, +int __kvm_irq_delivery_to_apic(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq, struct rtc_status *rtc_status) { @@ -1344,7 +1344,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; unsigned int dest_vcpus = 0; - if (__kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, rtc_status)) + if (__kvm_irq_delivery_to_apic_fast(plane, src, irq, &r, rtc_status)) return r; if (irq->dest_mode == APIC_DEST_PHYSICAL && @@ -1355,7 +1355,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); - kvm_for_each_vcpu(i, vcpu, kvm) { + plane_for_each_vcpu(i, vcpu, plane) { if (!kvm_apic_present(vcpu)) continue; @@ -1384,7 +1384,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int idx = kvm_vector_to_index(irq->vector, dest_vcpus, dest_vcpu_bitmap, KVM_MAX_VCPUS); - lowest = kvm_get_vcpu(kvm, idx); + lowest = plane_get_vcpu(plane, idx); } if (lowest) @@ -1500,7 +1500,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, * out the destination vcpus array and set the bitmap or it traverses to * each available vcpu to identify the same. 
*/ -void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, +void kvm_bitmap_or_dest_vcpus(struct kvm_plane *plane, struct kvm_lapic_irq *irq, unsigned long *vcpu_bitmap) { struct kvm_lapic **dest_vcpu = NULL; @@ -1512,9 +1512,9 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, bool ret; rcu_read_lock(); - map = rcu_dereference(kvm->planes[0]->arch.apic_map); + map = rcu_dereference(plane->arch.apic_map); - ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, + ret = kvm_apic_map_get_dest_lapic(plane, &src, irq, map, &dest_vcpu, &bitmap); if (ret) { for_each_set_bit(i, &bitmap, 16) { @@ -1524,7 +1524,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, __set_bit(vcpu_idx, vcpu_bitmap); } } else { - kvm_for_each_vcpu(i, vcpu, kvm) { + plane_for_each_vcpu(i, vcpu, plane) { if (!kvm_apic_present(vcpu)) continue; if (!kvm_apic_match_dest(vcpu, NULL, @@ -1651,7 +1651,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) trace_kvm_apic_ipi(icr_low, irq.dest_id); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); + kvm_irq_delivery_to_apic(apic->vcpu->plane, apic, &irq); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi); @@ -2619,7 +2619,7 @@ static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast) kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq); - if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq, + if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->plane, apic, &irq, &ignored)) return -EWOULDBLOCK; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index afd440c88981..a9ede0e145d9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -116,17 +116,17 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu); int kvm_alloc_apic_access_page(struct kvm *kvm); void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu); -bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, +bool 
kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r); -int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, +int __kvm_irq_delivery_to_apic(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq, struct rtc_status *rtc_status); -static inline int kvm_irq_delivery_to_apic(struct kvm *kvm, +static inline int kvm_irq_delivery_to_apic(struct kvm_plane *plane, struct kvm_lapic *src, struct kvm_lapic_irq *irq) { - return __kvm_irq_delivery_to_apic(kvm, src, irq, NULL); + return __kvm_irq_delivery_to_apic(plane, src, irq, NULL); } void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); @@ -244,10 +244,10 @@ bool kvm_lapic_suppress_eoi_broadcast(struct kvm_lapic *apic); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); -void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, +void kvm_bitmap_or_dest_vcpus(struct kvm_plane *plane, struct kvm_lapic_irq *irq, unsigned long *vcpu_bitmap); -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, +bool kvm_intr_is_single_vcpu(struct kvm_plane *plane, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 070f87ae23eb..7fc08df245bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10373,7 +10373,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, * * @apicid - apicid of vcpu to be kicked. 
*/ -static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) +static void kvm_pv_kick_cpu_op(struct kvm_plane *plane, int apicid) { /* * All other fields are unused for APIC_DM_REMRD, but may be consumed by @@ -10386,7 +10386,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) .dest_id = apicid, }; - kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq); + kvm_irq_delivery_to_apic(plane, NULL, &lapic_irq); } bool kvm_apicv_activated(struct kvm *kvm) @@ -10515,7 +10515,7 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl, if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT)) break; - kvm_pv_kick_cpu_op(vcpu->kvm, a1); + kvm_pv_kick_cpu_op(vcpu->plane, a1); kvm_sched_yield(vcpu, a1); ret = 0; break; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 91fd3673c09a..06c5789f406b 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -626,7 +626,7 @@ void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) irq.delivery_mode = APIC_DM_FIXED; irq.level = 1; - kvm_irq_delivery_to_apic(v->kvm, NULL, &irq); + kvm_irq_delivery_to_apic(v->plane, NULL, &irq); } /* -- 2.53.0 From: Joerg Roedel The CPUID state is shared across all planes, so move it to struct kvm_vcpu_arch_common. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 17 ++++++++-------- arch/x86/kvm/cpuid.c | 36 +++++++++++++++++++-------------- arch/x86/kvm/cpuid.h | 14 ++++++++++--- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/smm.c | 2 +- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 17 ++++++++++++---- 8 files changed, 58 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 11e52f8bb2c2..3a64bdae6e23 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -794,10 +794,16 @@ enum kvm_only_cpuid_leafs { NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; -struct kvm_vcpu_arch_common {}; +struct kvm_vcpu_arch_common { + /* CPUID related state */ + int cpuid_nent; + struct kvm_cpuid_entry2 *cpuid_entries; + bool cpuid_dynamic_bits_dirty; + bool is_amd_compatible; +}; -static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; } -static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {} +int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common); +void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common); struct kvm_vcpu_arch { /* @@ -919,11 +925,6 @@ struct kvm_vcpu_arch { int halt_request; /* real mode on Intel only */ - int cpuid_nent; - struct kvm_cpuid_entry2 *cpuid_entries; - bool cpuid_dynamic_bits_dirty; - bool is_amd_compatible; - /* * cpu_caps holds the effective guest capabilities, i.e. the features * the vCPU is allowed to use. 
Typically, but not always, features can diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e69156b54cff..6d948d63306c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -176,6 +176,7 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { + struct kvm_vcpu_common *common = vcpu->common; struct kvm_cpuid_entry2 *orig; int i; @@ -188,11 +189,11 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 kvm_update_cpuid_runtime(vcpu); kvm_apply_cpuid_pv_features_quirk(vcpu); - if (nent != vcpu->arch.cpuid_nent) + if (nent != common->arch.cpuid_nent) return -EINVAL; for (i = 0; i < nent; i++) { - orig = &vcpu->arch.cpuid_entries[i]; + orig = &common->arch.cpuid_entries[i]; if (e2[i].function != orig->function || e2[i].index != orig->index || e2[i].flags != orig->flags || @@ -290,7 +291,7 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; - vcpu->arch.cpuid_dynamic_bits_dirty = false; + vcpu->common->arch.cpuid_dynamic_bits_dirty = false; best = kvm_find_cpuid_entry(vcpu, 1); if (best) { @@ -374,6 +375,7 @@ static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func, void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_common *common = vcpu->common; struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; struct kvm_cpuid_entry2 *entry; @@ -443,7 +445,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu); - vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); + common->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); @@ -509,6 +511,7 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu) static int 
kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { + struct kvm_vcpu_common *common = vcpu->common; u32 vcpu_caps[NR_KVM_CPU_CAPS]; int r; @@ -516,7 +519,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * Apply pending runtime CPUID updates to the current CPUID entries to * avoid false positives due to mismatches on KVM-owned feature flags. */ - if (vcpu->arch.cpuid_dynamic_bits_dirty) + if (common->arch.cpuid_dynamic_bits_dirty) kvm_update_cpuid_runtime(vcpu); /* @@ -530,8 +533,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * updates. Full initialization is done if and only if the vCPU hasn't * run, i.e. only if userspace is potentially changing CPUID features. */ - swap(vcpu->arch.cpuid_entries, e2); - swap(vcpu->arch.cpuid_nent, nent); + swap(common->arch.cpuid_entries, e2); + swap(common->arch.cpuid_nent, nent); memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps)); BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps)); @@ -580,8 +583,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, err: memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps)); - swap(vcpu->arch.cpuid_entries, e2); - swap(vcpu->arch.cpuid_nent, nent); + swap(common->arch.cpuid_entries, e2); + swap(common->arch.cpuid_nent, nent); return r; } @@ -658,17 +661,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - if (cpuid->nent < vcpu->arch.cpuid_nent) + struct kvm_vcpu_common *common = vcpu->common; + + if (cpuid->nent < common->arch.cpuid_nent) return -E2BIG; - if (vcpu->arch.cpuid_dynamic_bits_dirty) + if (common->arch.cpuid_dynamic_bits_dirty) kvm_update_cpuid_runtime(vcpu); - if (copy_to_user(entries, vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + if (copy_to_user(entries, common->arch.cpuid_entries, + common->arch.cpuid_nent * 
sizeof(struct kvm_cpuid_entry2))) return -EFAULT; - cpuid->nent = vcpu->arch.cpuid_nent; + cpuid->nent = common->arch.cpuid_nent; return 0; } @@ -2089,10 +2094,11 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only) { u32 orig_function = *eax, function = *eax, index = *ecx; + struct kvm_vcpu_common *common = vcpu->common; struct kvm_cpuid_entry2 *entry; bool exact, used_max_basic = false; - if (vcpu->arch.cpuid_dynamic_bits_dirty) + if (common->arch.cpuid_dynamic_bits_dirty) kvm_update_cpuid_runtime(vcpu); entry = kvm_find_cpuid_entry_index(vcpu, function, index); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 039b8e6f40ba..143ea8531611 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -36,14 +36,18 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(struct kvm_cpuid_entry2 *entries, static inline struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index) { - return kvm_find_cpuid_entry2(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent, + struct kvm_vcpu_common *common = vcpu->common; + + return kvm_find_cpuid_entry2(common->arch.cpuid_entries, common->arch.cpuid_nent, function, index); } static inline struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function) { - return kvm_find_cpuid_entry2(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent, + struct kvm_vcpu_common *common = vcpu->common; + + return kvm_find_cpuid_entry2(common->arch.cpuid_entries, common->arch.cpuid_nent, function, KVM_CPUID_INDEX_NOT_SIGNIFICANT); } @@ -135,7 +139,7 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) { - return vcpu->arch.is_amd_compatible; + return vcpu->common->arch.is_amd_compatible; } static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) @@ -300,4 +304,8 @@ static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) 
guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB)); } +static inline void cpuid_set_dirty(struct kvm_vcpu *vcpu) +{ + vcpu->common->arch.cpuid_dynamic_bits_dirty = true; +} #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cac076445472..dc7a08831a54 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2754,7 +2754,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value) vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); if (!apic) return; diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index f623c5986119..736ab345b9fd 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -363,7 +363,7 @@ void enter_smm(struct kvm_vcpu *vcpu) goto error; #endif - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); kvm_mmu_reset_context(vcpu); return; error: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ad880a4266..612db7ad8b2a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1848,7 +1848,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); } static void svm_set_segment(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 20262855bfe8..62e180651143 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3595,7 +3595,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(GUEST_CR4, hw_cr4); if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fc08df245bd..7e94a378b3d2 100644 --- a/arch/x86/kvm/x86.c +++ 
b/arch/x86/kvm/x86.c @@ -1322,7 +1322,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); return 0; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr); @@ -4089,7 +4089,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3)) return 1; vcpu->arch.ia32_misc_enable_msr = data; - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); } else { vcpu->arch.ia32_misc_enable_msr = data; } @@ -4121,7 +4121,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (vcpu->arch.ia32_xss == data) break; vcpu->arch.ia32_xss = data; - vcpu->arch.cpuid_dynamic_bits_dirty = true; + cpuid_set_dirty(vcpu); break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) @@ -13034,7 +13034,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); - kvfree(vcpu->arch.cpuid_entries); +} + +int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) +{ + return 0; +} + +void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) +{ + kvfree(common->arch.cpuid_entries); } static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) -- 2.53.0 From: Joerg Roedel Now that CPUID state is shared across all planes, cpu_caps can be shared as well. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 33 +++++++++++++++++---------------- arch/x86/kvm/cpuid.c | 18 +++++++++--------- arch/x86/kvm/cpuid.h | 17 +++++++++-------- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 2 +- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a64bdae6e23..b0d040528f9d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -800,6 +800,23 @@ struct kvm_vcpu_arch_common { struct kvm_cpuid_entry2 *cpuid_entries; bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; + + /* + * cpu_caps holds the effective guest capabilities, i.e. the features + * the vCPU is allowed to use. Typically, but not always, features can + * be used by the guest if and only if both KVM and userspace want to + * expose the feature to the guest. + * + * A common exception is for virtualization holes, i.e. when KVM can't + * prevent the guest from using a feature, in which case the vCPU "has" + * the feature regardless of what KVM or userspace desires. + * + * Note, features that don't require KVM involvement in any way are + * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the + * guest CPUID provided by userspace. + */ + u32 cpu_caps[NR_KVM_CPU_CAPS]; + }; int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common); @@ -925,22 +942,6 @@ struct kvm_vcpu_arch { int halt_request; /* real mode on Intel only */ - /* - * cpu_caps holds the effective guest capabilities, i.e. the features - * the vCPU is allowed to use. Typically, but not always, features can - * be used by the guest if and only if both KVM and userspace want to - * expose the feature to the guest. - * - * A common exception is for virtualization holes, i.e. when KVM can't - * prevent the guest from using a feature, in which case the vCPU "has" - * the feature regardless of what KVM or userspace desires. 
- * - * Note, features that don't require KVM involvement in any way are - * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the - * guest CPUID provided by userspace. - */ - u32 cpu_caps[NR_KVM_CPU_CAPS]; - u64 reserved_gpa_bits; int maxphyaddr; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6d948d63306c..27e2f7e25038 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -284,7 +284,7 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu, bool has_feature) { cpuid_entry_change(entry, x86_feature, has_feature); - guest_cpu_cap_change(vcpu, x86_feature, has_feature); + guest_cpu_cap_change(vcpu->common, x86_feature, has_feature); } static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) @@ -382,7 +382,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) bool allow_gbpages; int i; - memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps)); + memset(common->arch.cpu_caps, 0, sizeof(common->arch.cpu_caps)); BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS); /* @@ -408,9 +408,9 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * in guest CPUID. Note, this includes features that are * supported by KVM but aren't advertised to userspace! */ - vcpu->arch.cpu_caps[i] = kvm_cpu_caps[i] | - cpuid_get_reg_unsafe(&emulated, cpuid.reg); - vcpu->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg); + common->arch.cpu_caps[i] = kvm_cpu_caps[i] | + cpuid_get_reg_unsafe(&emulated, cpuid.reg); + common->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg); } kvm_update_cpuid_runtime(vcpu); @@ -428,7 +428,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) */ allow_gbpages = tdp_enabled ? 
boot_cpu_has(X86_FEATURE_GBPAGES) : guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES); - guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages); + guest_cpu_cap_change(common, X86_FEATURE_GBPAGES, allow_gbpages); best = kvm_find_cpuid_entry(vcpu, 1); if (best && apic) { @@ -536,8 +536,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, swap(common->arch.cpuid_entries, e2); swap(common->arch.cpuid_nent, nent); - memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps)); - BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps)); + memcpy(vcpu_caps, common->arch.cpu_caps, sizeof(vcpu_caps)); + BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(common->arch.cpu_caps)); /* * KVM does not correctly handle changing guest CPUID after KVM_RUN or @@ -582,7 +582,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, return 0; err: - memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps)); + memcpy(common->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps)); swap(common->arch.cpuid_entries, e2); swap(common->arch.cpuid_nent, nent); return r; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 143ea8531611..75abf447eabf 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -239,36 +239,37 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu, return vcpu->arch.pv_cpuid.features & (1u << kvm_feature); } -static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu, +static __always_inline void guest_cpu_cap_set(struct kvm_vcpu_common *common, unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature); + common->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature); } -static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu, +static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu_common *common, unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - 
vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); + common->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); } -static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu, +static __always_inline void guest_cpu_cap_change(struct kvm_vcpu_common *common, unsigned int x86_feature, bool guest_has_cap) { if (guest_has_cap) - guest_cpu_cap_set(vcpu, x86_feature); + guest_cpu_cap_set(common, x86_feature); else - guest_cpu_cap_clear(vcpu, x86_feature); + guest_cpu_cap_clear(common, x86_feature); } static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu, unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); + struct kvm_vcpu_common *common = vcpu->common; /* * Except for MWAIT, querying dynamic feature bits is disallowed, so @@ -278,7 +279,7 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu, x86_feature == X86_FEATURE_OSXSAVE || x86_feature == X86_FEATURE_OSPKE); - return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature); + return common->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature); } static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 612db7ad8b2a..0b57dde29e40 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4706,7 +4706,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give * the guest read/write access to the host's XSS. */ - guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES, + guest_cpu_cap_change(vcpu->common, X86_FEATURE_XSAVES, boot_cpu_has(X86_FEATURE_XSAVES) && guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE)); @@ -4716,7 +4716,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * SVM on Intel is bonkers and extremely unlikely to work). 
*/ if (guest_cpuid_is_intel_compatible(vcpu)) - guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); + guest_cpu_cap_clear(vcpu->common, X86_FEATURE_V_VMSAVE_VMLOAD); if (is_sev_guest(vcpu)) sev_vcpu_after_set_cpuid(svm); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 62e180651143..d10aa5f60cad 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7994,7 +7994,7 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * set if and only if XSAVE is supported. */ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE)) - guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES); + guest_cpu_cap_clear(vcpu->common, X86_FEATURE_XSAVES); vmx_setup_uret_msrs(vmx); -- 2.53.0 From: Joerg Roedel Make sure to update CPUID dependent state for all VCPUs of a given plane when CPUID state is updated. Signed-off-by: Joerg Roedel --- arch/x86/kvm/cpuid.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 27e2f7e25038..fab075bb6fdc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -513,6 +513,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, { struct kvm_vcpu_common *common = vcpu->common; u32 vcpu_caps[NR_KVM_CPU_CAPS]; + struct kvm_vcpu *v; + unsigned i; int r; /* @@ -562,9 +564,11 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, #ifdef CONFIG_KVM_HYPERV if (kvm_cpuid_has_hyperv(vcpu)) { - r = kvm_hv_vcpu_init(vcpu); - if (r) - goto err; + vcpu_for_each_plane(common, i, v) { + r = kvm_hv_vcpu_init(vcpu); + if (r) + goto err; + } } #endif @@ -572,10 +576,12 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, if (r) goto err; + vcpu_for_each_plane(vcpu->common, i, v) { #ifdef CONFIG_KVM_XEN - vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); + v->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); #endif - kvm_vcpu_after_set_cpuid(vcpu); + 
kvm_vcpu_after_set_cpuid(v); + } success: kvfree(e2); -- 2.53.0 From: Joerg Roedel Share the MTRR state across all planes of a given VCPU index. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/mtrr.c | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b0d040528f9d..f30173093c44 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -817,6 +817,8 @@ struct kvm_vcpu_arch_common { */ u32 cpu_caps[NR_KVM_CPU_CAPS]; + /* Cache configuration state */ + struct kvm_mtrr mtrr_state; }; int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common); @@ -994,7 +996,6 @@ struct kvm_vcpu_arch { bool smi_pending; /* SMI queued after currently running handler */ u8 handling_intr_from_guest; - struct kvm_mtrr mtrr_state; u64 pat; unsigned switch_db_regs; diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 6f74e2b27c1e..610ff975e022 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -23,18 +23,20 @@ static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) { + struct kvm_vcpu_common *common = vcpu->common; + int index; switch (msr) { case MTRRphysBase_MSR(0) ... 
MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1): index = msr - MTRRphysBase_MSR(0); - return &vcpu->arch.mtrr_state.var[index]; + return &common->arch.mtrr_state.var[index]; case MSR_MTRRfix64K_00000: - return &vcpu->arch.mtrr_state.fixed_64k; + return &common->arch.mtrr_state.fixed_64k; case MSR_MTRRfix16K_80000: case MSR_MTRRfix16K_A0000: index = msr - MSR_MTRRfix16K_80000; - return &vcpu->arch.mtrr_state.fixed_16k[index]; + return &common->arch.mtrr_state.fixed_16k[index]; case MSR_MTRRfix4K_C0000: case MSR_MTRRfix4K_C8000: case MSR_MTRRfix4K_D0000: @@ -44,9 +46,9 @@ static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) case MSR_MTRRfix4K_F0000: case MSR_MTRRfix4K_F8000: index = msr - MSR_MTRRfix4K_C0000; - return &vcpu->arch.mtrr_state.fixed_4k[index]; + return &common->arch.mtrr_state.fixed_4k[index]; case MSR_MTRRdefType: - return &vcpu->arch.mtrr_state.deftype; + return &common->arch.mtrr_state.deftype; default: break; } -- 2.53.0 From: Joerg Roedel In the KVM_RUN path, select a runnable VCPU plane and use it to enter the guest. Also handle KVM_REQ_PLANE_RESCHED events to switch planes without exiting to user-space. 
Signed-off-by: Joerg Roedel --- arch/x86/kvm/x86.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e94a378b3d2..b9828cd31136 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11398,6 +11398,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } } + + if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) { + vcpu->common->plane_switch = true; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win || @@ -12076,7 +12082,7 @@ static int kvm_x86_vcpu_pre_run(struct kvm_vcpu *vcpu) return kvm_x86_call(vcpu_pre_run)(vcpu); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +static int __kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; struct kvm_run *kvm_run = vcpu->run; @@ -12196,6 +12202,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return r; } +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu_plane0) +{ + struct kvm_vcpu_common *common = vcpu_plane0->common; + int ret; + + do { + struct kvm_vcpu *vcpu = kvm_vcpu_select_plane(vcpu_plane0); + + if (vcpu == NULL) + return -EINVAL; + + common->plane_switch = false; + + ret = __kvm_arch_vcpu_ioctl_run(vcpu); + if (ret) + break; + } while (vcpu_plane0->common->plane_switch); + + return ret; +} + static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { -- 2.53.0 From: Joerg Roedel These events must be handled on the plane-vcpu that they were raised on. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f30173093c44..c2651774d785 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -85,20 +85,20 @@ KVM_X86_NOTIFY_VMEXIT_USER) /* x86-specific vcpu->requests bit members */ -#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) +#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_PLANE_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) #define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) -#define KVM_REQ_EVENT KVM_ARCH_REQ(6) +#define KVM_REQ_EVENT KVM_ARCH_PLANE_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) -#define KVM_REQ_NMI KVM_ARCH_REQ(9) -#define KVM_REQ_PMU KVM_ARCH_REQ(10) -#define KVM_REQ_PMI KVM_ARCH_REQ(11) +#define KVM_REQ_NMI KVM_ARCH_PLANE_REQ(9) +#define KVM_REQ_PMU KVM_ARCH_PLANE_REQ(10) +#define KVM_REQ_PMI KVM_ARCH_PLANE_REQ(11) #ifdef CONFIG_KVM_SMM -#define KVM_REQ_SMI KVM_ARCH_PLANE_REQ(12) +#define KVM_REQ_SMI KVM_ARCH_PLANE_REQ(12) #endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ -- 2.53.0 From: Joerg Roedel Allow the hardware backend implementations to allocate the struct kvm_plane instances so that they can carry hardware-specific information along with them.
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm-x86-ops.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/vmx/main.c | 5 ++++- arch/x86/kvm/x86.c | 16 ++++++++++++++-- arch/x86/kvm/x86.h | 4 ++++ 6 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index c8bff1e9325e..207d56d12459 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -150,6 +150,8 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) +KVM_X86_OP(alloc_plane) +KVM_X86_OP(free_plane) #endif #undef KVM_X86_OP diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c2651774d785..0955097aca9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2011,6 +2011,9 @@ struct kvm_x86_ops { int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); + + struct kvm_plane *(*alloc_plane)(void); + void (*free_plane)(struct kvm_plane *); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0b57dde29e40..2a92d8d18d7c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5445,6 +5445,9 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .gmem_prepare = sev_gmem_prepare, .gmem_invalidate = sev_gmem_invalidate, .gmem_max_mapping_level = sev_gmem_max_mapping_level, + + .alloc_plane = x86_alloc_plane, + .free_plane = x86_free_plane, }; /* diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index f9c4703dda54..a2fc4eeeca1d 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -1030,7 +1030,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = { 
.vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl), .vcpu_mem_enc_unlocked_ioctl = vt_op_tdx_only(vcpu_mem_enc_unlocked_ioctl), - .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level) + .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level), + + .alloc_plane = x86_alloc_plane, + .free_plane = x86_free_plane, }; struct kvm_x86_init_ops vt_init_ops __initdata = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9828cd31136..5f48392d4738 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -487,18 +487,30 @@ unsigned kvm_arch_max_planes(struct kvm *kvm) return 1; } -struct kvm_plane *kvm_alloc_plane(void) +struct kvm_plane *x86_alloc_plane(void) { /* For better type checking, do not return kzalloc() value directly */ struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT); return plane; } +EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_alloc_plane); -void kvm_free_plane(struct kvm_plane *plane) +void x86_free_plane(struct kvm_plane *plane) { kfree(plane); } +EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_free_plane); + +struct kvm_plane *kvm_alloc_plane(void) +{ + return kvm_x86_call(alloc_plane)(); +} + +void kvm_free_plane(struct kvm_plane *plane) +{ + kvm_x86_call(free_plane)(plane); +} /* * All feature MSRs except uCode revID, which tracks the currently loaded uCode diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 38a905fa86de..812bd6004a4c 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -797,4 +797,8 @@ static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data) return true; } + +struct kvm_plane *x86_alloc_plane(void); +void x86_free_plane(struct kvm_plane *plane); + #endif -- 2.53.0 From: Joerg Roedel The VMSAs are per plane, so this request must be too. 
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0955097aca9c..0327b77e56b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -129,7 +129,7 @@ #define KVM_REQ_HV_TLB_FLUSH \ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \ - KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT) + KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT | KVM_REQUEST_PER_PLANE) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ -- 2.53.0 From: Joerg Roedel The vcpu->arch.pio_data pointer is memory mapped to user-space alongside the kvm_run page. So it also needs to be common across all planes for a given VCPU index. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 20 +++++++++++--------- virt/kvm/kvm_main.c | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0327b77e56b7..1b7aa48c961e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -801,6 +801,8 @@ struct kvm_vcpu_arch_common { bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; + void *pio_data; + /* * cpu_caps holds the effective guest capabilities, i.e. the features * the vCPU is allowed to use. 
Typically, but not always, features can diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f48392d4738..08fe65b8d57d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8532,7 +8532,7 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, } static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, unsigned int count) + unsigned short port, void *val, unsigned int count) { int r = emulator_pio_in_out(vcpu, size, port, val, count, true); if (r) @@ -12936,7 +12936,6 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { - struct page *page; int r; vcpu->arch.last_vmentry_cpu = -1; @@ -12960,10 +12959,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) r = -ENOMEM; - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!page) - goto fail_free_lapic; - vcpu->arch.pio_data = page_address(page); + vcpu->arch.pio_data = vcpu->common->arch.pio_data; vcpu->arch.mce_banks = kcalloc(KVM_MAX_MCE_BANKS * 4, sizeof(u64), GFP_KERNEL_ACCOUNT); @@ -13023,8 +13019,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) fail_free_mce_banks: kfree(vcpu->arch.mce_banks); kfree(vcpu->arch.mci_ctl2_banks); - free_page((unsigned long)vcpu->arch.pio_data); -fail_free_lapic: kvm_free_lapic(vcpu); fail_mmu_destroy: kvm_mmu_destroy(vcpu); @@ -13072,16 +13066,24 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); - free_page((unsigned long)vcpu->arch.pio_data); } int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { + struct page *page; + + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!page) + return -ENOMEM; + + common->arch.pio_data = page_address(page); + return 0; } void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) { + free_page((unsigned long)common->arch.pio_data); kvfree(common->arch.cpuid_entries); } diff --git 
a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a6d7601c3412..8f1a16af519a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4250,7 +4250,7 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) page = virt_to_page(vcpu->run); #ifdef CONFIG_X86 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) - page = virt_to_page(vcpu->arch.pio_data); + page = virt_to_page(vcpu->common->arch.pio_data); #endif #ifdef CONFIG_KVM_MMIO else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) -- 2.53.0 From: Joerg Roedel When there are IRQs or events pending for plane0, make sure it can handle them. Signed-off-by: Joerg Roedel --- arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 08fe65b8d57d..60b34bd4da9d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10962,6 +10962,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, return r; } +static inline bool kvm_check_plane0_events(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *vcpu_plane0; + + if (vcpu->plane_level == 0) + return false; + + vcpu_plane0 = vcpu->common->vcpus[0]; + + return kvm_cpu_has_injectable_intr(vcpu_plane0) || + vcpu_plane0->arch.nmi_pending || + vcpu_plane0->arch.smi_pending; +} + static void process_nmi(struct kvm_vcpu *vcpu) { unsigned int limit; @@ -11410,12 +11424,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } } + } - if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) { - vcpu->common->plane_switch = true; - r = 0; - goto out; - } + if (kvm_check_plane0_events(vcpu)) { + kvm_vcpu_set_plane_runnable(vcpu->common->vcpus[0]); + + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu); + } + + if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) { + vcpu->common->plane_switch = true; + r = 0; + goto out; } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win || @@ -11737,6 +11758,9 @@ bool
kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) return true; + if (kvm_test_request(KVM_REQ_PLANE_RESCHED, vcpu)) + return true; + if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) return true; -- 2.53.0 From: Paolo Bonzini Allow x86 hardware backends to overwrite the number of supported planes per VM type. Signed-off-by: Paolo Bonzini Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm/svm.c | 1 + arch/x86/kvm/vmx/main.c | 1 + arch/x86/kvm/x86.c | 8 +++++++- arch/x86/kvm/x86.h | 1 + 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 207d56d12459..4f96090c04c9 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -152,6 +152,7 @@ KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) KVM_X86_OP(alloc_plane) KVM_X86_OP(free_plane) +KVM_X86_OP(max_planes) #endif #undef KVM_X86_OP diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1b7aa48c961e..bfa0188d372f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2016,6 +2016,8 @@ struct kvm_x86_ops { struct kvm_plane *(*alloc_plane)(void); void (*free_plane)(struct kvm_plane *); + + unsigned (*max_planes)(struct kvm *); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2a92d8d18d7c..99357de14034 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5448,6 +5448,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .alloc_plane = x86_alloc_plane, .free_plane = x86_free_plane, + .max_planes = kvm_x86_default_max_planes, }; /* diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index a2fc4eeeca1d..572921bdfb32 100644 --- a/arch/x86/kvm/vmx/main.c 
+++ b/arch/x86/kvm/vmx/main.c @@ -1034,6 +1034,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .alloc_plane = x86_alloc_plane, .free_plane = x86_free_plane, + .max_planes = kvm_x86_default_max_planes, }; struct kvm_x86_init_ops vt_init_ops __initdata = { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 60b34bd4da9d..c6910356b061 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -482,10 +482,16 @@ static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; static unsigned int num_msr_based_features; -unsigned kvm_arch_max_planes(struct kvm *kvm) +unsigned kvm_x86_default_max_planes(struct kvm *kvm) { return 1; } +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_default_max_planes); + +unsigned kvm_arch_max_planes(struct kvm *kvm) +{ + return kvm_x86_call(max_planes)(kvm); +} struct kvm_plane *x86_alloc_plane(void) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 812bd6004a4c..ff57ba568031 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -800,5 +800,6 @@ static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data) struct kvm_plane *x86_alloc_plane(void); void x86_free_plane(struct kvm_plane *plane); +unsigned kvm_x86_default_max_planes(struct kvm *kvm); #endif -- 2.53.0 From: Joerg Roedel The code right now only supports plane-aware IOAPIC IRQ routing for IRQ-chip in split mode. Enforce that restriction in the KVM x86 code. 
Signed-off-by: Joerg Roedel --- arch/x86/kvm/x86.c | 8 ++++++-- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 +++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6910356b061..0b9fa1059481 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -490,6 +490,10 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_default_max_planes); unsigned kvm_arch_max_planes(struct kvm *kvm) { + /* For now, planes are only supported with irqchip=split */ + if (!irqchip_split(kvm)) + return 1; + return kvm_x86_call(max_planes)(kvm); } @@ -6833,7 +6837,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->args[0] > KVM_MAX_IRQ_ROUTES) goto split_irqchip_unlock; r = -EEXIST; - if (irqchip_in_kernel(kvm)) + if (irqchip_in_kernel(kvm) || kvm->has_planes) goto split_irqchip_unlock; if (kvm->created_vcpus) goto split_irqchip_unlock; @@ -7398,7 +7402,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) goto create_irqchip_unlock; r = -EINVAL; - if (kvm->created_vcpus) + if (kvm->created_vcpus || kvm->has_planes) goto create_irqchip_unlock; r = kvm_pic_init(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b62fb354267..dbf81e2520f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -918,6 +918,7 @@ struct kvm { struct list_head gpc_list; struct kvm_plane *planes[KVM_MAX_PLANES]; + bool has_planes; /* * created_vcpus is protected by kvm->lock, and is incremented diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8f1a16af519a..ff27cdbe8d92 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5477,6 +5477,10 @@ static int kvm_vm_ioctl_create_plane(struct kvm *kvm, unsigned id) WARN_ON_ONCE(id >= KVM_MAX_PLANES)) return -EINVAL; + /* Planes are only supported with in-kernel IRQ-chip */ + if (!kvm_arch_irqchip_in_kernel(kvm)) + return -EINVAL; + guard(mutex)(&kvm->lock); if (kvm->planes[id]) return -EEXIST; @@ -5498,6 +5502,7 
@@ static int kvm_vm_ioctl_create_plane(struct kvm *kvm, unsigned id) goto put_kvm; } + kvm->has_planes = true; fd_install(fd, file); return fd; -- 2.53.0 From: Joerg Roedel Planes can have different sets of SEV features enabled. Track the enabled features per plane instead of per VM. Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 37 ++++++++++++++++++++----------------- arch/x86/kvm/svm/svm.c | 21 +++++++++++++++++++-- arch/x86/kvm/svm/svm.h | 24 +++++++++++++++++++++--- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a23dcb081751..12b039823c1c 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -204,17 +204,16 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm) { - struct kvm_vcpu *vcpu = &svm->vcpu; - struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm); + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane); - return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; + return sev_plane->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; } static bool snp_is_secure_tsc_enabled(struct kvm *kvm) { - struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(kvm->planes[0]); - return (sev->vmsa_features & SVM_SEV_FEAT_SECURE_TSC) && + return (sev_plane->vmsa_features & SVM_SEV_FEAT_SECURE_TSC) && !WARN_ON_ONCE(!sev_snp_guest(kvm)); } @@ -496,6 +495,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, struct kvm_sev_init *data, unsigned long vm_type) { + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(kvm->planes[0]); struct kvm_sev_info *sev = to_kvm_sev_info(kvm); struct sev_platform_init_args init_args = {0}; bool es_active = vm_type != KVM_X86_SEV_VM; @@ -534,11 +534,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, sev->active = true; sev->es_active = es_active; -
sev->vmsa_features = data->vmsa_features; + sev_plane->vmsa_features = data->vmsa_features; sev->ghcb_version = data->ghcb_version; if (snp_active) - sev->vmsa_features |= SVM_SEV_FEAT_SNP_ACTIVE; + sev_plane->vmsa_features |= SVM_SEV_FEAT_SNP_ACTIVE; ret = sev_asid_new(sev, vm_type); if (ret) @@ -576,7 +576,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, sev_asid_free(sev); sev->asid = 0; e_no_asid: - sev->vmsa_features = 0; + sev_plane->vmsa_features = 0; sev->es_active = false; sev->active = false; return ret; @@ -931,7 +931,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_es_sync_vmsa(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm); + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(vcpu->plane); struct sev_es_save_area *save = svm->sev_es.vmsa; struct xregs_state *xsave; const u8 *s; @@ -982,7 +982,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; - save->sev_features = sev->vmsa_features; + save->sev_features = sev_plane->vmsa_features; /* * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid @@ -2026,6 +2026,8 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { + struct kvm_sev_info_plane *dst_plane = to_kvm_sev_info_plane(dst_kvm->planes[0]); + struct kvm_sev_info_plane *src_plane = to_kvm_sev_info_plane(src_kvm->planes[0]); struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm); struct kvm_sev_info *src = to_kvm_sev_info(src_kvm); struct kvm_vcpu *dst_vcpu, *src_vcpu; @@ -2039,7 +2041,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; dst->es_active = src->es_active; - dst->vmsa_features = src->vmsa_features; + 
dst_plane->vmsa_features = src_plane->vmsa_features; src->asid = 0; src->active = false; @@ -4157,7 +4159,7 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) static int sev_snp_ap_creation(struct vcpu_svm *svm) { - struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm); + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane); struct kvm_vcpu *vcpu = &svm->vcpu; struct kvm_vcpu *target_vcpu; struct vcpu_svm *target_svm; @@ -4182,9 +4184,9 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) switch (request) { case SVM_VMGEXIT_AP_CREATE_ON_INIT: case SVM_VMGEXIT_AP_CREATE: - if (vcpu->arch.regs[VCPU_REGS_RAX] != sev->vmsa_features) { + if (vcpu->arch.regs[VCPU_REGS_RAX] != sev_plane->vmsa_features) { vcpu_unimpl(vcpu, "vmgexit: mismatched AP sev_features [%#lx] != [%#llx] from guest\n", - vcpu->arch.regs[VCPU_REGS_RAX], sev->vmsa_features); + vcpu->arch.regs[VCPU_REGS_RAX], sev_plane->vmsa_features); return -EINVAL; } @@ -4815,15 +4817,16 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) static void sev_snp_init_vmcb(struct vcpu_svm *svm) { - struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + struct kvm_sev_info_plane *sev_plane = &to_kvm_svm_plane(svm->vcpu.plane)->sev_info_plane; /* V_NMI is not supported when Restricted Injection is enabled */ - if (sev->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION) + if (sev_plane->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION) svm->vmcb->control.int_ctl &= ~V_NMI_ENABLE_MASK; } static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) { + struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane); struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm); struct vmcb *vmcb = svm->vmcb01.ptr; struct kvm_vcpu *vcpu = &svm->vcpu; @@ -4845,7 +4848,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) - svm->vmcb->control.allowed_sev_features = 
sev->vmsa_features | + svm->vmcb->control.allowed_sev_features = sev_plane->vmsa_features | VMCB_ALLOWED_SEV_FEATURES_VALID; /* Can't intercept CR register access, HV can't modify CR registers */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 99357de14034..2ae82dc058c9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5304,6 +5304,23 @@ static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu) return page_address(page); } +static struct kvm_plane *svm_alloc_plane(void) +{ + struct kvm_svm_plane *svm_plane = kzalloc(sizeof(*svm_plane), GFP_KERNEL_ACCOUNT); + + if (svm_plane) + return &svm_plane->plane; + + return NULL; +} + +static void svm_free_plane(struct kvm_plane *plane) +{ + struct kvm_svm_plane *svm_plane = to_kvm_svm_plane(plane); + + kfree(svm_plane); +} + struct kvm_x86_ops svm_x86_ops __initdata = { .name = KBUILD_MODNAME, @@ -5446,8 +5463,8 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .gmem_invalidate = sev_gmem_invalidate, .gmem_max_mapping_level = sev_gmem_max_mapping_level, - .alloc_plane = x86_alloc_plane, - .free_plane = x86_free_plane, + .alloc_plane = svm_alloc_plane, + .free_plane = svm_free_plane, .max_planes = kvm_x86_default_max_planes, }; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7d27ed7099a8..57033922ddcf 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -110,7 +110,6 @@ struct kvm_sev_info { unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ - u64 vmsa_features; u16 ghcb_version; /* Highest guest GHCB protocol version allowed */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct list_head mirror_vms; /* List of VMs mirroring */ @@ -140,6 +139,15 @@ struct kvm_svm { #endif }; +struct kvm_sev_info_plane { + u64 vmsa_features; +}; + +struct kvm_svm_plane { + struct kvm_plane plane; + struct 
kvm_sev_info_plane sev_info_plane; +}; + struct kvm_vcpu; struct kvm_vmcb_info { @@ -394,6 +402,16 @@ static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static __always_inline struct kvm_svm_plane *to_kvm_svm_plane(struct kvm_plane *plane) +{ + return container_of(plane, struct kvm_svm_plane, plane); +} + +static __always_inline struct kvm_sev_info_plane *to_kvm_sev_info_plane(struct kvm_plane *plane) +{ + return &to_kvm_svm_plane(plane)->sev_info_plane; +} + #ifdef CONFIG_KVM_AMD_SEV static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm) { @@ -413,7 +431,7 @@ static __always_inline bool ____sev_es_guest(struct kvm *kvm) static __always_inline bool ____sev_snp_guest(struct kvm *kvm) { - struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + struct kvm_sev_info_plane *sev = to_kvm_sev_info_plane(kvm->planes[0]); return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) && !WARN_ON_ONCE(!____sev_es_guest(kvm)); @@ -984,7 +1002,7 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu); bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu); static inline bool sev_snp_is_rinj_active(struct kvm_vcpu *vcpu) { - struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + struct kvm_sev_info_plane *sev = &to_kvm_svm_plane(vcpu->plane)->sev_info_plane; return is_sev_snp_guest(vcpu) && (sev->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION); -- 2.53.0 From: Tom Lendacky Implement the GET_APIC_IDS NAE event to gather and return the list of APIC IDs for all vCPUs in the guest. Since it is now possible to launch vCPUs without going through the LAUNCH_UPDATE process, be sure to mark the guest state protected and to enable LBR virtualization. Since it is now possible to launch vCPUs by APIC ID before the first INIT-SIPI request, be sure to check for the AP create event in the kvm_arch_vcpu_ioctl_run() loop when the AP is in the uninitialized state. 
Signed-off-by: Tom Lendacky Co-developed-by: Joerg Roedel Co-developed-by: Carlos López Signed-off-by: Joerg Roedel --- arch/x86/include/asm/sev-common.h | 1 + arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm/sev.c | 87 +++++++++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index ee17a3541b55..cedb7ea91da5 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -137,6 +137,7 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) #define GHCB_HV_FT_SNP_RINJ (BIT_ULL(2) | GHCB_HV_FT_SNP_AP_CREATION) +#define GHCB_HV_FT_APIC_ID_LIST BIT_ULL(4) #define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) /* diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index d281dd21c540..91395b82eadd 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -123,6 +123,7 @@ #define SVM_VMGEXIT_HVDB_QUERY 2 #define SVM_VMGEXIT_HVDB_CLEAR 3 #define SVM_VMGEXIT_HV_IPI 0x80000015ull +#define SVM_VMGEXIT_GET_APIC_IDS 0x80000017ull #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull #define SVM_VMGEXIT_SAVIC 0x8000001aull #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 12b039823c1c..c0b2879f8e9f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -40,9 +40,10 @@ #define GHCB_VERSION_MAX 2ULL #define GHCB_VERSION_MIN 1ULL -#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \ - GHCB_HV_FT_SNP_AP_CREATION | \ - GHCB_HV_FT_SNP_RINJ) +#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \ + GHCB_HV_FT_SNP_AP_CREATION | \ + GHCB_HV_FT_SNP_RINJ | \ + GHCB_HV_FT_APIC_ID_LIST) /* * The GHCB spec essentially states that all non-zero error codes other than @@ -3518,6 +3519,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!kvm_ghcb_rax_is_valid(svm)) goto vmgexit_err; break; + case 
SVM_VMGEXIT_GET_APIC_IDS: + if (!kvm_ghcb_rax_is_valid(svm)) + goto vmgexit_err; + break; case SVM_VMGEXIT_NMI_COMPLETE: case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: @@ -4439,6 +4444,78 @@ static int sev_snp_hv_ipi(struct vcpu_svm *svm) return 0; } +struct sev_apic_id_desc { + u32 num_entries; + u32 apic_ids[]; +}; + +static void sev_get_apic_ids(struct vcpu_svm *svm) +{ + struct ghcb *ghcb = svm->sev_es.ghcb; + struct kvm_vcpu *vcpu = &svm->vcpu, *loop_vcpu; + struct kvm *kvm = vcpu->kvm; + unsigned int id_desc_size; + struct sev_apic_id_desc *desc; + struct page *page; + gpa_t gpa; + u64 pages; + unsigned long i; + int n; + + pages = vcpu->arch.regs[VCPU_REGS_RAX]; + + /* Each APIC ID is 32-bits in size, so make sure there is room */ + n = atomic_read(&kvm->online_vcpus); + /*TODO: is this possible? */ + if (n < 0) + return; + + id_desc_size = sizeof(*desc); + id_desc_size += n * sizeof(desc->apic_ids[0]); + if (id_desc_size > (pages * PAGE_SIZE)) { + vcpu->arch.regs[VCPU_REGS_RAX] = PFN_UP(id_desc_size); + return; + } + + gpa = svm->vmcb->control.exit_info_1; + + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, 5); + + if (!page_address_valid(vcpu, gpa)) + return; + + page = gfn_to_page(kvm, gpa_to_gfn(gpa)); + kvm_release_page_unused(page); + if (!page) + return; + + if (!pages) + return; + + /* Allocate a buffer to hold the APIC IDs */ + desc = kvzalloc(id_desc_size, GFP_KERNEL_ACCOUNT); + if (!desc) + return; + + desc->num_entries = n; + kvm_for_each_vcpu(i, loop_vcpu, kvm) { + /*TODO: is this possible? 
*/ + if (i >= n) + break; + + desc->apic_ids[i] = loop_vcpu->vcpu_id; + } + + if (!kvm_write_guest(kvm, gpa, desc, id_desc_size)) { + /* IDs were successfully written */ + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + } + + kvfree(desc); +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4730,6 +4807,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) } ret = 1; break; + case SVM_VMGEXIT_GET_APIC_IDS: + sev_get_apic_ids(svm); + ret = 1; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", -- 2.53.0 From: Tom Lendacky Update AP creation to support ADD/DESTROY of VMSAs at levels other than VMPL0 in order to run under an SVSM at VMPL1 or lower. To maintain backwards compatibility, the VMPL is specified in bits 16 to 19 of the AP Creation request in SW_EXITINFO1 of the GHCB. In order to track the VMSAs at different levels, create arrays for the VMSAs, GHCBs, registered GHCBs and others. When switching VMPL levels, these entries will be used to set the VMSA and GHCB physical addresses in the VMCB for the VMPL level. In order ensure that the proper responses are returned in the proper GHCB, the GHCB must be unmapped at the current level and saved for restoration later when switching back to that VMPL level. Additional checks are applied to prevent a non-VMPL0 vCPU from being able to perform an AP creation request at VMPL0. Additionally, a vCPU cannot replace its own VMSA. 
Signed-off-by: Tom Lendacky Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/include/asm/svm.h | 9 +++ arch/x86/include/uapi/asm/svm.h | 2 + arch/x86/kvm/svm/sev.c | 134 +++++++++++++++++++++++++------- arch/x86/kvm/svm/svm.h | 1 + arch/x86/kvm/x86.c | 9 +++ 5 files changed, 126 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 9822b0b346ae..32a35ee10bce 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -345,6 +345,15 @@ static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AV #define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63) +enum { + SVM_SEV_VMPL0 = 0, + SVM_SEV_VMPL1, + SVM_SEV_VMPL2, + SVM_SEV_VMPL3, + + SVM_SEV_VMPL_MAX +}; + struct vmcb_seg { u16 selector; u16 attrib; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 91395b82eadd..60b7a52f6f7e 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -123,6 +123,8 @@ #define SVM_VMGEXIT_HVDB_QUERY 2 #define SVM_VMGEXIT_HVDB_CLEAR 3 #define SVM_VMGEXIT_HV_IPI 0x80000015ull +#define SVM_VMGEXIT_AP_VMPL_MASK GENMASK(19, 16) +#define SVM_VMGEXIT_AP_VMPL_SHIFT 16 #define SVM_VMGEXIT_GET_APIC_IDS 0x80000017ull #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull #define SVM_VMGEXIT_SAVIC 0x8000001aull diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b2879f8e9f..53cd3aba7368 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3512,13 +3512,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!kvm_ghcb_sw_scratch_is_valid(svm)) goto vmgexit_err; break; - case SVM_VMGEXIT_AP_CREATION: + case SVM_VMGEXIT_AP_CREATION: { + unsigned int request; + if (!is_sev_snp_guest(vcpu)) goto vmgexit_err; - if (lower_32_bits(control->exit_info_1) != SVM_VMGEXIT_AP_DESTROY) + + request = lower_32_bits(control->exit_info_1); + request &= ~SVM_VMGEXIT_AP_VMPL_MASK; + if (request != 
SVM_VMGEXIT_AP_DESTROY) if (!kvm_ghcb_rax_is_valid(svm)) goto vmgexit_err; break; + } case SVM_VMGEXIT_GET_APIC_IDS: if (!kvm_ghcb_rax_is_valid(svm)) goto vmgexit_err; @@ -4151,8 +4157,26 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) /* Use the new VMSA */ svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn); + /* + * The vCPU may not have gone through the LAUNCH_UPDATE process, so mark + * the guest state as protected. + */ + vcpu->arch.guest_state_protected = true; + + /* + * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it + * only after setting guest_state_protected because KVM_SET_MSRS allows + * dynamic toggling of LBRV (for performance reason) on write access to + * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set. + */ + svm_enable_lbrv(vcpu); + /* Mark the vCPU as runnable */ - kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); + if (svm->sev_es.snp_ap_runnable) { + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); + } else { + kvm_set_mp_state(vcpu, KVM_MP_STATE_UNINITIALIZED); + } /* * gmem pages aren't currently migratable, but if this ever changes @@ -4162,36 +4186,87 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) kvm_release_page_clean(page); } -static int sev_snp_ap_creation(struct vcpu_svm *svm) +static unsigned int get_ap_creation_request(struct vcpu_svm *svm) { - struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane); - struct kvm_vcpu *vcpu = &svm->vcpu; - struct kvm_vcpu *target_vcpu; - struct vcpu_svm *target_svm; - unsigned int request; +// struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane); +// struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned int req = lower_32_bits(svm->vmcb->control.exit_info_1); + + return req & ~SVM_VMGEXIT_AP_VMPL_MASK; +} + +static unsigned int get_ap_creation_vmpl(struct vcpu_svm *svm) +{ + unsigned int req = lower_32_bits(svm->vmcb->control.exit_info_1); + + return (req & SVM_VMGEXIT_AP_VMPL_MASK) >> 
SVM_VMGEXIT_AP_VMPL_SHIFT; +} + +static unsigned int get_ap_creation_apic_id(struct vcpu_svm *svm) +{ + return upper_32_bits(svm->vmcb->control.exit_info_1); +} + +#define SVM_SEV_VMPL_MAX 4 + +static int sev_snp_ap_creation(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *target_svm = NULL, *svm = to_svm(vcpu); + struct kvm_sev_info_plane *target_sev_plane = NULL; + struct kvm_plane *target_plane = NULL; + struct kvm_vcpu *target_vcpu = NULL; unsigned int apic_id; + unsigned int request; + unsigned int vmpl; - request = lower_32_bits(svm->vmcb->control.exit_info_1); - apic_id = upper_32_bits(svm->vmcb->control.exit_info_1); + request = get_ap_creation_request(svm); + apic_id = get_ap_creation_apic_id(svm); + vmpl = get_ap_creation_vmpl(svm); - /* Validate the APIC ID */ - target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id); - if (!target_vcpu) { - vcpu_unimpl(vcpu, "vmgexit: invalid AP APIC ID [%#x] from guest\n", - apic_id); + /* Validate the requested VMPL level */ + if (vmpl >= SVM_SEV_VMPL_MAX) { + vcpu_unimpl(vcpu, "vmgexit: invalid VMPL level [%u] from guest\n", + vmpl); return -EINVAL; } + vmpl = array_index_nospec(vmpl, SVM_SEV_VMPL_MAX); + + /* Obtain the target plane and vCPU */ + target_plane = vcpu->kvm->planes[vmpl]; + if (target_plane) { + target_vcpu = plane_get_vcpu(target_plane, apic_id); + } else { + target_vcpu = NULL; + } + + /* Request user-space to create target plane VCPU if it does not exist */ + if (!target_plane || !target_vcpu) { + vcpu->arch.complete_userspace_io = sev_snp_ap_creation; + return kvm_request_create_plane(vcpu, vmpl, apic_id); + } target_svm = to_svm(target_vcpu); + target_sev_plane = &to_kvm_svm_plane(target_svm->vcpu.plane)->sev_info_plane; guard(mutex)(&target_svm->sev_es.snp_vmsa_mutex); + /* VMPL0 can only be replaced by another vCPU running VMPL0 */ + if (vmpl == SVM_SEV_VMPL0 && + (vcpu == target_vcpu || vcpu->plane_level != SVM_SEV_VMPL0)) { + vcpu_unimpl(vcpu, "vmgexit: VMPL0 AP action not allowed\n"); + return 
-EINVAL; + } + switch (request) { case SVM_VMGEXIT_AP_CREATE_ON_INIT: case SVM_VMGEXIT_AP_CREATE: - if (vcpu->arch.regs[VCPU_REGS_RAX] != sev_plane->vmsa_features) { + /* Initialize target planes SEV features if necessary */ + if (target_sev_plane->vmsa_features == 0) + target_sev_plane->vmsa_features = vcpu->arch.regs[VCPU_REGS_RAX]; + + if (vcpu->arch.regs[VCPU_REGS_RAX] != target_sev_plane->vmsa_features) { vcpu_unimpl(vcpu, "vmgexit: mismatched AP sev_features [%#lx] != [%#llx] from guest\n", - vcpu->arch.regs[VCPU_REGS_RAX], sev_plane->vmsa_features); + vcpu->arch.regs[VCPU_REGS_RAX], target_sev_plane->vmsa_features); return -EINVAL; } @@ -4226,16 +4301,18 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) return -EINVAL; } + /* Signal the vCPU to update its state */ + kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu); + target_svm->sev_es.snp_ap_waiting_for_reset = true; + target_svm->sev_es.snp_ap_runnable = (request == SVM_VMGEXIT_AP_CREATE); - /* - * Unless Creation is deferred until INIT, signal the vCPU to update - * its state. 
- */ - if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT) - kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu); + if (request == SVM_VMGEXIT_AP_CREATE) + kvm_make_request(KVM_REQ_PLANE_RESCHED, target_vcpu); - return 0; + kvm_vcpu_kick(target_vcpu); + + return 1; } static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) @@ -4779,12 +4856,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = snp_begin_psc(svm); break; case SVM_VMGEXIT_AP_CREATION: - ret = sev_snp_ap_creation(svm); - if (ret) { + ret = sev_snp_ap_creation(vcpu); + if (ret < 0) { svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT); + ret = 1; } - - ret = 1; break; case SVM_VMGEXIT_GUEST_REQUEST: ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 57033922ddcf..7e860f2abafb 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -282,6 +282,7 @@ struct vcpu_sev_es_state { struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */ gpa_t snp_vmsa_gpa; + bool snp_ap_runnable; bool snp_ap_waiting_for_reset; bool snp_has_guest_vmsa; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b9fa1059481..ad05350bb393 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12165,6 +12165,15 @@ static int __kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vcpu_block(vcpu); kvm_vcpu_srcu_read_lock(vcpu); + /* + * It is possible that the vCPU has never run before. If the + * request is to update the protected guest state (AP Create), + * then ensure that the vCPU can now run. 
+ */ + if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu) && + vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (kvm_apic_accept_events(vcpu) < 0) { r = 0; goto out; -- 2.53.0 From: Tom Lendacky Implement the SNP Run VMPL NAE event and MSR protocol to allow a guest to request a different VMPL level VMSA be run for the vCPU. This allows the guest to "call" an SVSM to process an SVSM request. Signed-off-by: Tom Lendacky Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/include/asm/sev-common.h | 6 +++ arch/x86/kvm/svm/sev.c | 71 +++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index cedb7ea91da5..a09cf5690aba 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -114,6 +114,8 @@ enum psc_op { /* GHCB Run at VMPL Request/Response */ #define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_LEVEL_POS 32 +#define GHCB_MSR_VMPL_LEVEL_MASK GENMASK_ULL(7, 0) #define GHCB_MSR_VMPL_REQ_LEVEL(v) \ /* GHCBData[39:32] */ \ ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ @@ -121,6 +123,10 @@ enum psc_op { GHCB_MSR_VMPL_REQ) #define GHCB_MSR_VMPL_RESP 0x017 +#define GHCB_MSR_VMPL_ERROR_POS 32 +#define GHCB_MSR_VMPL_ERROR_MASK GENMASK_ULL(31, 0) +#define GHCB_MSR_VMPL_RSVD_POS 12 +#define GHCB_MSR_VMPL_RSVD_MASK GENMASK_ULL(19, 0) #define GHCB_MSR_VMPL_RESP_VAL(v) \ /* GHCBData[63:32] */ \ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 53cd3aba7368..b67566fcb69e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3556,6 +3556,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!sev_snp_guest(vcpu->kvm)) goto vmgexit_err; break; + case SVM_VMGEXIT_SNP_RUN_VMPL: + if (!sev_snp_guest(vcpu->kvm)) + goto vmgexit_err; + break; default: reason = GHCB_ERR_INVALID_EVENT; goto 
vmgexit_err; @@ -4593,6 +4597,45 @@ static void sev_get_apic_ids(struct vcpu_svm *svm) kvfree(desc); } +static int __sev_snp_run_vmpl(struct vcpu_svm *svm, unsigned int vmpl) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_vcpu *target = vcpu->common->vcpus[vmpl]; + struct vcpu_svm *target_svm = to_svm(target); + + if (!target) + return -EINVAL; + + /* Mark current plane as stopped so it is not selected */ + kvm_set_mp_state(target, KVM_MP_STATE_RUNNABLE); + /* In case KVM_REQ_UPDATE_PROTECTED_GUEST_STATE is set - mark the new VMSA as runnable */ + target_svm->sev_es.snp_ap_runnable = true; + kvm_vcpu_set_plane_runnable(target); + kvm_vcpu_set_plane_stopped(vcpu); + + kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu); + + return 1; +} + +static int sev_snp_run_vmpl(struct vcpu_svm *svm) +{ + struct ghcb *ghcb = svm->sev_es.ghcb; + struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned int vmpl; + + vmpl = lower_32_bits(svm->vmcb->control.exit_info_1); + if (vmpl >= SVM_SEV_VMPL_MAX) { + vcpu_unimpl(vcpu, "vmgexit: invalid VMPL level [%u] from guest\n", vmpl); + return -EINVAL; + } + + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + return __sev_snp_run_vmpl(svm, vmpl); +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4704,6 +4747,27 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ret = snp_begin_psc_msr(svm, control->ghcb_gpa); break; + case GHCB_MSR_VMPL_REQ: { + unsigned int vmpl; + + if (!sev_snp_guest(vcpu->kvm)) + goto out_terminate; + + vmpl = get_ghcb_msr_bits(svm, GHCB_MSR_VMPL_LEVEL_MASK, GHCB_MSR_VMPL_LEVEL_POS); + + set_ghcb_msr_bits(svm, 0, GHCB_MSR_VMPL_ERROR_MASK, GHCB_MSR_VMPL_ERROR_POS); + set_ghcb_msr_bits(svm, 0, GHCB_MSR_VMPL_RSVD_MASK, GHCB_MSR_VMPL_RSVD_POS); + set_ghcb_msr_bits(svm, GHCB_MSR_VMPL_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS); + + if (vmpl >= SVM_SEV_VMPL_MAX) { + vcpu_unimpl(vcpu, "vmgexit: invalid VMPL 
level [%u] from guest\n", vmpl); + set_ghcb_msr_bits(svm, 1, GHCB_MSR_VMPL_ERROR_MASK, GHCB_MSR_VMPL_ERROR_POS); + break; + } + + ret = __sev_snp_run_vmpl(svm, vmpl); + break; + } case GHCB_MSR_TERM_REQ: { u64 reason_set, reason_code; @@ -4887,6 +4951,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) sev_get_apic_ids(svm); ret = 1; break; + case SVM_VMGEXIT_SNP_RUN_VMPL: + ret = sev_snp_run_vmpl(svm); + if (ret < 0) { + svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT); + ret = 1; + } + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", -- 2.53.0 From: Tom Lendacky Report the number of VMPL levels supported by SEV-SNP guests. Signed-off-by: Tom Lendacky Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 14 ++++++++++++-- arch/x86/kvm/svm/svm.c | 12 +++++++++++- arch/x86/kvm/svm/svm.h | 3 +++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b67566fcb69e..528c8bd3e8fc 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -114,6 +114,7 @@ static unsigned long sev_me_mask; static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; +static unsigned int vmpl_levels; static __always_inline void kvm_lockdep_assert_sev_lock_held(struct kvm *kvm) { @@ -3103,6 +3104,9 @@ void __init sev_hardware_setup(void) /* Set encryption bit location for SEV-ES guests */ sev_enc_bit = ebx & 0x3f; + /* Get the number of supported VMPL levels */ + vmpl_levels = (ebx >> 12) & 0xf; + /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = ecx; if (!max_sev_asid) @@ -3217,9 +3221,10 @@ void __init sev_hardware_setup(void) "disabled", min_sev_es_asid, max_sev_es_asid); if (boot_cpu_has(X86_FEATURE_SEV_SNP)) - pr_info("SEV-SNP %s (ASIDs %u - %u)\n", + pr_info("SEV-SNP %s (ASIDs %u - %u), VMPL Levels %u\n", 
/*
 * Return the maximum number of planes for @kvm: the value reported by
 * sev_snp_max_planes() for SEV-SNP guests (only when CONFIG_KVM_AMD_SEV is
 * enabled), otherwise the x86 default.
 */
static unsigned int svm_max_planes(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	if (____sev_snp_guest(kvm))
		return sev_snp_max_planes(kvm);
#endif

	return kvm_x86_default_max_planes(kvm);
}
sev_snp_max_planes(struct kvm *kvm) { return 1; } #endif /* vmenter.S */ -- 2.53.0 From: Tom Lendacky Indicate full multi-VMPL support to the guest through the GHCB feature bitmap. Signed-off-by: Tom Lendacky Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 528c8bd3e8fc..0736e1c778d9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -43,7 +43,8 @@ #define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \ GHCB_HV_FT_SNP_AP_CREATION | \ GHCB_HV_FT_SNP_RINJ | \ - GHCB_HV_FT_APIC_ID_LIST) + GHCB_HV_FT_APIC_ID_LIST | \ + GHCB_HV_FT_SNP_MULTI_VMPL) /* * The GHCB spec essentially states that all non-zero error codes other than -- 2.53.0