The Secure AVIC hardware requires uninterrupted access to the guest's
APIC backing page. If this page is not present in the Nested Page Table
(NPT) during a hardware access, a non-recoverable nested page fault
occurs. This sets a BUSY flag in the VMSA and causes subsequent VMRUNs
to fail with an unrecoverable VMEXIT_BUSY, effectively killing the vCPU.

This situation can arise if the backing page resides within a 2MB large
page in the NPT. If other parts of that large page are modified (e.g.,
memory state changes), KVM would split the 2MB NPT entry into 4KB
entries. This process can temporarily zap the PTE for the backing page,
creating a window for the fatal hardware access.

Introduce a new GHCB VMGEXIT protocol, SVM_VMGEXIT_SECURE_AVIC, to allow
the guest to explicitly inform KVM of the APIC backing page's location,
thereby enabling KVM to guarantee its presence in the NPT.

Implement two actions for this protocol:

- SVM_VMGEXIT_SAVIC_REGISTER_BACKING_PAGE:
  On this request, KVM receives the GPA of the backing page. To prevent
  the 2MB page-split issue, immediately perform a PSMASH on the GPA by
  calling sev_handle_rmp_fault(). This proactively splits any containing
  2MB NPT entry into 4KB pages, isolating the backing page's PTE and
  guaranteeing its presence. Store the GPA for future reference.

- SVM_VMGEXIT_SAVIC_UNREGISTER_BACKING_PAGE:
  On this request, clear the stored GPA, releasing KVM from its
  obligation to maintain the NPT entry. Return the previously registered
  GPA to the guest.

This mechanism ensures the stability of the APIC backing page mapping,
which is critical for the correct operation of Secure AVIC.

Co-developed-by: Kishon Vijay Abraham I
Signed-off-by: Kishon Vijay Abraham I
Signed-off-by: Neeraj Upadhyay
---
 arch/x86/include/uapi/asm/svm.h |  3 ++
 arch/x86/kvm/svm/sev.c          | 76 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.h          |  1 +
 3 files changed, 80 insertions(+)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..f1ef52e0fab1 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,9 @@
 #define SVM_VMGEXIT_AP_CREATE			1
 #define SVM_VMGEXIT_AP_DESTROY			2
 #define SVM_VMGEXIT_SNP_RUN_VMPL		0x80000018
+#define SVM_VMGEXIT_SECURE_AVIC			0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_BACKING_PAGE	0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_BACKING_PAGE	1
 #define SVM_VMGEXIT_HV_FEATURES			0x8000fffd
 #define SVM_VMGEXIT_TERM_REQUEST		0x8000fffe
 #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code)	\
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7c66aefe428a..3e9cc50f2705 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3399,6 +3399,15 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		    !kvm_ghcb_rcx_is_valid(svm))
 			goto vmgexit_err;
 		break;
+	case SVM_VMGEXIT_SECURE_AVIC:
+		if (!sev_savic_active(vcpu->kvm))
+			goto vmgexit_err;
+		if (!kvm_ghcb_rax_is_valid(svm))
+			goto vmgexit_err;
+		if (svm->vmcb->control.exit_info_1 == SVM_VMGEXIT_SAVIC_REGISTER_BACKING_PAGE)
+			if (!kvm_ghcb_rbx_is_valid(svm))
+				goto vmgexit_err;
+		break;
 	case SVM_VMGEXIT_MMIO_READ:
 	case SVM_VMGEXIT_MMIO_WRITE:
 		if (!kvm_ghcb_sw_scratch_is_valid(svm))
@@ -4490,6 +4499,70 @@ static bool savic_handle_msr_exit(struct kvm_vcpu *vcpu)
 	return false;
 }
 
+/*
+ * Handle an SVM_VMGEXIT_SECURE_AVIC request from the guest.
+ *
+ * RAX selects the target vCPU: -1 means the calling vCPU, any other value
+ * is looked up with kvm_get_vcpu_by_id().  exit_info_1 selects the action:
+ * REGISTER_BACKING_PAGE stores the page-aligned GPA passed in RBX for the
+ * target vCPU, UNREGISTER_BACKING_PAGE writes the stored GPA to RBX and
+ * clears it.
+ *
+ * Always returns 1.  An unknown target vCPU, an unaligned GPA or an unknown
+ * action is reported by setting sw_exit_info_1 to 2 and sw_exit_info_2 to
+ * GHCB_ERR_INVALID_INPUT.
+ */
+static int sev_handle_savic_vmgexit(struct vcpu_svm *svm)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	u64 apic_id;
+	gpa_t gpa;
+
+	apic_id = kvm_rax_read(&svm->vcpu);
+
+	if (apic_id != -1ULL) {
+		/*
+		 * kvm_get_vcpu_by_id() takes an int.  Reject IDs that do not
+		 * fit instead of letting them truncate to another vCPU's ID.
+		 */
+		if (apic_id > INT_MAX)
+			goto savic_request_invalid;
+
+		vcpu = kvm_get_vcpu_by_id(svm->vcpu.kvm, apic_id);
+		if (!vcpu)
+			goto savic_request_invalid;
+	}
+
+	switch (svm->vmcb->control.exit_info_1) {
+	case SVM_VMGEXIT_SAVIC_REGISTER_BACKING_PAGE:
+		gpa = kvm_rbx_read(&svm->vcpu);
+		if (!PAGE_ALIGNED(gpa))
+			goto savic_request_invalid;
+
+		/*
+		 * sev_handle_rmp_fault() invocation would result in PSMASH if
+		 * NPTE size is 2M.
+		 */
+		sev_handle_rmp_fault(vcpu, gpa, 0);
+		to_svm(vcpu)->sev_savic_gpa = gpa;
+		break;
+	case SVM_VMGEXIT_SAVIC_UNREGISTER_BACKING_PAGE:
+		kvm_rbx_write(&svm->vcpu, to_svm(vcpu)->sev_savic_gpa);
+		to_svm(vcpu)->sev_savic_gpa = 0;
+		break;
+	default:
+		goto savic_request_invalid;
+	}
+
+	return 1;
+
+savic_request_invalid:
+	ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+
+	return 1;
+}
+
 int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4628,6 +4701,9 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 			    control->exit_info_1, control->exit_info_2);
 		ret = -EINVAL;
 		break;
+	case SVM_VMGEXIT_SECURE_AVIC:
+		ret = sev_handle_savic_vmgexit(svm);
+		break;
 	case SVM_EXIT_MSR:
 		if (sev_savic_active(vcpu->kvm) && savic_handle_msr_exit(vcpu))
 			return 1;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a3edb6e720cd..8043833a1a8c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -337,6 +337,7 @@ struct vcpu_svm {
 	bool guest_gif;
 
 	bool sev_savic_has_pending_ipi;
+	gpa_t sev_savic_gpa;
 };
 
 struct svm_cpu_data {
-- 
2.34.1