From: Joerg Roedel Streamline the VMSA setting state of vcpus, where a VMSA can be either KVM-allocated or guest-provided. This consolidates the various tracking state around VMSAs. Signed-off-by: Joerg Roedel --- arch/x86/kvm/svm/sev.c | 301 ++++++++++++++++++++++++++++------------- arch/x86/kvm/svm/svm.h | 31 ++++- 2 files changed, 237 insertions(+), 95 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6c6a6d663e29..9b1280222e20 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -147,6 +147,9 @@ static bool sev_snp_guest(struct kvm *kvm) } static int snp_decommission_context(struct kvm *kvm); +static int kvm_rmp_make_shared(struct kvm *kvm, u64 pfn, enum pg_level level); +static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va); +static int snp_page_reclaim(struct kvm *kvm, u64 pfn); struct enc_region { struct list_head list; @@ -156,6 +159,173 @@ struct enc_region { unsigned long size; }; +static void *sev_es_vmsa_ref(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + void *vmsa = NULL; /* stays NULL unless the VMSA is still VMSA_SHARED */ + + if (svm->sev_es.vmsa.vmsa_state == VMSA_SHARED) { + vmsa = page_address(svm->sev_es.vmsa.vmsa_page); + } + + return vmsa; +} + +static int sev_es_vcpu_alloc_vmsa(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct page *vmsa_page; + + if (WARN_ON_ONCE(svm->sev_es.vmsa.vmsa_state != VMSA_NONE)) + return -EINVAL; + + /* + * SEV-ES guests require a separate (from the VMCB) VMSA page used to + * contain the encrypted register state of the guest. 
+ */ + vmsa_page = snp_safe_alloc_page(); + if (!vmsa_page) + return -ENOMEM; + + svm->sev_es.vmsa.vmsa_state = VMSA_SHARED; + svm->sev_es.vmsa.vmsa_page = vmsa_page; + + return 0; +} + +static int sev_es_vcpu_vmsa_make_private(struct kvm_vcpu *vcpu) +{ + struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm); + struct vcpu_svm *svm = to_svm(vcpu); + void *vmsa = sev_es_vmsa_ref(vcpu); + + if (!vmsa) + return -EINVAL; + + if (is_sev_snp_guest(vcpu)) { + u64 pfn = __pa(vmsa) >> PAGE_SHIFT; + int ret; + + /* Transition the VMSA page to a firmware state. */ + ret = rmp_make_private(pfn, INITIAL_VMSA_GPA, PG_LEVEL_4K, sev->asid, true); + if (ret) + return ret; + } + + svm->sev_es.vmsa.vmsa_state = VMSA_PRIVATE; /* set for non-SNP guests too; only the RMP update above is SNP-specific */ + + return 0; +} + +static void sev_es_vcpu_free_vmsa(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + void *vmsa_ptr; + + switch (svm->sev_es.vmsa.vmsa_state) { + case VMSA_NONE: + case VMSA_GUEST: + break; /* no KVM-allocated page in these states */ + case VMSA_PRIVATE: + vmsa_ptr = page_address(svm->sev_es.vmsa.vmsa_page); + + if (is_sev_snp_guest(vcpu)) { + u64 pfn = __pa(vmsa_ptr) >> PAGE_SHIFT; + + if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K)) { + pr_err("Failed to make VMSA page shared - leaking it to avoid re-use\n"); + goto out; + } + } + + if (vcpu->arch.guest_state_protected) + sev_flush_encrypted_page(vcpu, vmsa_ptr); + + fallthrough; + case VMSA_SHARED: + __free_page(svm->sev_es.vmsa.vmsa_page); + break; + default: + BUG(); + } +out: + + svm->sev_es.vmsa.vmsa_page = NULL; + svm->sev_es.vmsa.vmsa_state = VMSA_NONE; +} + +static void sev_snp_vcpu_reclaim_vmsa(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + void *vmsa_ptr; + u64 pfn; + + if (WARN_ON_ONCE(!is_sev_snp_guest(vcpu) || + svm->sev_es.vmsa.vmsa_state != VMSA_PRIVATE)) + return; + + vmsa_ptr = page_address(svm->sev_es.vmsa.vmsa_page); + pfn = __pa(vmsa_ptr) >> PAGE_SHIFT; + + if (!snp_page_reclaim(vcpu->kvm, pfn)) + __free_page(svm->sev_es.vmsa.vmsa_page); /* freed only if reclaim returned 0; otherwise the page is leaked */ + + svm->sev_es.vmsa.vmsa_page = 
NULL; + svm->sev_es.vmsa.vmsa_state = VMSA_NONE; +} + +static void sev_es_set_guest_vmsa(struct kvm_vcpu *vcpu, gpa_t vmsa_gpa) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + sev_es_vcpu_free_vmsa(vcpu); /* release any KVM-allocated VMSA; leaves state VMSA_NONE */ + + svm->sev_es.vmsa.vmsa_state = VMSA_GUEST; + svm->sev_es.vmsa.vmsa_gpa = vmsa_gpa; +} + +static u64 sev_es_vmsa_pa(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + enum vmsa_state vmsa_state = svm->sev_es.vmsa.vmsa_state; + u64 vmsa_pa = INVALID_PAGE; /* returned when no VMSA is set or the gmem lookup fails */ + + if (vmsa_state == VMSA_GUEST) { + gpa_t vmsa_gpa = svm->sev_es.vmsa.vmsa_gpa; + struct kvm_memory_slot *slot; + struct page *page; + kvm_pfn_t pfn; + gfn_t gfn; + + gfn = gpa_to_gfn(vmsa_gpa); + + slot = gfn_to_memslot(vcpu->kvm, gfn); + if (!slot) + goto out; + + /* + * The new VMSA will be private guest memory, so retrieve the + * PFN from the gmem backend. + */ + if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL)) + goto out; + + vmsa_pa = pfn_to_hpa(pfn); + + /* + * gmem pages aren't currently migratable, but if this ever changes + * then care should be taken to ensure the guest vmsa is pinned + * through some other means. 
+ */ + kvm_release_page_clean(page); + } else if (vmsa_state == VMSA_PRIVATE || vmsa_state == VMSA_SHARED) { + vmsa_pa = __pa(page_address(svm->sev_es.vmsa.vmsa_page)); + } + +out: + return vmsa_pa; +} + /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { @@ -925,7 +1095,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm); - struct sev_es_save_area *save = svm->sev_es.vmsa; + struct sev_es_save_area *save = sev_es_vmsa_ref(vcpu); /* NOTE(review): NULL unless VMSA_SHARED - confirm it is checked before use */ struct xregs_state *xsave; const u8 *s; u8 *d; @@ -1026,6 +1196,7 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, { struct sev_data_launch_update_vmsa vmsa; struct vcpu_svm *svm = to_svm(vcpu); + void *vmsa_ref = sev_es_vmsa_ref(vcpu); /* NOTE(review): NULL unless VMSA_SHARED - confirm callers guarantee that */ int ret; if (vcpu->guest_debug) { @@ -1043,15 +1214,19 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, * the VMSA memory content (i.e it will write the same memory region * with the guest's key), so invalidate it first. */ - clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); + clflush_cache_range(vmsa_ref, PAGE_SIZE); vmsa.reserved = 0; vmsa.handle = to_kvm_sev_info(kvm)->handle; - vmsa.address = __sme_pa(svm->sev_es.vmsa); + vmsa.address = __sme_pa(vmsa_ref); vmsa.len = PAGE_SIZE; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); if (ret) - return ret; + goto free_vmsa; + + ret = sev_es_vcpu_vmsa_make_private(vcpu); + if (ret) + goto free_vmsa; /* * SEV-ES guests maintain an encrypted version of their FPU @@ -1069,7 +1244,13 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set. 
*/ svm_enable_lbrv(vcpu); + return 0; + +free_vmsa: + sev_es_vcpu_free_vmsa(vcpu); + + return ret; } static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -2508,23 +2689,22 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) kvm_for_each_vcpu(i, vcpu, kvm) { struct vcpu_svm *svm = to_svm(vcpu); - u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; + void *vmsa = sev_es_vmsa_ref(vcpu); ret = sev_es_sync_vmsa(svm); if (ret) goto out; - /* Transition the VMSA page to a firmware state. */ - ret = rmp_make_private(pfn, INITIAL_VMSA_GPA, PG_LEVEL_4K, sev->asid, true); + ret = sev_es_vcpu_vmsa_make_private(vcpu); if (ret) goto out; /* Issue the SNP command to encrypt the VMSA */ - data.address = __sme_pa(svm->sev_es.vmsa); + data.address = __sme_pa(vmsa); ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, &data, &argp->error); if (ret) { - snp_page_reclaim(kvm, pfn); + sev_snp_vcpu_reclaim_vmsa(vcpu); goto out; } @@ -3593,31 +3773,13 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void sev_free_vcpu(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm; + struct vcpu_svm *svm = to_svm(vcpu); if (!is_sev_es_guest(vcpu)) return; - svm = to_svm(vcpu); - - /* - * If it's an SNP guest, then the VMSA was marked in the RMP table as - * a guest-owned page. Transition the page to hypervisor state before - * releasing it back to the system. 
- */ - if (is_sev_snp_guest(vcpu)) { - u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; - - if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K)) - goto skip_vmsa_free; - } - - if (vcpu->arch.guest_state_protected) - sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); - - __free_page(virt_to_page(svm->sev_es.vmsa)); + sev_es_vcpu_free_vmsa(vcpu); -skip_vmsa_free: __sev_es_unmap_ghcb(svm); } @@ -4067,10 +4229,7 @@ static int snp_begin_psc(struct vcpu_svm *svm) static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_memory_slot *slot; - struct page *page; - kvm_pfn_t pfn; - gfn_t gfn; + u64 vmsa_pa; guard(mutex)(&svm->sev_es.snp_vmsa_mutex); @@ -4092,46 +4251,17 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) */ vmcb_mark_all_dirty(svm->vmcb); - if (!VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) - return; - - gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa); - svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; + sev_es_set_guest_vmsa(vcpu, svm->sev_es.req_vmsa_gpa); /* NOTE(review): req_vmsa_gpa is no longer reset to INVALID_PAGE after use - confirm intended */ + vmsa_pa = sev_es_vmsa_pa(vcpu); - slot = gfn_to_memslot(vcpu->kvm, gfn); - if (!slot) + if (!VALID_PAGE(vmsa_pa)) return; - /* - * The new VMSA will be private memory guest memory, so retrieve the - * PFN from the gmem backend. - */ - if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL)) - return; - - /* - * From this point forward, the VMSA will always be a guest-mapped page - * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In - * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but - * that involves cleanups like flushing caches, which would ideally be - * handled during teardown rather than guest boot. Deferring that also - * allows the existing logic for SEV-ES VMSAs to be re-used with - * minimal SNP-specific changes. 
- */ - svm->sev_es.snp_has_guest_vmsa = true; - /* Use the new VMSA */ - svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn); + svm->vmcb->control.vmsa_pa = vmsa_pa; /* Mark the vCPU as runnable */ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); - - /* - * gmem pages aren't currently migratable, but if this ever changes - * then care should be taken to ensure svm->sev_es.vmsa is pinned - * through some other means. - */ - kvm_release_page_clean(page); } static int sev_snp_ap_creation(struct vcpu_svm *svm) @@ -4187,10 +4317,10 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) return -EINVAL; } - target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2; + target_svm->sev_es.req_vmsa_gpa = svm->vmcb->control.exit_info_2; break; case SVM_VMGEXIT_AP_DESTROY: - target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; + target_svm->sev_es.req_vmsa_gpa = INVALID_PAGE; break; default: vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n", @@ -4708,20 +4838,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) struct vmcb *vmcb = svm->vmcb01.ptr; svm->vmcb->control.misc_ctl |= SVM_MISC_ENABLE_SEV_ES; - - /* - * An SEV-ES guest requires a VMSA area that is a separate from the - * VMCB page. Do not include the encryption mask on the VMSA physical - * address since hardware will access it using the guest key. Note, - * the VMSA will be NULL if this vCPU is the destination for intrahost - * migration, and will be copied later. 
- */ - if (!svm->sev_es.snp_has_guest_vmsa) { - if (svm->sev_es.vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); - else - svm->vmcb->control.vmsa_pa = INVALID_PAGE; - } + svm->vmcb->control.vmsa_pa = sev_es_vmsa_pa(&svm->vcpu); if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | @@ -4797,7 +4914,7 @@ void sev_init_vmcb(struct vcpu_svm *svm, bool init_event) int sev_vcpu_create(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct page *vmsa_page; + int ret; mutex_init(&svm->sev_es.snp_vmsa_mutex); @@ -4808,11 +4925,9 @@ int sev_vcpu_create(struct kvm_vcpu *vcpu) * SEV-ES guests require a separate (from the VMCB) VMSA page used to * contain the encrypted register state of the guest. */ - vmsa_page = snp_safe_alloc_page(); - if (!vmsa_page) - return -ENOMEM; - - svm->sev_es.vmsa = page_address(vmsa_page); + ret = sev_es_vcpu_alloc_vmsa(vcpu); + if (ret) + return ret; vcpu->arch.guest_tsc_protected = snp_is_secure_tsc_enabled(vcpu->kvm); @@ -5227,12 +5342,14 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu) if (!is_sev_es_guest(vcpu)) return NULL; + vmsa = sev_es_vmsa_ref(vcpu); + /* * If the VMSA has not yet been encrypted, return a pointer to the * current un-encrypted VMSA. 
*/ - if (!vcpu->arch.guest_state_protected) - return (struct vmcb_save_area *)svm->sev_es.vmsa; + if (vmsa) /* non-NULL only while the VMSA is still VMSA_SHARED */ + return vmsa; sev = to_kvm_sev_info(vcpu->kvm); @@ -5303,8 +5420,10 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu) void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) { + struct vmcb_save_area *vmsa_ptr = sev_es_vmsa_ref(vcpu); + /* If the VMSA has not yet been encrypted, nothing was allocated */ - if (!vcpu->arch.guest_state_protected || !vmsa) + if (vmsa == vmsa_ptr) /* caller holds the live VMSA page itself, or both are NULL */ return; free_page((unsigned long)vmsa); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5137416be593..3d4799f09b23 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -240,9 +240,29 @@ struct svm_nested_state { bool force_msr_bitmap_recalc; }; +enum vmsa_state { + /* No VMSA set */ + VMSA_NONE, + /* VMSA allocated by KVM - Shared in RMP (if applicable) */ + VMSA_SHARED, + /* VMSA allocated by KVM - made private (Guest-private in RMP on SEV-SNP) */ + VMSA_PRIVATE, + /* Guest-owned VMSA */ + VMSA_GUEST, +}; + +struct sev_es_vmsa_state { + enum vmsa_state vmsa_state; + union { + /* vmsa_state == (VMSA_SHARED || VMSA_PRIVATE) */ + struct page *vmsa_page; + /* vmsa_state == VMSA_GUEST */ + gpa_t vmsa_gpa; + }; +}; + struct vcpu_sev_es_state { /* SEV-ES support */ - struct sev_es_save_area *vmsa; struct ghcb *ghcb; u8 valid_bitmap[16]; struct kvm_host_map ghcb_map; @@ -266,10 +286,13 @@ struct vcpu_sev_es_state { u64 ghcb_registered_gpa; - struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */ - gpa_t snp_vmsa_gpa; + /* VMSA related state */ + struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */ + struct sev_es_vmsa_state vmsa; /* VMSA currently used by the VCPU */ + gpa_t req_vmsa_gpa; /* Requested new VMSA GPA */ + + bool snp_ap_runnable; /* NOTE(review): not referenced elsewhere in this patch */ bool snp_ap_waiting_for_reset; - bool snp_has_guest_vmsa; }; struct vcpu_svm { -- 2.53.0