Create a variant of fpregs_lock_and_load() that KVM can use in its vCPU
entry code after preemption has been disabled.  While basing it on the
existing logic in vcpu_enter_guest(), ensure that
fpregs_assert_state_consistent() always runs and sprinkle a few more
assertions.

Cc: stable@vger.kernel.org
Fixes: 820a6ee944e7 ("kvm: x86: Add emulation for IA32_XFD", 2022-01-14)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/fpu/api.h |  1 +
 arch/x86/kernel/fpu/core.c     | 17 +++++++++++++++++
 arch/x86/kvm/x86.c             |  8 +-------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index cd6f194a912b..0820b2621416 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -147,6 +147,7 @@ extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
 
 /* KVM specific functions */
 extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
 extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
+extern void fpu_load_guest_fpstate(struct fpu_guest *gfpu);
 extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
 extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3ab27fb86618..a480fa8c65d5 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -878,6 +878,23 @@ void fpregs_lock_and_load(void)
 	fpregs_assert_state_consistent();
 }
 
+void fpu_load_guest_fpstate(struct fpu_guest *gfpu)
+{
+#ifdef CONFIG_X86_DEBUG_FPU
+	struct fpu *fpu = x86_task_fpu(current);
+	WARN_ON_ONCE(gfpu->fpstate != fpu->fpstate);
+#endif
+
+	lockdep_assert_preemption_disabled();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		fpregs_restore_userregs();
+
+	fpregs_assert_state_consistent();
+	if (gfpu->xfd_err)
+		wrmsrq(MSR_IA32_XFD_ERR, gfpu->xfd_err);
+}
+EXPORT_SYMBOL_FOR_KVM(fpu_load_guest_fpstate);
+
 #ifdef CONFIG_X86_DEBUG_FPU
 /*
  * If current FPU state according to its tracking (loaded FPU context on this
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff8812f3a129..01d95192dfc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11300,13 +11300,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	}
 
-	fpregs_assert_state_consistent();
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
-
-	if (vcpu->arch.guest_fpu.xfd_err)
-		wrmsrq(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
-
+	fpu_load_guest_fpstate(&vcpu->arch.guest_fpu);
 	kvm_load_xfeatures(vcpu, true);
 
 	if (unlikely(vcpu->arch.switch_db_regs &&
-- 
2.52.0

Until now, fpstate->xfd has acted as both the guest value and the value
that the host used when executing XSAVES and XRSTORS.  This is wrong:
the data in the guest's FPU might not be initialized even if a bit is
set in XFD and, when that happens, XRSTORing the guest FPU will fail
with a #NM exception *on the host*.

Instead, store the value of XFD together with XFD_ERR in struct
fpu_guest; it will still be synchronized in fpu_load_guest_fpstate(),
but the XRSTOR(S) operation will be able to load any valid state of the
FPU independent of the XFD value.
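
In other words, after this change a guest write to MSR_IA32_XFD only
updates the software value, and the hardware MSR is reconciled with it
at vCPU entry.  A minimal sketch of the write-emulation side (the
function name kvm_xfd_write_emul() is made up for illustration; the
real change is the MSR_IA32_XFD case of kvm_set_msr_common() below):

	static void kvm_xfd_write_emul(struct fpu_guest *gfpu, u64 data)
	{
		/* Guest-visible value, also what RDMSR emulation returns. */
		gfpu->xfd = data;

		/*
		 * gfpu->fpstate->xfd is deliberately left alone: it is the
		 * value the host programs around XSAVES/XRSTORS, so any
		 * valid guest state can be loaded regardless of guest XFD.
		 */
	}

The hardware MSR then picks up gfpu->xfd via the xfd_set_state() call
added to fpu_load_guest_fpstate() below.
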
Cc: stable@vger.kernel.org
Fixes: 820a6ee944e7 ("kvm: x86: Add emulation for IA32_XFD", 2022-01-14)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/fpu/api.h   |  6 ++----
 arch/x86/include/asm/fpu/types.h |  7 +++++++
 arch/x86/kernel/fpu/core.c       | 19 ++++---------------
 arch/x86/kernel/fpu/xstate.h     | 18 ++++++++++--------
 arch/x86/kvm/x86.c               |  6 +++---
 5 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 0820b2621416..ee9ba06b7dbe 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -152,11 +152,9 @@ extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
 extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);
 
 #ifdef CONFIG_X86_64
-extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
-extern void fpu_sync_guest_vmexit_xfd_state(void);
+extern void fpu_sync_guest_vmexit_xfd_state(struct fpu_guest *gfpu);
 #else
-static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
-static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+static inline void fpu_sync_guest_vmexit_xfd_state(struct fpu_guest *gfpu) { }
 #endif
 
 extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 93e99d2583d6..7abe231e2ffe 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -545,6 +545,13 @@ struct fpu_guest {
 	 */
 	u64				xfeatures;
 
+	/*
+	 * @xfd:			Save the guest value.  Note that this is
+	 *				*not* fpstate->xfd, which is the value
+	 *				the host uses when doing XSAVE/XRSTOR.
+	 */
+	u64				xfd;
+
 	/*
 	 * @xfd_err:			Save the guest value.
 	 */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index a480fa8c65d5..ff17c96d290a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -317,16 +317,6 @@ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
 EXPORT_SYMBOL_FOR_KVM(fpu_enable_guest_xfd_features);
 
 #ifdef CONFIG_X86_64
-void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
-{
-	fpregs_lock();
-	guest_fpu->fpstate->xfd = xfd;
-	if (guest_fpu->fpstate->in_use)
-		xfd_update_state(guest_fpu->fpstate);
-	fpregs_unlock();
-}
-EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd);
-
 /**
  * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
  *
@@ -339,14 +329,12 @@ EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd);
  * Note: It can be invoked unconditionally even when write emulation is
  * enabled for the price of a then pointless MSR read.
  */
-void fpu_sync_guest_vmexit_xfd_state(void)
+void fpu_sync_guest_vmexit_xfd_state(struct fpu_guest *gfpu)
 {
-	struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
-
 	lockdep_assert_irqs_disabled();
 	if (fpu_state_size_dynamic()) {
-		rdmsrq(MSR_IA32_XFD, fpstate->xfd);
-		__this_cpu_write(xfd_state, fpstate->xfd);
+		rdmsrq(MSR_IA32_XFD, gfpu->xfd);
+		__this_cpu_write(xfd_state, gfpu->xfd);
 	}
 }
 EXPORT_SYMBOL_FOR_KVM(fpu_sync_guest_vmexit_xfd_state);
@@ -890,6 +878,7 @@ void fpu_load_guest_fpstate(struct fpu_guest *gfpu)
 		fpregs_restore_userregs();
 
 	fpregs_assert_state_consistent();
+	xfd_set_state(gfpu->xfd);
 	if (gfpu->xfd_err)
 		wrmsrq(MSR_IA32_XFD_ERR, gfpu->xfd_err);
 }
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 52ce19289989..c0ce05bee637 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -180,26 +180,28 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
 #endif
 
 #ifdef CONFIG_X86_64
-static inline void xfd_set_state(u64 xfd)
+static inline void __xfd_set_state(u64 xfd)
 {
 	wrmsrq(MSR_IA32_XFD, xfd);
 	__this_cpu_write(xfd_state, xfd);
 }
 
+static inline void xfd_set_state(u64 xfd)
+{
+	if (__this_cpu_read(xfd_state) != xfd)
+		__xfd_set_state(xfd);
+}
+
 static inline void xfd_update_state(struct fpstate *fpstate)
 {
-	if (fpu_state_size_dynamic()) {
-		u64 xfd = fpstate->xfd;
-
-		if (__this_cpu_read(xfd_state) != xfd)
-			xfd_set_state(xfd);
-	}
+	if (fpu_state_size_dynamic())
+		xfd_set_state(fpstate->xfd);
 }
 
 extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
 #else
 static inline void xfd_set_state(u64 xfd) { }
-
+static inline void __xfd_set_state(u64 xfd) { }
 static inline void xfd_update_state(struct fpstate *fpstate) { }
 
 static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01d95192dfc5..56fd082859bc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4261,7 +4261,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data & ~kvm_guest_supported_xfd(vcpu))
 			return 1;
 
-		fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
+		vcpu->arch.guest_fpu.xfd = data;
 		break;
 	case MSR_IA32_XFD_ERR:
 		if (!msr_info->host_initiated &&
@@ -4617,7 +4617,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
 			return 1;
 
-		msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
+		msr_info->data = vcpu->arch.guest_fpu.xfd;
 		break;
 	case MSR_IA32_XFD_ERR:
 		if (!msr_info->host_initiated &&
@@ -11405,7 +11405,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * in #NM irqoff handler).
 	 */
 	if (vcpu->arch.xfd_no_write_intercept)
-		fpu_sync_guest_vmexit_xfd_state();
+		fpu_sync_guest_vmexit_xfd_state(&vcpu->arch.guest_fpu);
 
 	kvm_x86_call(handle_exit_irqoff)(vcpu);
-- 
2.52.0

Renumber amx_test's GUEST_SYNC() points to make room for the next test;
separated for ease of review.
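
For orientation, this is roughly how the host consumes the numbers (a
condensed sketch of the amx_test run loop, not part of this patch):

	static void run_loop(struct kvm_vcpu *vcpu)
	{
		struct ucall uc;

		for (;;) {
			vcpu_run(vcpu);

			switch (get_ucall(vcpu, &uc)) {
			case UCALL_SYNC:
				/* uc.args[1] is the GUEST_SYNC() argument */
				fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
				break;
			case UCALL_ABORT:
				REPORT_GUEST_ASSERT(uc);
			case UCALL_DONE:
				return;
			}
		}
	}

Moving the later sync points from 5..10 up to 10..15 frees the values
5..9 for the KVM_SET_XSAVE test that the next patch inserts between
sync points 4 and 10.
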
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86/amx_test.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..dd980cdac5df 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -144,7 +144,7 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	__tileloadd(tiledata);
 	GUEST_SYNC(4);
 	__tilerelease();
-	GUEST_SYNC(5);
+	GUEST_SYNC(10);
 	/*
 	 * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
 	 * the xcomp_bv.
@@ -154,6 +154,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
 	GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
 
+	/* #NM test */
+
 	/* xfd=0x40000, disable amx tiledata */
 	wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
 
@@ -166,13 +168,13 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
 	GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
 
-	GUEST_SYNC(6);
+	GUEST_SYNC(11);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
 	set_tilecfg(amx_cfg);
 	__ldtilecfg(amx_cfg);
 	/* Trigger #NM exception */
 	__tileloadd(tiledata);
-	GUEST_SYNC(10);
+	GUEST_SYNC(15);
 
 	GUEST_DONE();
 }
@@ -180,18 +182,18 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 void guest_nm_handler(struct ex_regs *regs)
 {
 	/* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
-	GUEST_SYNC(7);
+	GUEST_SYNC(12);
 	GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-	GUEST_SYNC(8);
+	GUEST_SYNC(13);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
 	/* Clear xfd_err */
 	wrmsr(MSR_IA32_XFD_ERR, 0);
 	/* xfd=0, enable amx */
 	wrmsr(MSR_IA32_XFD, 0);
-	GUEST_SYNC(9);
+	GUEST_SYNC(14);
 }
 
 int main(int argc, char *argv[])
@@ -257,14 +259,14 @@ int main(int argc, char *argv[])
 		case 1:
 		case 2:
 		case 3:
-		case 5:
-		case 6:
-		case 7:
-		case 8:
+		case 10:
+		case 11:
+		case 12:
+		case 13:
 			fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
 			break;
 		case 4:
-		case 10:
+		case 15:
 			fprintf(stderr,
 				"GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
 
@@ -280,7 +282,7 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
 			kvm_x86_state_cleanup(state);
 			break;
-		case 9:
+		case 14:
 			fprintf(stderr,
 				"GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
 			break;
-- 
2.52.0

The host is allowed to set FPU state that includes a disabled xstate
component.  Check that this does not cause bad effects.
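
In outline, the new part of the test is (sync point numbers and names
as in the diff below):

	guest					host
	-----					----
	__tileloadd(tiledata);
	GUEST_SYNC(4);				save the vCPU state; keep it
						in tile_state (tiles live)
	wrmsr(MSR_IA32_XFD,
	      XFEATURE_MASK_XTILE_DATA);
	GUEST_SYNC(5);				vcpu_xsave_set(vcpu,
						    tile_state->xsave);
	GUEST_SYNC(6);				log and resume
	wrmsr(MSR_IA32_XFD, 0);
	__tilerelease();

With the previous patch, the XRSTOR(S) at vCPU entry after sync point 5
loads the live tile data even though the guest's XFD disables
XTILEDATA; without it, this could raise #NM on the host.
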
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86/amx_test.c | 25 ++++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index dd980cdac5df..5222ec6f71d3 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -142,7 +142,16 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	GUEST_SYNC(3);
 	/* Check save/restore when trap to userspace */
 	__tileloadd(tiledata);
 	GUEST_SYNC(4);
+
+	/* xfd=0x40000, disable amx tiledata */
+	wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+	GUEST_SYNC(5);
+	/* host tries setting tiledata while guest XFD is set */
+	GUEST_SYNC(6);
+
+	wrmsr(MSR_IA32_XFD, 0);
 	__tilerelease();
 	GUEST_SYNC(10);
 	/*
 	 * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
@@ -202,6 +211,7 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_x86_state *state;
+	struct kvm_x86_state *tile_state = NULL;
 	int xsave_restore_size;
 	vm_vaddr_t amx_cfg, tiledata, xstate;
 	struct ucall uc;
@@ -259,6 +269,7 @@ int main(int argc, char *argv[])
 		case 1:
 		case 2:
 		case 3:
+		case 6:
 		case 10:
 		case 11:
 		case 12:
@@ -267,8 +278,7 @@ int main(int argc, char *argv[])
 			break;
 		case 4:
 		case 15:
-			fprintf(stderr,
-				"GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+			fprintf(stderr, "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
 
 			/* Compacted mode, get amx offset by xsave area
 			 * size subtract 8K amx size.
@@ -280,8 +290,17 @@ int main(int argc, char *argv[])
 			/* Only check TMM0 register, 1 tile */
 			ret = memcmp(amx_start, tiles_data, TILE_SIZE);
 			TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
-			kvm_x86_state_cleanup(state);
+			if (uc.args[1] == 4)
+				tile_state = state;
+			else
+				kvm_x86_state_cleanup(state);
 			break;
+		case 5:
+			fprintf(stderr, "GUEST_SYNC(%ld), before KVM_SET_XSAVE\n", uc.args[1]);
+			vcpu_xsave_set(vcpu, tile_state->xsave);
+			fprintf(stderr, "GUEST_SYNC(%ld), after KVM_SET_XSAVE\n", uc.args[1]);
+			/* do not restore full state */
+			continue;
 		case 14:
 			fprintf(stderr,
 				"GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
-- 
2.52.0

The only difference between kvm_fpu_get() and fpregs_lock_and_load() is
the usage of switch_fpu_return() vs. fpregs_restore_userregs().  In
turn, these only differ if there is no FPU at all, but KVM requires
one.  Therefore use the pre-made export; the code is simpler and there
is no functional change.
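
For reference, these helpers bracket host FPU register access from the
emulator; the typical caller looks like this (a sketch following the
accessors in fpu.h, such as _kvm_write_mmx_reg() visible in the hunk
context below):

	static inline void kvm_write_mmx_reg(int reg, const u64 *data)
	{
		kvm_fpu_get();			/* now fpregs_lock_and_load() */
		_kvm_write_mmx_reg(reg, data);	/* touch the live registers */
		kvm_fpu_put();			/* fpregs_unlock() */
	}
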
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/fpu/core.c | 3 +--
 arch/x86/kvm/fpu.h         | 6 +-----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ff17c96d290a..6571952c6ef1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -846,7 +846,6 @@ void switch_fpu_return(void)
 
 	fpregs_restore_userregs();
 }
-EXPORT_SYMBOL_FOR_KVM(switch_fpu_return);
 
 void fpregs_lock_and_load(void)
 {
@@ -865,6 +864,7 @@ void fpregs_lock_and_load(void)
 
 	fpregs_assert_state_consistent();
 }
+EXPORT_SYMBOL_FOR_KVM(fpregs_lock_and_load);
 
 void fpu_load_guest_fpstate(struct fpu_guest *gfpu)
 {
@@ -899,7 +899,6 @@ void fpregs_assert_state_consistent(void)
 
 	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
 }
-EXPORT_SYMBOL_FOR_KVM(fpregs_assert_state_consistent);
 #endif
 
 void fpregs_mark_activate(void)
diff --git a/arch/x86/kvm/fpu.h b/arch/x86/kvm/fpu.h
index f898781b6a06..b6a03d8fa8af 100644
--- a/arch/x86/kvm/fpu.h
+++ b/arch/x86/kvm/fpu.h
@@ -149,11 +149,7 @@ static inline void _kvm_write_mmx_reg(int reg, const u64 *data)
 
 static inline void kvm_fpu_get(void)
 {
-	fpregs_lock();
-
-	fpregs_assert_state_consistent();
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
+	fpregs_lock_and_load();
 }
 
 static inline void kvm_fpu_put(void)
-- 
2.52.0