Add helper functions for handling multiple guest faults at the same
time. These will be needed for VSIE, where resolving a nested guest
access also requires access to all the page tables that map it.
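
As an illustration of how these helpers are meant to be used together,
a caller that needs several consecutive guest pages at once could
follow roughly the pattern below. This is only a sketch, not part of
this patch: the function name, the fixed array size, and the
unconditional write access are made up for the example.

    /* Hypothetical caller: pin 4 consecutive guest pages for writing. */
    static int example_with_guest_pages(struct kvm *kvm, gfn_t gfn)
    {
            struct guest_fault faults[4] = {};
            unsigned long seq;
            int rc;

            while (1) {
                    seq = kvm->mmu_invalidate_seq;
                    /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
                    smp_rmb();

                    rc = kvm_s390_get_guest_pages(kvm, faults, gfn,
                                                  ARRAY_SIZE(faults), true);
                    if (rc) {
                            kvm_s390_release_faultin_array(kvm, faults, true);
                            return rc;
                    }

                    scoped_guard(read_lock, &kvm->mmu_lock) {
                            if (!kvm_s390_array_needs_retry_safe(kvm, seq, faults)) {
                                    /* ... use the pinned pages under mmu_lock ... */
                                    kvm_s390_release_faultin_array(kvm, faults, false);
                                    return 0;
                            }
                    }
                    /* An invalidation raced with us: drop all pages and retry. */
                    kvm_s390_release_faultin_array(kvm, faults, true);
            }
    }

Note that on the success path the pages are released (and possibly
dirtied) while still holding mmu_lock, which is what
kvm_release_faultin_page() expects; the _safe retry check likewise
needs mmu_lock to be held.
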
Signed-off-by: Claudio Imbrenda
---
 arch/s390/include/asm/kvm_host.h |   1 +
 arch/s390/kvm/Makefile           |   2 +-
 arch/s390/kvm/faultin.c          | 148 +++++++++++++++++++++++++++++++
 arch/s390/kvm/faultin.h          |  92 +++++++++++++++++++
 arch/s390/kvm/kvm-s390.c         |   2 +-
 arch/s390/kvm/kvm-s390.h         |   2 +
 6 files changed, 245 insertions(+), 2 deletions(-)
 create mode 100644 arch/s390/kvm/faultin.c
 create mode 100644 arch/s390/kvm/faultin.h

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f5f87dae0dd9..958a3b8c32d1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -441,6 +441,7 @@ struct kvm_vcpu_arch {
        bool acrs_loaded;
        struct kvm_s390_pv_vcpu pv;
        union diag318_info diag318_info;
+       void *mc; /* Placeholder */
 };
 
 struct kvm_vm_stat {
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 21088265402c..1e2dcd3e2436 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,7 +9,7 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
 kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
-kvm-y += dat.o gmap.o
+kvm-y += dat.o gmap.o faultin.o
 
 kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/faultin.c b/arch/s390/kvm/faultin.c
new file mode 100644
index 000000000000..9795ed429097
--- /dev/null
+++ b/arch/s390/kvm/faultin.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest fault handling.
+ *
+ * Copyright IBM Corp. 2025
+ * Author(s): Claudio Imbrenda
+ */
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+
+#include "gmap.h"
+#include "trace.h"
+#include "faultin.h"
+
+bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
+
+/*
+ * kvm_s390_faultin_gfn() - handle a DAT fault.
+ * @vcpu: the vCPU whose gmap is to be fixed up, or NULL if operating on the VM.
+ * @kvm: the VM whose gmap is to be fixed up, or NULL if operating on a vCPU.
+ * @f: the guest fault that needs to be resolved.
+ *
+ * Return:
+ * * 0 on success
+ * * < 0 in case of error
+ * * > 0 in case of guest exceptions
+ *
+ * Context:
+ * * The mm lock must not be held before calling
+ * * kvm->srcu must be held
+ * * may sleep
+ */
+int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fault *f)
+{
+       struct kvm_s390_mmu_cache *local_mc __free(kvm_s390_mmu_cache) = NULL;
+       struct kvm_s390_mmu_cache *mc = NULL;
+       struct kvm_memory_slot *slot;
+       unsigned long inv_seq;
+       int foll, rc = 0;
+
+       foll = f->write_attempt ? FOLL_WRITE : 0;
+       foll |= f->attempt_pfault ? FOLL_NOWAIT : 0;
+
+       if (vcpu) {
+               kvm = vcpu->kvm;
+               mc = vcpu->arch.mc;
+       }
+
+       lockdep_assert_held(&kvm->srcu);
+
+       scoped_guard(read_lock, &kvm->mmu_lock) {
+               if (gmap_try_fixup_minor(kvm->arch.gmap, f) == 0)
+                       return 0;
+       }
+
+       while (1) {
+               f->valid = false;
+               inv_seq = kvm->mmu_invalidate_seq;
+               /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
+               smp_rmb();
+
+               if (vcpu)
+                       slot = kvm_vcpu_gfn_to_memslot(vcpu, f->gfn);
+               else
+                       slot = gfn_to_memslot(kvm, f->gfn);
+               f->pfn = __kvm_faultin_pfn(slot, f->gfn, foll, &f->writable, &f->page);
+
+               /* Needs I/O, try to set up async pfault (only possible with FOLL_NOWAIT) */
+               if (f->pfn == KVM_PFN_ERR_NEEDS_IO) {
+                       if (unlikely(!f->attempt_pfault))
+                               return -EAGAIN;
+                       if (unlikely(!vcpu))
+                               return -EINVAL;
+                       trace_kvm_s390_major_guest_pfault(vcpu);
+                       if (kvm_arch_setup_async_pf(vcpu))
+                               return 0;
+                       vcpu->stat.pfault_sync++;
+                       /* Could not set up async pfault, try again synchronously */
+                       foll &= ~FOLL_NOWAIT;
+                       f->pfn = __kvm_faultin_pfn(slot, f->gfn, foll, &f->writable, &f->page);
+               }
+
+               /* Access outside memory, addressing exception */
+               if (is_noslot_pfn(f->pfn))
+                       return PGM_ADDRESSING;
+               /* Signal pending: try again */
+               if (f->pfn == KVM_PFN_ERR_SIGPENDING)
+                       return -EAGAIN;
+               /* Check if it's read-only memory; don't try to actually handle that case. */
+               if (f->pfn == KVM_PFN_ERR_RO_FAULT)
+                       return -EOPNOTSUPP;
+               /* Any other error */
+               if (is_error_pfn(f->pfn))
+                       return -EFAULT;
+
+               if (!mc) {
+                       local_mc = kvm_s390_new_mmu_cache();
+                       if (!local_mc)
+                               return -ENOMEM;
+                       mc = local_mc;
+               }
+
+               /* If an invalidation ran in the meantime, release the page and retry */
+               if (mmu_invalidate_retry_gfn_unsafe(kvm, inv_seq, f->gfn)) {
+                       kvm_release_faultin_page(kvm, f->page, true, false);
+                       continue;
+               }
+
+               scoped_guard(read_lock, &kvm->mmu_lock) {
+                       if (!mmu_invalidate_retry_gfn(kvm, inv_seq, f->gfn)) {
+                               f->valid = true;
+                               rc = gmap_link(mc, kvm->arch.gmap, f);
+                               kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
+                               f->page = NULL;
+                       }
+               }
+               kvm_release_faultin_page(kvm, f->page, true, false);
+
+               if (rc == -ENOMEM) {
+                       rc = kvm_s390_mmu_cache_topup(mc);
+                       if (rc)
+                               return rc;
+               } else if (rc != -EAGAIN) {
+                       return rc;
+               }
+       }
+}
+
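+/*
+ * kvm_s390_get_guest_page() - fault in one guest page without mapping it.
+ * @kvm: the VM whose memslots are to be used.
+ * @f: the guest fault structure to fill in; on success, @f->valid is set
+ *     and @f->page holds a reference that the caller must release, e.g.
+ *     with kvm_s390_release_multiple().
+ * @gfn: the guest frame number of the page to fault in.
+ * @w: whether to fault in the page for a write access.
+ *
+ * Unlike kvm_s390_faultin_gfn(), the page is only faulted in and pinned,
+ * but not linked into the gmap.
+ *
+ * Return:
+ * * 0 on success
+ * * PGM_ADDRESSING if the gfn is not backed by a memslot
+ * * -EINTR if a signal is pending
+ * * -EAGAIN if faulting in the page would require I/O
+ * * -EFAULT in case of any other error
+ */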
+int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w)
+{
+       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+       int foll = w ? FOLL_WRITE : 0;
+
+       f->write_attempt = w;
+       f->gfn = gfn;
+       f->pfn = __kvm_faultin_pfn(slot, gfn, foll, &f->writable, &f->page);
+       if (is_noslot_pfn(f->pfn))
+               return PGM_ADDRESSING;
+       if (is_sigpending_pfn(f->pfn))
+               return -EINTR;
+       if (f->pfn == KVM_PFN_ERR_NEEDS_IO)
+               return -EAGAIN;
+       if (is_error_pfn(f->pfn))
+               return -EFAULT;
+
+       f->valid = true;
+       return 0;
+}
diff --git a/arch/s390/kvm/faultin.h b/arch/s390/kvm/faultin.h
new file mode 100644
index 000000000000..f86176d2769c
--- /dev/null
+++ b/arch/s390/kvm/faultin.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM guest fault handling.
+ *
+ * Copyright IBM Corp. 2025
+ * Author(s): Claudio Imbrenda
+ */
+
+#ifndef __KVM_S390_FAULTIN_H
+#define __KVM_S390_FAULTIN_H
+
+#include <linux/kvm_host.h>
+
+#include "dat.h"
+
+int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fault *f);
+int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w);
+
+static inline int kvm_s390_faultin_gfn_simple(struct kvm_vcpu *vcpu, struct kvm *kvm,
+                                             gfn_t gfn, bool wr)
+{
+       struct guest_fault f = { .gfn = gfn, .write_attempt = wr, };
+
+       return kvm_s390_faultin_gfn(vcpu, kvm, &f);
+}
+
+static inline int kvm_s390_get_guest_page_and_read_gpa(struct kvm *kvm, struct guest_fault *f,
+                                                      gpa_t gaddr, unsigned long *val)
+{
+       int rc;
+
+       rc = kvm_s390_get_guest_page(kvm, f, gpa_to_gfn(gaddr), false);
+       if (rc)
+               return rc;
+
+       *val = *(unsigned long *)phys_to_virt(pfn_to_phys(f->pfn) | offset_in_page(gaddr));
+
+       return 0;
+}
+
+static inline void kvm_s390_release_multiple(struct kvm *kvm, struct guest_fault *guest_faults,
+                                            int n, bool ignore)
+{
+       int i;
+
+       for (i = 0; i < n; i++) {
+               kvm_release_faultin_page(kvm, guest_faults[i].page, ignore,
+                                        guest_faults[i].write_attempt);
+               guest_faults[i].page = NULL;
+       }
+}
+
+static inline bool kvm_s390_multiple_faults_need_retry(struct kvm *kvm, unsigned long seq,
+                                                      struct guest_fault *guest_faults, int n,
+                                                      bool unsafe)
+{
+       int i;
+
+       for (i = 0; i < n; i++) {
+               if (!guest_faults[i].valid)
+                       continue;
+               if (unsafe && mmu_invalidate_retry_gfn_unsafe(kvm, seq, guest_faults[i].gfn))
+                       return true;
+               if (!unsafe && mmu_invalidate_retry_gfn(kvm, seq, guest_faults[i].gfn))
+                       return true;
+       }
+       return false;
+}
+
+static inline int kvm_s390_get_guest_pages(struct kvm *kvm, struct guest_fault *guest_faults,
+                                          gfn_t start, int n_pages, bool write_attempt)
+{
+       int i, rc = 0;
+
+       for (i = 0; i < n_pages; i++) {
+               rc = kvm_s390_get_guest_page(kvm, guest_faults + i, start + i, write_attempt);
+               if (rc)
+                       break;
+       }
+       return rc;
+}
+
+#define kvm_s390_release_faultin_array(kvm, array, ignore) \
+       kvm_s390_release_multiple(kvm, array, ARRAY_SIZE(array), ignore)
+
+#define kvm_s390_array_needs_retry_unsafe(kvm, seq, array) \
+       kvm_s390_multiple_faults_need_retry(kvm, seq, array, ARRAY_SIZE(array), true)
+
+#define kvm_s390_array_needs_retry_safe(kvm, seq, array) \
+       kvm_s390_multiple_faults_need_retry(kvm, seq, array, ARRAY_SIZE(array), false)
+
+#endif /* __KVM_S390_FAULTIN_H */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2e34f993e3c5..d7eff75a53d0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4747,7 +4747,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
        return true;
 }
 
-static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 {
        hva_t hva;
        struct kvm_arch_async_pf arch;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c44fe0c3a097..f89f9f698df5 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -470,6 +470,8 @@ static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr,
        return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
 }
 
+bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
+
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
-- 
2.51.1