If mmu_notifier_register() fails, for example because a signal was pending, the mmu_notifier will not be registered. But when the VM gets destroyed, it will get unregistered anyway and that will cause one extra mmdrop(), which will eventually cause the mm of the process to be freed too early, and cause a use-after-free. This bug happens rarely, and only when secure guests are involved. The solution is to check the return value of mmu_notifier_register() and return it to the caller (ultimately it will be propagated all the way to userspace). In case of -EINTR, userspace will try again. Fixes: ca2fd0609b5d ("KVM: s390: pv: add mmu_notifier") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Steffen Eiden Reviewed-by: Christoph Schlameuss --- arch/s390/kvm/pv.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 14c330ec8ceb..e85fb3247b0e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -622,6 +622,15 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) int cc, ret; u16 dummy; + /* Add the notifier only once. No races because we hold kvm->lock */ + if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { + ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); + if (ret) + return ret; + /* The notifier will be unregistered when the VM is destroyed */ + kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; + } + ret = kvm_s390_pv_alloc_vm(kvm); if (ret) return ret; @@ -657,11 +666,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - /* Add the notifier only once. 
No races because we hold kvm->lock */ - if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { - kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; - mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); - } return 0; } -- 2.50.1 Pass the right type of flag to vcpu_dat_fault_handler(); it expects a FOLL_* flag (in particular FOLL_WRITE), but FAULT_FLAG_WRITE is passed instead. This still works because they happen to have the same integer value, but it's a mistake, thus the fix. Signed-off-by: Claudio Imbrenda Fixes: 05066cafa925 ("s390/mm/fault: Handle guest-related program interrupts in KVM") Acked-by: Christian Borntraeger Reviewed-by: David Hildenbrand --- arch/s390/kvm/kvm-s390.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d5ad10791c25..4280b25b6b04 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4863,12 +4863,12 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) * @vcpu: the vCPU whose gmap is to be fixed up * @gfn: the guest frame number used for memslots (including fake memslots) * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps - * @flags: FOLL_* flags + * @foll: FOLL_* flags * * Return: 0 on success, < 0 in case of error. * Context: The mm lock must not be held before calling. May sleep. */ -int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll) { struct kvm_memory_slot *slot; unsigned int fault_flags; @@ -4882,13 +4882,13 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return vcpu_post_run_addressing_exception(vcpu); - fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + fault_flags = foll & FOLL_WRITE ? 
FAULT_FLAG_WRITE : 0; if (vcpu->arch.gmap->pfault_enabled) - flags |= FOLL_NOWAIT; + foll |= FOLL_NOWAIT; vmaddr = __gfn_to_hva_memslot(slot, gfn); try_again: - pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page); /* Access outside memory, inject addressing exception */ if (is_noslot_pfn(pfn)) @@ -4904,7 +4904,7 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u return 0; vcpu->stat.pfault_sync++; /* Could not setup async pfault, try again synchronously */ - flags &= ~FOLL_NOWAIT; + foll &= ~FOLL_NOWAIT; goto try_again; } /* Any other error */ @@ -4924,7 +4924,7 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u return rc; } -static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll) { unsigned long gaddr_tmp; gfn_t gfn; @@ -4949,18 +4949,18 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un } gfn = gpa_to_gfn(gaddr_tmp); } - return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll); } static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { - unsigned int flags = 0; + unsigned int foll = 0; unsigned long gaddr; int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { case 0: @@ -5002,7 +5002,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); if (rc != -ENXIO) break; - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: @@ -5012,7 +5012,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_SECOND_TRANS: case 
PGM_REGION_THIRD_TRANS: kvm_s390_assert_primary_as(vcpu); - return vcpu_dat_fault_handler(vcpu, gaddr, flags); + return vcpu_dat_fault_handler(vcpu, gaddr, foll); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); -- 2.50.1