If mmu_notifier_register() fails, for example because a signal was pending, the mmu_notifier will not be registered. But when the VM gets destroyed, it will get unregistered anyway and that will cause one extra mmdrop(), which will eventually cause the mm of the process to be freed too early, and cause a use-after free. This bug happens rarely, and only when secure guests are involved. The solution is to check the return value of mmu_notifier_register() and return it to the caller (ultimately it will be propagated all the way to userspace). In case of -EINTR, userspace will try again. Fixes: ca2fd0609b5d ("KVM: s390: pv: add mmu_notifier") Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/pv.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 14c330ec8ceb..e85fb3247b0e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -622,6 +622,15 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) int cc, ret; u16 dummy; + /* Add the notifier only once. No races because we hold kvm->lock */ + if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { + ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); + if (ret) + return ret; + /* The notifier will be unregistered when the VM is destroyed */ + kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; + } + ret = kvm_s390_pv_alloc_vm(kvm); if (ret) return ret; @@ -657,11 +666,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - /* Add the notifier only once. No races because we hold kvm->lock */ - if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { - kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; - mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); - } return 0; } -- 2.50.1 Pass the right type of flag to vcpu_dat_fault_handler(); it expects a FOLL_* flag (in particular FOLL_WRITE), but FAULT_FLAG_WRITE is passed instead. This still works because they happen to have the same integer value, but it's a mistake, thus the fix. Signed-off-by: Claudio Imbrenda Fixes: 05066cafa925 ("s390/mm/fault: Handle guest-related program interrupts in KVM") --- arch/s390/kvm/kvm-s390.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d5ad10791c25..d41d77f2c7cd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4954,13 +4954,13 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { - unsigned int flags = 0; + unsigned int foll = 0; unsigned long gaddr; int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { case 0: @@ -5002,7 +5002,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); if (rc != -ENXIO) break; - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: @@ -5012,7 +5012,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: kvm_s390_assert_primary_as(vcpu); - return vcpu_dat_fault_handler(vcpu, gaddr, flags); + return vcpu_dat_fault_handler(vcpu, gaddr, foll); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); -- 2.50.1