When a guest performs an atomic/exclusive operation on memory lacking the required attributes, user_mem_abort() injects a data abort and returns early. However, it fails to release the reference to the host page acquired via __kvm_faultin_pfn(). A malicious guest could repeatedly trigger this fault, leaking host page references and eventually causing host memory exhaustion (OOM). Fix this by consolidating the early error returns to a new out_put_page label that correctly calls kvm_release_page_unused(). Fixes: 2937aeec9dc5 ("KVM: arm64: Handle DABT caused by LS64* instructions on unsupported memory") Signed-off-by: Fuad Tabba --- arch/arm64/kvm/mmu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ec2eee857208..e1d6a4f591a9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1837,10 +1837,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && s2_force_noncacheable) ret = -ENOEXEC; - if (ret) { - kvm_release_page_unused(page); - return ret; - } + if (ret) + goto out_put_page; /* * Guest performs atomic/exclusive operations on memory with unsupported @@ -1850,7 +1848,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) { kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; + ret = 1; + goto out_put_page; } if (nested) @@ -1936,6 +1935,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mark_page_dirty_in_slot(kvm, memslot, gfn); return ret != -EAGAIN ? ret : 0; + +out_put_page: + kvm_release_page_unused(page); + return ret; } /* Resolve the access fault by making the page young again. */ -- 2.53.0.473.g4a7958ca14-goog When user_mem_abort() handles a nested stage-2 fault, it truncates vma_pagesize to respect the guest's mapping size. However, the local variable vma_shift is never updated to match this new size. If the underlying host page turns out to be hardware poisoned, kvm_send_hwpoison_signal() is called with the original, larger vma_shift instead of the actual mapping size. This signals incorrect poison boundaries to userspace and breaks hugepage memory poison containment for nested VMs. Update vma_shift to match the truncated vma_pagesize when operating on behalf of a nested hypervisor. Fixes: fd276e71d1e7 ("KVM: arm64: nv: Handle shadow stage 2 page faults") Signed-off-by: Fuad Tabba --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e1d6a4f591a9..b08240e0cab1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1751,6 +1751,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, force_pte = (max_map_size == PAGE_SIZE); vma_pagesize = min_t(long, vma_pagesize, max_map_size); + vma_shift = force_pte ? PAGE_SHIFT : __ffs(vma_pagesize); } /* -- 2.53.0.473.g4a7958ca14-goog