From: "Mike Rapoport (Microsoft)" Implementation of UFFDIO_COPY for anonymous memory might fail to copy data from userspace buffer when the destination VMA is locked (either with mm_lock or with per-VMA lock). In that case, mfill_atomic() releases the locks, retries copying the data with locks dropped and then re-locks the destination VMA and re-establishes PMD. Since this retry-reget dance is only relevant for UFFDIO_COPY and it never happens for other UFFDIO_ operations, make it a part of mfill_atomic_pte_copy() that actually implements UFFDIO_COPY for anonymous memory. As a temporal safety measure to avoid breaking biscection mfill_atomic_pte_copy() makes sure to never return -ENOENT so that the loop in mfill_atomic() won't retry copiyng outside of mmap_lock. This is removed later when shmem implementation will be updated later and the loop in mfill_atomic() will be adjusted. Signed-off-by: Mike Rapoport (Microsoft) --- mm/userfaultfd.c | 78 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index baff11e83101..828f252c720c 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -159,6 +159,9 @@ static void uffd_mfill_unlock(struct vm_area_struct *vma) static void mfill_put_vma(struct mfill_state *state) { + if (!state->vma) + return; + up_read(&state->ctx->map_changing_lock); uffd_mfill_unlock(state->vma); state->vma = NULL; @@ -404,35 +407,63 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr) return ret; } +static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio) +{ + unsigned long src_addr = state->src_addr; + void *kaddr; + int err; + + /* retry copying with mm_lock dropped */ + mfill_put_vma(state); + + kaddr = kmap_local_folio(folio, 0); + err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE); + kunmap_local(kaddr); + if (unlikely(err)) + return -EFAULT; + + flush_dcache_folio(folio); + + /* reget VMA and PMD, they could change underneath us */ + err = mfill_get_vma(state); + if (err) + return err; + + err = mfill_get_pmd(state); + if (err) + return err; + + return 0; +} + static int mfill_atomic_pte_copy(struct mfill_state *state) { - struct vm_area_struct *dst_vma = state->vma; unsigned long dst_addr = state->dst_addr; unsigned long src_addr = state->src_addr; uffd_flags_t flags = state->flags; - pmd_t *dst_pmd = state->pmd; struct folio *folio; int ret; - if (!state->folio) { - ret = -ENOMEM; - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma, - dst_addr); - if (!folio) - goto out; + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, state->vma, dst_addr); + if (!folio) + return -ENOMEM; - ret = mfill_copy_folio_locked(folio, src_addr); + ret = -ENOMEM; + if (mem_cgroup_charge(folio, state->vma->vm_mm, GFP_KERNEL)) + goto out_release; - /* fallback to copy_from_user outside mmap_lock */ - if (unlikely(ret)) { - ret = -ENOENT; - state->folio = folio; - /* don't free the page */ - goto out; - } - } else { - folio = state->folio; - state->folio = NULL; + ret = mfill_copy_folio_locked(folio, src_addr); + if (unlikely(ret)) { + /* + * Fallback to copy_from_user outside mmap_lock. + * If retry is successful, mfill_copy_folio_locked() returns + * with locks retaken by mfill_get_vma(). + * If there was an error, we must mfill_put_vma() anyway and it + * will take care of unlocking if needed. + */ + ret = mfill_copy_folio_retry(state, folio); + if (ret) + goto out_release; } /* @@ -442,17 +473,16 @@ static int mfill_atomic_pte_copy(struct mfill_state *state) */ __folio_mark_uptodate(folio); - ret = -ENOMEM; - if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL)) - goto out_release; - - ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, + ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr, &folio->page, true, flags); if (ret) goto out_release; out: return ret; out_release: + /* Don't return -ENOENT so that our caller won't retry */ + if (ret == -ENOENT) + ret = -EFAULT; folio_put(folio); goto out; } -- 2.51.0