Specify which operation is being performed to anon_vma_clone(), which allows us to do checks specific to each operation type, as well as to separate out and make clear that the anon_vma reuse logic is absolutely specific to fork only. This opens the door to further refactorings and refinements later as we have more information to work with. Signed-off-by: Lorenzo Stoakes --- mm/internal.h | 11 +++++- mm/rmap.c | 67 ++++++++++++++++++++++---------- mm/vma.c | 6 +-- tools/testing/vma/vma_internal.h | 11 +++++- 4 files changed, 69 insertions(+), 26 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 469d4ef1ccc5..b4d4bca0f9a7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -244,7 +244,16 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) struct anon_vma *folio_get_anon_vma(const struct folio *folio); -int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src); +/* Operations which modify VMAs. */ +enum vma_operation { + VMA_OP_SPLIT, + VMA_OP_MERGE_UNFAULTED, + VMA_OP_REMAP, + VMA_OP_FORK, +}; + +int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, + enum vma_operation operation); int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma); int __anon_vma_prepare(struct vm_area_struct *vma); void unlink_anon_vmas(struct vm_area_struct *vma); diff --git a/mm/rmap.c b/mm/rmap.c index de9de6d71c23..f08e6bc57379 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -232,12 +232,13 @@ int __anon_vma_prepare(struct vm_area_struct *vma) } static void check_anon_vma_clone(struct vm_area_struct *dst, - struct vm_area_struct *src) + struct vm_area_struct *src, + enum vma_operation operation) { /* The write lock must be held. */ mmap_assert_write_locked(src->vm_mm); - /* If not a fork (implied by dst->anon_vma) then must be on same mm. */ - VM_WARN_ON_ONCE(dst->anon_vma && dst->vm_mm != src->vm_mm); + /* If not a fork then must be on same mm. */ + VM_WARN_ON_ONCE(operation != VMA_OP_FORK && dst->vm_mm != src->vm_mm); /* No source anon_vma is a no-op. */ VM_WARN_ON_ONCE(!src->anon_vma && !list_empty(&src->anon_vma_chain)); @@ -249,6 +250,35 @@ static void check_anon_vma_clone(struct vm_area_struct *dst, * must be the same across dst and src. */ VM_WARN_ON_ONCE(dst->anon_vma && dst->anon_vma != src->anon_vma); + + /* For the anon_vma to be compatible, it can only be singular. */ + VM_WARN_ON_ONCE(operation == VMA_OP_MERGE_UNFAULTED && + !list_is_singular(&src->anon_vma_chain)); +#ifdef CONFIG_PER_VMA_LOCK + /* Only merging an unfaulted VMA leaves the destination attached. */ + VM_WARN_ON_ONCE(operation != VMA_OP_MERGE_UNFAULTED && + vma_is_attached(dst)); +#endif +} + +static void find_reusable_anon_vma(struct vm_area_struct *vma, + struct anon_vma *anon_vma) +{ + /* If already populated, nothing to do.*/ + if (vma->anon_vma) + return; + + /* + * We reuse an anon_vma if any linking VMAs were unmapped and it has + * only a single child at most. + */ + if (anon_vma->num_active_vmas > 0) + return; + if (anon_vma->num_children > 1) + return; + + vma->anon_vma = anon_vma; + anon_vma->num_active_vmas++; } /** @@ -256,6 +286,7 @@ static void check_anon_vma_clone(struct vm_area_struct *dst, * all of the anon_vma objects contained within @src anon_vma_chain's. * @dst: The destination VMA with an empty anon_vma_chain. * @src: The source VMA we wish to duplicate. + * @operation: The type of operation which resulted in the clone. * * This is the heart of the VMA side of the anon_vma implementation - we invoke * this function whenever we need to set up a new VMA's anon_vma state. @@ -278,14 +309,16 @@ static void check_anon_vma_clone(struct vm_area_struct *dst, * * Returns: 0 on success, -ENOMEM on failure. */ -int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) +int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, + enum vma_operation operation) { struct anon_vma_chain *avc, *pavc; + struct anon_vma *active_anon_vma = src->anon_vma; - if (!src->anon_vma) + if (!active_anon_vma) return 0; - check_anon_vma_clone(dst, src); + check_anon_vma_clone(dst, src, operation); /* * Allocate AVCs. We don't need an anon_vma lock for this as we @@ -309,22 +342,14 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) struct anon_vma *anon_vma = avc->anon_vma; anon_vma_interval_tree_insert(avc, &anon_vma->rb_root); - - /* - * Reuse existing anon_vma if it has no vma and only one - * anon_vma child. - * - * Root anon_vma is never reused: - * it has self-parent reference and at least one child. - */ - if (!dst->anon_vma && src->anon_vma && - anon_vma->num_children < 2 && - anon_vma->num_active_vmas == 0) - dst->anon_vma = anon_vma; + if (operation == VMA_OP_FORK) + find_reusable_anon_vma(dst, anon_vma); } - if (dst->anon_vma) + + if (operation != VMA_OP_FORK) dst->anon_vma->num_active_vmas++; - anon_vma_unlock_write(src->anon_vma); + + anon_vma_unlock_write(active_anon_vma); return 0; enomem_failure: @@ -361,7 +386,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. */ - error = anon_vma_clone(vma, pvma); + error = anon_vma_clone(vma, pvma, VMA_OP_FORK); if (error) return error; diff --git a/mm/vma.c b/mm/vma.c index feb4bbd3b259..e297c3a94133 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -525,7 +525,7 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, if (err) goto out_free_vmi; - err = anon_vma_clone(new, vma); + err = anon_vma_clone(new, vma, VMA_OP_SPLIT); if (err) goto out_free_mpol; @@ -623,7 +623,7 @@ static int dup_anon_vma(struct vm_area_struct *dst, vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; - ret = anon_vma_clone(dst, src); + ret = anon_vma_clone(dst, src, VMA_OP_MERGE_UNFAULTED); if (ret) return ret; @@ -1862,7 +1862,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, vma_set_range(new_vma, addr, addr + len, pgoff); if (vma_dup_policy(vma, new_vma)) goto out_free_vma; - if (anon_vma_clone(new_vma, vma)) + if (anon_vma_clone(new_vma, vma, VMA_OP_REMAP)) goto out_free_mempol; if (new_vma->vm_file) get_file(new_vma->vm_file); diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 93e5792306d9..7fa56dcc53a6 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -600,6 +600,14 @@ struct mmap_action { bool hide_from_rmap_until_complete :1; }; +/* Operations which modify VMAs. */ +enum vma_operation { + VMA_OP_SPLIT, + VMA_OP_MERGE_UNFAULTED, + VMA_OP_REMAP, + VMA_OP_FORK, +}; + /* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the @@ -1157,7 +1165,8 @@ static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_stru return 0; } -static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) +static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, + enum vma_operation operation) { /* For testing purposes. We indicate that an anon_vma has been cloned. */ if (src->anon_vma != NULL) { -- 2.52.0