From: xu xin As preparation for KSM rmap optimizations, let's track the original linear_page_index() of a de-duplicated page in its ksm_rmap_item, so we can efficiently search for the page in an address space, avoiding scanning the entire address space. This was previously discussed in [1, 2]. To avoid growing ksm_rmap_item, let's squeeze it into the existing structure by overlying some members (oldchecksum, age, remaining_skips) that are only relevant while on the unstable tree. The new entry will only be relevant for entries in the stable tree. However, as the age information is read by should_skip_rmap_item() with the smart-scanning approach even while we have an entry in the stable tree, but the page changes (no longer a KSM page, for example due to COW), we have to change the handling there a bit. We'll calculate the linear page index in try_to_merge_with_ksm_page(), when adding it to the stable tree, and reset the index (to reset overlayed data) when removing an item from the stable tree -- in remove_rmap_item_from_tree(), remove_node_from_stable_tree() and break_cow(). To be specially clarified, the reason for resetting the stored index at break_cow() is: - When a page successfully becomes a KSM page (i.e., after stable_tree_append() sets STABLE_FLAG), both anon_vma and the index are stored and remain valid. - However, during the merging process, there are several failure paths where we already prepared an rmap item to be added to the stable tree, but must revert that as some part of the merge process failed. Examples include: * The second call to try_to_merge_with_ksm_page() fails in try_to_merge_two_pages(). * stable_tree_insert() fails in cmp_and_merge_page(). In such cases, break_cow() is invoked to break the COW mapping and discard the KSM state. Currently, break_cow() already contains a put_anon_vma(rmap_item->anon_vma) to release the reference taken during the aborted merge. Because the index is logically paired with anon_vma (both are only meaningful when the rmap_item is in a stable state), it must also be cleared (or reset) in break_cow() to avoid leaving stale linear_page_index values that could confuse subsequent rmap walks or scanning logic. [1] https://lore.kernel.org/all/adTPQSb-qSSHviJN@lucifer/ [2] https://lore.kernel.org/all/202604091806051535BJWZ_FTtdIm3Snk24ei_@zte.com.cn/ Signed-off-by: xu xin --- mm/ksm.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 7d5b76478f0b..e0ba29e3c0a4 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -195,22 +195,28 @@ struct ksm_stable_node { * @node: rb node of this rmap_item in the unstable tree * @head: pointer to stable_node heading this list in the stable tree * @hlist: link into hlist of rmap_items hanging off that stable_node - * @age: number of scan iterations since creation - * @remaining_skips: how many scans to skip + * @age: number of scan iterations since creation (unstable node) + * @remaining_skips: how many scans to skip (unstable node) + * @linear_page_index: the original page's index before merged by KSM (stable node) */ struct ksm_rmap_item { struct ksm_rmap_item *rmap_list; union { - struct anon_vma *anon_vma; /* when stable */ + struct anon_vma *anon_vma; /* for reverse mapping, when stable */ #ifdef CONFIG_NUMA int nid; /* when node of unstable tree */ #endif }; struct mm_struct *mm; unsigned long address; /* + low bits used for flags below */ - unsigned int oldchecksum; /* when unstable */ - rmap_age_t age; - rmap_age_t remaining_skips; + union { + struct { + unsigned int oldchecksum; + rmap_age_t age; + rmap_age_t remaining_skips; + }; /* when unstable */ + unsigned long linear_page_index; /* for reverse mapping, when stable */ + }; union { struct rb_node node; /* when node of unstable tree */ struct { /* when listed from stable tree */ @@ -776,6 +782,11 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, return vma; } +/* + * break_cow: actively break COW, replacing the KSM page by a fresh anonymous + * page. This is called when rmap_item has not yet become stable, but page + * has been merged. + */ static void break_cow(struct ksm_rmap_item *rmap_item) { struct mm_struct *mm = rmap_item->mm; @@ -787,6 +798,11 @@ static void break_cow(struct ksm_rmap_item *rmap_item) * to undo, we also need to drop a reference to the anon_vma. */ put_anon_vma(rmap_item->anon_vma); + /* + * Reset linear_page_index that might overlay age-related + * information. (it's still unstable node) + */ + rmap_item->linear_page_index = 0; mmap_read_lock(mm); vma = find_mergeable_vma(mm, addr); @@ -899,6 +915,8 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node) VM_BUG_ON(stable_node->rmap_hlist_len <= 0); stable_node->rmap_hlist_len--; put_anon_vma(rmap_item->anon_vma); + /* Reset linear_page_index that might overlay age-related information. */ + rmap_item->linear_page_index = 0; rmap_item->address &= PAGE_MASK; cond_resched(); } @@ -1052,6 +1070,8 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item) stable_node->rmap_hlist_len--; put_anon_vma(rmap_item->anon_vma); + /* Reset linear_page_index that might overlay age-related information. */ + rmap_item->linear_page_index = 0; rmap_item->head = NULL; rmap_item->address &= PAGE_MASK; @@ -1598,8 +1618,16 @@ static int try_to_merge_with_ksm_page(struct ksm_rmap_item *rmap_item, /* Unstable nid is in union with stable anon_vma: remove first */ remove_rmap_item_from_tree(rmap_item); - /* Must get reference to anon_vma while still holding mmap_lock */ + /* + * Must get reference to anon_vma while still holding mmap_lock. + * Must can only reference the VMA while still holding the mmap + * lock, so reference the anon_vma and calculate the linear page + * index early, before stable_tree_append(). If anything goes + * wrong that prevents the rmap_item from being added to the + * stable_tree, break_cow() will clean it up. + */ rmap_item->anon_vma = vma->anon_vma; + rmap_item->linear_page_index = linear_page_index(vma, rmap_item->address); get_anon_vma(vma->anon_vma); out: mmap_read_unlock(mm); @@ -2458,6 +2486,13 @@ static bool should_skip_rmap_item(struct folio *folio, if (folio_test_ksm(folio)) return false; + /* + * There is no age information in stable-tree nodes. We might end up + * here without a KSM page for example after COW. + */ + if (rmap_item->address & STABLE_FLAG) + return false; + age = rmap_item->age; if (age != U8_MAX) rmap_item->age++; -- 2.25.1