From: Kairui Song

Remove the "skip if exists" check introduced by commit a65b0e7607ccb
("zswap: make shrinking memcg-aware"). It was needed because there was a
tiny time window between setting the SWAP_HAS_CACHE bit and actually
adding the folio to the swap cache: if one user tried to add a folio to
the swap cache while another user had been interrupted after setting
SWAP_HAS_CACHE but before adding its folio, the first user could end up
waiting on an insertion that had not happened yet, which might lead to a
deadlock.

We have since moved the bit setting into the same critical section as
adding the folio, so the workaround is no longer needed. Remove it and
clean it up.

Signed-off-by: Kairui Song
---
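A note for reviewers, not intended for the commit message: below is a
minimal userspace sketch of the ordering problem the old workaround
guarded against. Everything in it (has_cache, cache, old_add, new_add,
lookup) is a hypothetical stand-in rather than a kernel API; it only
illustrates why setting the bit and inserting the folio in one critical
section closes the window that skip_if_exists used to paper over.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool has_cache;	/* stand-in for the SWAP_HAS_CACHE bit */
static void *cache;	/* stand-in for the swap cache slot */

/* Old scheme: claiming the slot and inserting are two separate steps. */
static void old_add(void *folio)
{
	pthread_mutex_lock(&lock);
	has_cache = true;	/* step 1: claim the slot */
	pthread_mutex_unlock(&lock);
	/*
	 * Window: if this thread is interrupted here, it has published
	 * the bit but not the folio. A second user that keeps retrying
	 * until the folio shows up can wait forever, hence the old
	 * skip-if-exists bailout.
	 */
	pthread_mutex_lock(&lock);
	cache = folio;		/* step 2: actually insert */
	pthread_mutex_unlock(&lock);
}

/* New scheme: both steps in one critical section, no window. */
static void new_add(void *folio)
{
	pthread_mutex_lock(&lock);
	has_cache = true;
	cache = folio;
	pthread_mutex_unlock(&lock);
}

/* With the new scheme, a set bit always implies an inserted folio. */
static void *lookup(void)
{
	void *ret;

	pthread_mutex_lock(&lock);
	ret = has_cache ? cache : NULL;
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	int folio = 42;

	old_add(&folio);	/* fine single-threaded, racy otherwise */
	new_add(&folio);	/* safe either way */
	printf("cached: %d\n", *(int *)lookup());
	return 0;
}
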
 mm/swap.h       |  2 +-
 mm/swap_state.c | 27 ++++++++++-----------------
 mm/zswap.c      |  2 +-
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/mm/swap.h b/mm/swap.h
index 3cd99850bbaf..a3c5f2dca0d5 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -260,7 +260,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
 void swap_cache_del_folio(struct folio *folio);
 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
				     struct mempolicy *mpol, pgoff_t ilx,
-				     bool *alloced, bool skip_if_exists);
+				     bool *alloced);
 /* Below helpers require the caller to lock and pass in the swap cluster. */
 void __swap_cache_del_folio(struct swap_cluster_info *ci,
			    struct folio *folio, swp_entry_t entry, void *shadow);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2d53e3b5e8e9..d2bcca92b6e0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -447,8 +447,6 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
  * @folio: folio to be added.
  * @gfp: memory allocation flags for charge, can be 0 if @charged if true.
  * @charged: if the folio is already charged.
- * @skip_if_exists: if the slot is in a cached state, return NULL.
- *                  This is an old workaround that will be removed shortly.
  *
  * Update the swap_map and add folio as swap cache, typically before swapin.
  * All swap slots covered by the folio must have a non-zero swap count.
@@ -459,8 +457,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
  */
 static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
						  struct folio *folio,
-						  gfp_t gfp, bool charged,
-						  bool skip_if_exists)
+						  gfp_t gfp, bool charged)
 {
	struct folio *swapcache = NULL;
	void *shadow;
@@ -480,7 +477,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
	 * might return a folio that is irrelevant to the faulting
	 * entry because @entry is aligned down. Just return NULL.
	 */
-	if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
+	if (ret != -EEXIST || folio_test_large(folio))
		goto failed;

	swapcache = swap_cache_get_folio(entry);
@@ -513,8 +510,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
  * @mpol: NUMA memory allocation policy to be applied
  * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
  * @new_page_allocated: sets true if allocation happened, false otherwise
- * @skip_if_exists: if the slot is a partially cached state, return NULL.
- *                  This is a workaround that would be removed shortly.
  *
  * Allocate a folio in the swap cache for one swap slot, typically before
  * doing IO (swap in or swap out). The swap slot indicated by @entry must
@@ -526,8 +521,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
  */
 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
				     struct mempolicy *mpol, pgoff_t ilx,
-				     bool *new_page_allocated,
-				     bool skip_if_exists)
+				     bool *new_page_allocated)
 {
	struct swap_info_struct *si = __swap_entry_to_info(entry);
	struct folio *folio;
@@ -548,8 +542,7 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
	if (!folio)
		return NULL;
	/* Try add the new folio, returns existing folio or NULL on failure. */
-	result = __swap_cache_prepare_and_add(entry, folio, gfp_mask,
-					      false, skip_if_exists);
+	result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
	if (result == folio)
		*new_page_allocated = true;
	else
@@ -578,7 +571,7 @@ struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
	unsigned long nr_pages = folio_nr_pages(folio);

	entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
-	swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true, false);
+	swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
	if (swapcache == folio)
		swap_read_folio(folio, NULL);
	return swapcache;
@@ -606,7 +599,7 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,

	mpol = get_vma_policy(vma, addr, 0, &ilx);
	folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-				       &page_allocated, false);
+				       &page_allocated);
	mpol_cond_put(mpol);

	if (page_allocated)
@@ -725,7 +718,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
		/* Ok, do the async read-ahead now */
		folio = swap_cache_alloc_folio(
				swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
-				&page_allocated, false);
+				&page_allocated);
		if (!folio)
			continue;
		if (page_allocated) {
@@ -743,7 +736,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 skip:
	/* The page was likely read above, so no need for plugging here */
	folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-				       &page_allocated, false);
+				       &page_allocated);
	if (unlikely(page_allocated))
		swap_read_folio(folio, NULL);
	return folio;
@@ -838,7 +831,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
		pte_unmap(pte);
		pte = NULL;
		folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-					       &page_allocated, false);
+					       &page_allocated);
		if (!folio)
			continue;
		if (page_allocated) {
@@ -858,7 +851,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
 skip:
	/* The folio was likely read above, so no need for plugging here */
	folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
-				       &page_allocated, false);
+				       &page_allocated);
	if (unlikely(page_allocated))
		swap_read_folio(folio, NULL);
	return folio;
diff --git a/mm/zswap.c b/mm/zswap.c
index a7a2443912f4..d8a33db9d3cc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1015,7 +1015,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,

	mpol = get_task_policy(current);
	folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
-				       NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
+				       NO_INTERLEAVE_INDEX, &folio_was_allocated);
	put_swap_device(si);
	if (!folio)
		return -ENOMEM;
-- 
2.51.1