Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from
the per-node barn without requiring an empty sheaf in exchange.

Use this helper in __pcs_replace_empty_main() to change how an empty main
per-CPU sheaf is handled:

  - If pcs->spare is NULL and pcs->main is empty, first try to obtain a
    full sheaf from the barn via barn_get_full_sheaf(). On success, park
    the empty main sheaf in pcs->spare and install the full sheaf as the
    new pcs->main.

  - If pcs->spare already exists and has objects, keep the existing
    behavior of simply swapping pcs->main and pcs->spare.

  - Only when both pcs->main and pcs->spare are empty (or pcs->spare is
    NULL and no full sheaf could be taken from the barn) do we fall back
    to barn_replace_empty_sheaf() and trade the empty main sheaf into the
    barn in exchange for a full one.

This makes the empty-main path more symmetric with
__pcs_replace_full_main(), which for a full main sheaf parks the full
sheaf in pcs->spare and pulls an empty sheaf from the barn. It also
matches the documented design more closely:

  "When both percpu sheaves are found empty during an allocation, an
   empty sheaf may be replaced with a full one from the per-node barn."

Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if 
(pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. 
This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if 
(!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:33:21 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 8250 Lines: 265 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. 
- If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; 
lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. 
Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf 
*barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:44:16 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. 
Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 17732 Lines: 557 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." 
Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if 
(pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. 
This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if 
(!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:33:21 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 8250 Lines: 265 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. 
- If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; 
lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. 
Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Only when both pcs->main and pcs->spare are empty do we fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf 
*barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:45:09 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. 
Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 36697 Lines: 1141 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." 
Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if 
(pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. 
This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if 
(!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:33:21 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 8250 Lines: 265 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. 
- If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; 
lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. 
Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf 
*barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:44:16 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. 
Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 17732 Lines: 557 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." 
Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if 
(pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. 
This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if 
(!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:33:21 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 8250 Lines: 265 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. 
- If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; 
lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1 From haoli.tcs@gmail.com Tue Dec 2 16:24:49 2025 Date: Tue, 2 Dec 2025 16:31:49 +0800 From: Hao Li To: Vlastimil Babka Cc: Suren Baghdasaryan , "Liam R. Howlett" , Christoph Lameter , David Rientjes , Roman Gushchin , Harry Yoo , Uladzislau Rezki , Sidhartha Kumar , linux-mm@kvack.org, linux-kernel@vger.kernel.org, rcu@vger.kernel.org, maple-tree@lists.infradead.org, Venkat Rao Bagalkote Subject: [PATCH] slub: add barn_get_full_sheaf() and refine empty-main sheaf Message-ID: Mutt-References: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent References: <20250910-slub-percpu-caches-v8-0-ca3099d8352c@suse.cz> <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20250910-slub-percpu-caches-v8-3-ca3099d8352c@suse.cz> Mutt-Fcc: ~/sent Status: RO Content-Length: 3509 Lines: 119 Introduce barn_get_full_sheaf(), a helper that detaches a full sheaf from the per-node barn without requiring an empty sheaf in exchange. 
Use this helper in __pcs_replace_empty_main() to change how an empty main per-CPU sheaf is handled: - If pcs->spare is NULL and pcs->main is empty, first try to obtain a full sheaf from the barn via barn_get_full_sheaf(). On success, park the empty main sheaf in pcs->spare and install the full sheaf as the new pcs->main. - If pcs->spare already exists and has objects, keep the existing behavior of simply swapping pcs->main and pcs->spare. - Otherwise (pcs->spare exists but is also empty, or pcs->spare is NULL and barn_get_full_sheaf() returned no sheaf), fall back to barn_replace_empty_sheaf() and trade the empty main sheaf into the barn in exchange for a full one. This makes the empty-main path more symmetric with __pcs_replace_full_main(), which for a full main sheaf parks the full sheaf in pcs->spare and pulls an empty sheaf from the barn. It also matches the documented design more closely: "When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn." Signed-off-by: Hao Li --- * This patch is based on b4/sheaves-for-all branch mm/slub.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a94c64f56504..1fd28aa204e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2746,6 +2746,32 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves = NULL; } +static struct slab_sheaf *barn_get_full_sheaf(struct node_barn *barn, + bool allow_spin) +{ + struct slab_sheaf *full = NULL; + unsigned long flags; + + if (!data_race(barn->nr_full)) + return NULL; + + if (likely(allow_spin)) + spin_lock_irqsave(&barn->lock, flags); + else if (!spin_trylock_irqsave(&barn->lock, flags)) + return NULL; + + if (likely(barn->nr_full)) { + full = list_first_entry(&barn->sheaves_full, + struct slab_sheaf, barn_list); + list_del(&full->barn_list); + barn->nr_full--; + } + + spin_unlock_irqrestore(&barn->lock, flags); + + return full; +} + static struct slab_sheaf 
*barn_get_empty_sheaf(struct node_barn *barn, bool allow_spin) { @@ -4120,7 +4146,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool can_alloc, allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4130,10 +4156,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - if (pcs->spare && pcs->spare->size > 0) { - swap(pcs->main, pcs->spare); - return pcs; - } + allow_spin = gfpflags_allow_spinning(gfp); barn = get_barn(s); if (!barn) { @@ -4141,8 +4164,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + if (!pcs->spare) { + full = barn_get_full_sheaf(barn, allow_spin); + if (full) { + pcs->spare = pcs->main; + pcs->main = full; + return pcs; + } + } else if (pcs->spare->size > 0) { + swap(pcs->main, pcs->spare); + return pcs; + } + + /* both main and spare are empty */ + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); -- 2.50.1