In alloc_debug_processing() we know that the slab pointer passed in really is a pointer to a slab. Both callers modify members of the slab data structure. So we can remove this check. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 962110466ffc..ddf26f00b21d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1715,17 +1715,15 @@ static noinline bool alloc_debug_processing(struct kmem_cache *s, return true; bad: - if (folio_test_slab(slab_folio(slab))) { - /* - * If this is a slab page then lets do the best we can - * to avoid issues in the future. Marking all objects - * as used avoids touching the remaining objects. - */ - slab_fix(s, "Marking all objects used"); - slab->inuse = slab->objects; - slab->freelist = NULL; - slab->frozen = 1; /* mark consistency-failed slab as frozen */ - } + /* + * Lets do the best we can to avoid issues in the future. Marking + * all objects as used avoids touching the remaining objects. + */ + slab_fix(s, "Marking all objects used"); + slab->inuse = slab->objects; + slab->freelist = NULL; + slab->frozen = 1; /* mark consistency-failed slab as frozen */ + return false; } -- 2.47.2 We know this is a slab because we already modified slab->inuse just two lines earlier. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ddf26f00b21d..81f08891741a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3323,8 +3323,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s, slab->inuse++; if (!alloc_debug_processing(s, slab, object, orig_size)) { - if (folio_test_slab(slab_folio(slab))) - remove_partial(n, slab); + remove_partial(n, slab); return NULL; } -- 2.47.2 We already know the slab passed in is a valid slab. This looks like a sensible check, but all callers assume or check that the page belongs to slab. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 81f08891741a..ac879e9127a2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1515,11 +1515,6 @@ static int check_slab(struct kmem_cache *s, struct slab *slab) { int maxobj; - if (!folio_test_slab(slab_folio(slab))) { - slab_err(s, slab, "Not a valid slab page"); - return 0; - } - maxobj = order_objects(slab_order(slab), s->size); if (slab->objects > maxobj) { slab_err(s, slab, "objects %u > max %u", -- 2.47.2 We already know that slab is a valid slab as that's checked by the caller. In the future, we won't be able to get to a slab pointer from a non-slab page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ac879e9127a2..b32bffdd6e3c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1739,10 +1739,7 @@ static inline int free_consistency_checks(struct kmem_cache *s, return 0; if (unlikely(s != slab->slab_cache)) { - if (!folio_test_slab(slab_folio(slab))) { - slab_err(s, slab, "Attempt to free object(0x%p) outside of slab", - object); - } else if (!slab->slab_cache) { + if (!slab->slab_cache) { slab_err(NULL, slab, "No slab cache for object 0x%p", object); } else { -- 2.47.2 These functions do not modify their arguments. Telling the compiler this may improve code generation, and allows us to pass const arguments from other functions. 
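For illustration only, a minimal sketch of the benefit (the helper below is invented for this example and is not part of the patch):

static bool example_is_order_zero(const struct page *page)
{
	/* Allowed without a cast only because compound_order() now takes const. */
	return compound_order(page) == 0;
}

The same applies to page_size(): a function that merely reads a page can now declare its parameter const and still call these accessors.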
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Acked-by: David Hildenbrand --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index a6bfa46937a8..38b3d7b06d01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1036,9 +1036,9 @@ static inline long folio_large_nr_pages(const struct folio *folio) * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ -static inline unsigned int compound_order(struct page *page) +static inline unsigned int compound_order(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags.f)) return 0; @@ -1256,7 +1256,7 @@ int folio_mc_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); /* Returns the number of bytes in this potentially compound page. */ -static inline unsigned long page_size(struct page *page) +static inline unsigned long page_size(const struct page *page) { return PAGE_SIZE << compound_order(page); } -- 2.47.2 In order to separate slabs from folios, we need to convert from any page in a slab to the slab directly without going through a page to folio conversion first. page_slab() is a little different from other memdesc converters we have in that it will return NULL if the page is not part of a slab. This will be the normal style for memdesc converters in the future. kfence was the only user of page_slab(), so adjust it to the new way of working. It will need to be touched again when we separate slab from page. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 14 +------------- mm/kfence/core.c | 12 ++++++++---- mm/slab.h | 28 ++++++++++++++++------------ 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a57411252564..a997207c3b71 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1055,19 +1055,7 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) -FOLIO_TYPE_OPS(slab, slab) - -/** - * PageSlab - Determine if the page belongs to the slab allocator - * @page: The page to test. - * - * Context: Any context. - * Return: True for slab pages, false for any other kind of page. - */ -static inline bool PageSlab(const struct page *page) -{ - return folio_test_slab(page_folio(page)); -} +PAGE_TYPE_OPS(Slab, slab, slab) #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 727c20c94ac5..b16e73fd5b68 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -612,13 +612,15 @@ static unsigned long kfence_init_pool(void) * enters __slab_free() slow-path. 
*/ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { + struct page *page; struct slab *slab; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); - __folio_set_slab(slab_folio(slab)); + page = pfn_to_page(start_pfn + i); + __SetPageSlab(page); + slab = page_slab(page); #ifdef CONFIG_MEMCG slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts | MEMCG_DATA_OBJEXTS; @@ -665,16 +667,18 @@ static unsigned long kfence_init_pool(void) reset_slab: for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { + struct page *page; struct slab *slab; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); + page = pfn_to_page(start_pfn + i); + slab = page_slab(page); #ifdef CONFIG_MEMCG slab->obj_exts = 0; #endif - __folio_clear_slab(slab_folio(slab)); + __ClearPageSlab(page); } return addr; diff --git a/mm/slab.h b/mm/slab.h index bf2f91a6c535..084231394250 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -142,20 +142,24 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t) struct slab *: (struct folio *)s)) /** - * page_slab - Converts from first struct page to slab. - * @p: The first (either head of compound or single) page of slab. + * page_slab - Converts from struct page to its slab. + * @page: A page which may or may not belong to a slab. * - * A temporary wrapper to convert struct page to struct slab in situations where - * we know the page is the compound head, or single order-0 page. - * - * Long-term ideally everything would work with struct slab directly or go - * through folio to struct slab. - * - * Return: The slab which contains this page + * Return: The slab which contains this page or NULL if the page does + * not belong to a slab. This includes pages returned from large kmalloc. */ -#define page_slab(p) (_Generic((p), \ - const struct page *: (const struct slab *)(p), \ - struct page *: (struct slab *)(p))) +static inline struct slab *page_slab(const struct page *page) +{ + unsigned long head; + + head = READ_ONCE(page->compound_head); + if (head & 1) + page = (struct page *)(head - 1); + if (data_race(page->page_type >> 24) != PGTY_slab) + page = NULL; + + return (struct slab *)page; +} /** * slab_page - The first struct page allocated for a slab -- 2.47.2 In the future, we will separate slab, folio and page from each other and calling virt_to_folio() on an address allocated from slab will return NULL. Delay the conversion from struct page to struct slab until we know we're not dealing with a large kmalloc allocation. This deprecates calling ksize() on memory allocated by alloc_pages(). Today it becomes a warning and support will be removed entirely in the future. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 2 +- mm/slab.h | 10 ++++++++++ mm/slab_common.c | 23 ++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a997207c3b71..57684c643456 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1071,7 +1071,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) * Serialized with zone lock. 
*/ PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) -FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc) +PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs diff --git a/mm/slab.h b/mm/slab.h index 084231394250..e3a85318a6e5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -595,6 +595,16 @@ static inline size_t slab_ksize(const struct kmem_cache *s) return s->size; } +static inline unsigned int large_kmalloc_order(const struct page *page) +{ + return page[1].flags.f & 0xff; +} + +static inline size_t large_kmalloc_size(const struct page *page) +{ + return PAGE_SIZE << large_kmalloc_order(page); +} + #ifdef CONFIG_SLUB_DEBUG void dump_unreclaimable_slab(void); #else diff --git a/mm/slab_common.c b/mm/slab_common.c index 08f5baee1309..3216a4e65b51 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -994,26 +994,27 @@ void __init create_kmalloc_caches(void) */ size_t __ksize(const void *object) { - struct folio *folio; + const struct page *page; + const struct slab *slab; if (unlikely(object == ZERO_SIZE_PTR)) return 0; - folio = virt_to_folio(object); + page = virt_to_page(object); - if (unlikely(!folio_test_slab(folio))) { - if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE)) - return 0; - if (WARN_ON(object != folio_address(folio))) - return 0; - return folio_size(folio); - } + if (unlikely(PageLargeKmalloc(page))) + return large_kmalloc_size(page); + + slab = page_slab(page); + /* Delete this after we're sure there are no users */ + if (WARN_ON(!slab)) + return page_size(page); #ifdef CONFIG_SLUB_DEBUG - skip_orig_size_check(folio_slab(folio)->slab_cache, object); + skip_orig_size_check(slab->slab_cache, object); #endif - return slab_ksize(folio_slab(folio)->slab_cache); + return slab_ksize(slab->slab_cache); } gfp_t kmalloc_fix_flags(gfp_t flags) -- 2.47.2 This allows us to skip the compound_head() call for large kmalloc objects as the virt_to_page() call will always give us the head page for the large kmalloc case. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b32bffdd6e3c..9bca3e4b82e6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2304,33 +2304,34 @@ bool memcg_slab_post_charge(void *p, gfp_t flags) { struct slabobj_ext *slab_exts; struct kmem_cache *s; - struct folio *folio; + struct page *page; struct slab *slab; unsigned long off; - folio = virt_to_folio(p); - if (!folio_test_slab(folio)) { + page = virt_to_page(p); + if (PageLargeKmalloc(page)) { + unsigned int order; int size; - if (folio_memcg_kmem(folio)) + if (PageMemcgKmem(page)) return true; - if (__memcg_kmem_charge_page(folio_page(folio, 0), flags, - folio_order(folio))) + order = large_kmalloc_order(page); + if (__memcg_kmem_charge_page(page, flags, order)) return false; /* - * This folio has already been accounted in the global stats but + * This page has already been accounted in the global stats but * not in the memcg stats. So, subtract from the global and use * the interface which adds to both global and memcg stats. 
*/ - size = folio_size(folio); - node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size); + size = PAGE_SIZE << order; + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, -size); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, size); return true; } - slab = folio_slab(folio); + slab = page_slab(page); s = slab->slab_cache; /* -- 2.47.2 Use pages directly to further the split between slab and folio. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 9bca3e4b82e6..7973b064639e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2979,21 +2979,21 @@ static void barn_shrink(struct kmem_cache *s, struct node_barn *barn) static inline struct slab *alloc_slab_page(gfp_t flags, int node, struct kmem_cache_order_objects oo) { - struct folio *folio; + struct page *page; struct slab *slab; unsigned int order = oo_order(oo); if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages(flags, order); + page = alloc_frozen_pages(flags, order); else - folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL); + page = __alloc_frozen_pages(flags, order, node, NULL); - if (!folio) + if (!page) return NULL; - slab = folio_slab(folio); - __folio_set_slab(folio); - if (folio_is_pfmemalloc(folio)) + __SetPageSlab(page); + slab = page_slab(page); + if (page_is_pfmemalloc(page)) slab_set_pfmemalloc(slab); return slab; @@ -3212,16 +3212,16 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) static void __free_slab(struct kmem_cache *s, struct slab *slab) { - struct folio *folio = slab_folio(slab); - int order = folio_order(folio); + struct page *page = slab_page(slab); + int order = compound_order(page); int pages = 1 << order; __slab_clear_pfmemalloc(slab); - folio->mapping = NULL; - __folio_clear_slab(folio); + page->mapping = NULL; + __ClearPageSlab(page); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); - free_frozen_pages(&folio->page, order); + free_frozen_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) -- 2.47.2 There's no need to use folio APIs here; just use a page directly. 
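For context, a minimal standalone sketch of the page-based pattern (it uses the plain alloc_pages() interface rather than the frozen-page allocator used by the patch, and both function names are invented):

static void *example_alloc_large(size_t size, gfp_t flags)
{
	unsigned int order = get_order(size);
	struct page *page;

	/* __GFP_COMP keeps the pages together as one compound allocation. */
	page = alloc_pages(flags | __GFP_COMP, order);
	if (!page)
		return NULL;

	/*
	 * The allocator returns the head page, so no folio conversion or
	 * compound_head() call is needed before taking the address.
	 */
	return page_address(page);
}

static void example_free_large(void *ptr, size_t size)
{
	__free_pages(virt_to_page(ptr), get_order(size));
}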
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 7973b064639e..28f68887ac3c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5284,7 +5284,7 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) */ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) { - struct folio *folio; + struct page *page; void *ptr = NULL; unsigned int order = get_order(size); @@ -5294,15 +5294,15 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) flags |= __GFP_COMP; if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages_noprof(flags, order); + page = alloc_frozen_pages_noprof(flags, order); else - folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL); + page = __alloc_frozen_pages_noprof(flags, order, node, NULL); - if (folio) { - ptr = folio_address(folio); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + if (page) { + ptr = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, PAGE_SIZE << order); - __folio_set_large_kmalloc(folio); + __SetPageLargeKmalloc(page); } ptr = kasan_kmalloc_large(ptr, size, flags); -- 2.47.2 There's no need to use folio APIs here; just use a page directly. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 28f68887ac3c..68a013411ff7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6223,12 +6223,12 @@ void kmem_cache_free(struct kmem_cache *s, void *x) } EXPORT_SYMBOL(kmem_cache_free); -static void free_large_kmalloc(struct folio *folio, void *object) +static void free_large_kmalloc(struct page *page, void *object) { - unsigned int order = folio_order(folio); + unsigned int order = compound_order(page); - if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) { - dump_page(&folio->page, "Not a kmalloc allocation"); + if (WARN_ON_ONCE(!PageLargeKmalloc(page))) { + dump_page(page, "Not a kmalloc allocation"); return; } @@ -6239,10 +6239,10 @@ static void free_large_kmalloc(struct folio *folio, void *object) kasan_kfree_large(object); kmsan_kfree_large(object); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __folio_clear_large_kmalloc(folio); - free_frozen_pages(&folio->page, order); + __ClearPageLargeKmalloc(page); + free_frozen_pages(page, order); } /* @@ -6270,7 +6270,7 @@ void kvfree_rcu_cb(struct rcu_head *head) * consider folio order */ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj); - free_large_kmalloc(folio, obj); + free_large_kmalloc(&folio->page, obj); return; } @@ -6310,7 +6310,7 @@ void kfree(const void *object) folio = virt_to_folio(object); if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, (void *)object); + free_large_kmalloc(&folio->page, (void *)object); return; } @@ -6669,7 +6669,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); + free_large_kmalloc(&folio->page, object); df->slab = NULL; return size; } -- 2.47.2 As with memcg_slab_post_charge(), we save a call to compound_head() for large kmallocs. This has a slight change of behaviour in that non-vmalloc, non-slab, non-kmalloc pointers will now cause a NULL pointer dereference rather than a warning. We could add that back if really needed. 
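To make the new control flow easier to follow, here is a condensed sketch of the dispatch pattern shared by these conversions (kfence handling and error paths omitted; it assumes it sits inside mm/slub.c next to the helpers it calls, and the function name is invented):

static void example_free(void *object)
{
	struct page *page = virt_to_page(object);
	struct slab *slab;

	/* The precise page carries the type bit, so no compound_head(). */
	if (PageLargeKmalloc(page)) {
		free_large_kmalloc(page, object);
		return;
	}

	slab = page_slab(page);		/* NULL if this is not a slab page */
	if (!slab)
		return;			/* the real paths would dereference NULL here */

	slab_free(slab->slab_cache, slab, object, _RET_IP_);
}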
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 68a013411ff7..547ef6ef4373 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6252,7 +6252,7 @@ static void free_large_kmalloc(struct page *page, void *object) void kvfree_rcu_cb(struct rcu_head *head) { void *obj = head; - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *slab_addr; @@ -6263,20 +6263,20 @@ void kvfree_rcu_cb(struct rcu_head *head) return; } - folio = virt_to_folio(obj); - if (!folio_test_slab(folio)) { + page = virt_to_page(obj); + if (PageLargeKmalloc(page)) { /* * rcu_head offset can be only less than page size so no need to - * consider folio order + * consider allocation order */ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj); - free_large_kmalloc(&folio->page, obj); + free_large_kmalloc(page, obj); return; } - slab = folio_slab(folio); + slab = page_slab(page); s = slab->slab_cache; - slab_addr = folio_address(folio); + slab_addr = slab_address(slab); if (is_kfence_address(obj)) { obj = kfence_object_start(obj); -- 2.47.2 As with memcg_slab_post_charge(), we save a call to compound_head() for large kmallocs. This has a slight change of behaviour in that non-vmalloc, non-slab, non-kmalloc pointers will now cause a NULL pointer dereference rather than a warning. We could add that back if really needed. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 547ef6ef4373..a06a86074170 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6298,7 +6298,7 @@ void kvfree_rcu_cb(struct rcu_head *head) */ void kfree(const void *object) { - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *x = (void *)object; @@ -6308,13 +6308,13 @@ void kfree(const void *object) if (unlikely(ZERO_OR_NULL_PTR(object))) return; - folio = virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(&folio->page, (void *)object); + page = virt_to_page(object); + if (unlikely(PageLargeKmalloc(page))) { + free_large_kmalloc(page, (void *)object); return; } - slab = folio_slab(folio); + slab = page_slab(page); s = slab->slab_cache; slab_free(s, slab, x, _RET_IP_); } -- 2.47.2 As with memcg_slab_post_charge(), we save a call to compound_head() for large kmallocs. This has a slight change of behaviour in that non-vmalloc, non-slab, non-kmalloc pointers will now cause a NULL pointer dereference rather than a warning. We could add that back if really needed. 
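For reference, a condensed sketch of how the size bound is now derived in this path (kfence and the original-size bookkeeping omitted; it assumes the mm/slab.h helpers introduced earlier in the series, and the function name is invented):

static size_t example_size_bound(const void *p)
{
	struct page *page = virt_to_page(p);

	/* p is the start of the allocation, so this is the head page. */
	if (PageLargeKmalloc(page))
		return page_size(page);

	return page_slab(page)->slab_cache->object_size;
}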
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a06a86074170..398dfecebb75 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6348,16 +6348,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, if (is_kfence_address(p)) { ks = orig_size = kfence_ksize(p); } else { - struct folio *folio; + struct page *page; - folio = virt_to_folio(p); - if (unlikely(!folio_test_slab(folio))) { + page = virt_to_page(p); + if (unlikely(PageLargeKmalloc(page))) { /* Big kmalloc object */ - WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE); - WARN_ON(p != folio_address(folio)); - ks = folio_size(folio); + ks = page_size(page); + WARN_ON(ks <= KMALLOC_MAX_CACHE_SIZE); + WARN_ON(p != page_address(page)); } else { - s = folio_slab(folio)->slab_cache; + s = page_slab(page)->slab_cache; orig_size = get_orig_size(s, (void *)p); ks = s->object_size; } -- 2.47.2 As with memcg_slab_post_charge(), we save a call to compound_head() for large kmallocs. This has a slight change of behaviour in that non-slab, non-kmalloc pointers will now cause a NULL pointer dereference rather than a warning. We could add that back if really needed. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 398dfecebb75..0f4e7a2a5887 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6661,23 +6661,23 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, { int lookahead = 3; void *object; - struct folio *folio; + struct page *page; size_t same; object = p[--size]; - folio = virt_to_folio(object); + page = virt_to_page(object); if (!s) { /* Handle kalloc'ed objects */ - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(&folio->page, object); + if (unlikely(PageLargeKmalloc(page))) { + free_large_kmalloc(page, object); df->slab = NULL; return size; } /* Derive kmem_cache from object */ - df->slab = folio_slab(folio); + df->slab = page_slab(page); df->s = df->slab->slab_cache; } else { - df->slab = folio_slab(folio); + df->slab = page_slab(page); df->s = cache_from_obj(s, object); /* Support for memcg */ } -- 2.47.2 All three callers of free_large_kmalloc() check PageLargeKmalloc first so this warning is now unnecessary. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0f4e7a2a5887..523a4551ceb6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6227,11 +6227,6 @@ static void free_large_kmalloc(struct page *page, void *object) { unsigned int order = compound_order(page); - if (WARN_ON_ONCE(!PageLargeKmalloc(page))) { - dump_page(page, "Not a kmalloc allocation"); - return; - } - if (WARN_ON_ONCE(order == 0)) pr_warn_once("object pointer: 0x%p\n", object); -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, remove mentions of struct folio from this function. Since we don't need to handle large kmalloc objects specially here, we can just use virt_to_slab(). 
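For reference, a small usage sketch of the NULL-returning converter style this relies on (the function name is invented):

static struct kmem_cache *example_cache_of(const void *obj)
{
	struct slab *slab = virt_to_slab(obj);

	/* NULL covers large kmalloc buffers and any other non-slab page. */
	if (!slab)
		return NULL;

	return slab->slab_cache;
}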
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slab_common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 3216a4e65b51..8c11192f5b2b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1612,17 +1612,15 @@ static void kfree_rcu_work(struct work_struct *work) static bool kfree_rcu_sheaf(void *obj) { struct kmem_cache *s; - struct folio *folio; struct slab *slab; if (is_vmalloc_addr(obj)) return false; - folio = virt_to_folio(obj); - if (unlikely(!folio_test_slab(folio))) + slab = virt_to_slab(obj); + if (unlikely(!slab)) return false; - slab = folio_slab(folio); s = slab->slab_cache; if (s->cpu_sheaves) { if (likely(!IS_ENABLED(CONFIG_NUMA) || -- 2.47.2 Because the pointer being checked may not lie within the first PAGE_SIZE bytes of the object, we have to mark all pages as being LargeKmalloc. We could use virt_to_head_page() instead, but that would pessimize slab objects. Once we move to memdescs properly, we'll tag each page as being LargeKmalloc anyway, so this is more in keeping with how code will be written in the future. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 8 ++++++-- mm/usercopy.c | 21 ++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 523a4551ceb6..58e8354e12a2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5299,10 +5299,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) page = __alloc_frozen_pages_noprof(flags, order, node, NULL); if (page) { + unsigned long i; ptr = page_address(page); mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, PAGE_SIZE << order); - __SetPageLargeKmalloc(page); + for (i = 0; i < 1UL << order; i++) + __SetPageLargeKmalloc(page + i); } ptr = kasan_kmalloc_large(ptr, size, flags); @@ -6226,6 +6228,7 @@ EXPORT_SYMBOL(kmem_cache_free); static void free_large_kmalloc(struct page *page, void *object) { unsigned int order = compound_order(page); + unsigned long i; if (WARN_ON_ONCE(order == 0)) pr_warn_once("object pointer: 0x%p\n", object); @@ -6236,7 +6239,8 @@ static void free_large_kmalloc(struct page *page, void *object) mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __ClearPageLargeKmalloc(page); + for (i = 0; i < 1UL << order; i++) + __ClearPageLargeKmalloc(page + i); free_frozen_pages(page, order); } diff --git a/mm/usercopy.c b/mm/usercopy.c index dbdcc43964fb..8d21635147a4 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -164,7 +164,7 @@ static inline void check_heap_object(const void *ptr, unsigned long n, { unsigned long addr = (unsigned long)ptr; unsigned long offset; - struct folio *folio; + struct page *page; if (is_kmap_addr(ptr)) { offset = offset_in_page(ptr); @@ -189,15 +189,18 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - folio = virt_to_folio(ptr); - - if (folio_test_slab(folio)) { + page = virt_to_page(ptr); + if (PageLargeKmalloc(page)) { + page = compound_head(page); + offset = ptr - page_address(page); + if (n > page_size(page) - offset) + usercopy_abort("kmalloc", NULL, to_user, offset, n); + return; + } else { + struct slab *slab = page_slab(page); /* Check slab allocator for flags and size. 
*/ - __check_heap_object(ptr, n, folio_slab(folio), to_user); - } else if (folio_test_large(folio)) { - offset = ptr - folio_address(folio); - if (n > folio_size(folio) - offset) - usercopy_abort("page alloc", NULL, to_user, offset, n); + if (slab) + __check_heap_object(ptr, n, slab, to_user); } } -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, convert the pointer to a slab rather than a folio. This means we can end up passing a NULL slab pointer to mem_cgroup_from_obj_slab() if the pointer is not to a page allocated to slab, and we handle that appropriately by returning NULL. Signed-off-by: Matthew Wilcox (Oracle) --- mm/memcontrol.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 257d2c76b730..f72ec90ea365 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2555,38 +2555,28 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, } static __always_inline -struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) +struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p) { /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in * slab->obj_exts. */ - if (folio_test_slab(folio)) { - struct slabobj_ext *obj_exts; - struct slab *slab; - unsigned int off; - - slab = folio_slab(folio); - obj_exts = slab_obj_exts(slab); - if (!obj_exts) - return NULL; + struct slabobj_ext *obj_exts; + unsigned int off; - off = obj_to_index(slab->slab_cache, slab, p); - if (obj_exts[off].objcg) - return obj_cgroup_memcg(obj_exts[off].objcg); + if (!slab) + return NULL; + obj_exts = slab_obj_exts(slab); + if (!obj_exts) return NULL; - } - /* - * folio_memcg_check() is used here, because in theory we can encounter - * a folio where the slab flag has been cleared already, but - * slab->obj_exts has not been freed yet - * folio_memcg_check() will guarantee that a proper memory - * cgroup pointer or NULL will be returned. - */ - return folio_memcg_check(folio); + off = obj_to_index(slab->slab_cache, slab, p); + if (obj_exts[off].objcg) + return obj_cgroup_memcg(obj_exts[off].objcg); + + return NULL; } /* @@ -2603,7 +2593,7 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) if (mem_cgroup_disabled()) return NULL; - return mem_cgroup_from_obj_folio(virt_to_folio(p), p); + return mem_cgroup_from_obj_slab(virt_to_slab(p), p); } static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, remove mentions of struct folio from this function. We can discard the comment as using PageLargeKmalloc() rather than !folio_test_slab() makes it obvious. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand --- mm/kasan/common.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index e3765931a31f..e90f24aa76db 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -517,24 +517,20 @@ void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, bool __kasan_mempool_poison_object(void *ptr, unsigned long ip) { - struct folio *folio = virt_to_folio(ptr); + struct page *page = virt_to_page(ptr); struct slab *slab; - /* - * This function can be called for large kmalloc allocation that get - * their memory from page_alloc. Thus, the folio might not be a slab. 
- */ - if (unlikely(!folio_test_slab(folio))) { + if (unlikely(PageLargeKmalloc(page))) { if (check_page_allocation(ptr, ip)) return false; - kasan_poison(ptr, folio_size(folio), KASAN_PAGE_FREE, false); + kasan_poison(ptr, page_size(page), KASAN_PAGE_FREE, false); return true; } if (is_kfence_address(ptr)) return true; - slab = folio_slab(folio); + slab = page_slab(page); if (check_slab_allocation(slab->slab_cache, ptr, ip)) return false; -- 2.47.2 Use page_slab() instead of virt_to_folio() which will work perfectly when struct slab is separated from struct folio. This was the last user of folio_slab(), so delete it. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slab.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index e3a85318a6e5..1b5639a148df 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -113,19 +113,6 @@ static_assert(sizeof(struct slab) <= sizeof(struct page)); static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t))); #endif -/** - * folio_slab - Converts from folio to slab. - * @folio: The folio. - * - * Currently struct slab is a different representation of a folio where - * folio_test_slab() is true. - * - * Return: The slab which contains this folio. - */ -#define folio_slab(folio) (_Generic((folio), \ - const struct folio *: (const struct slab *)(folio), \ - struct folio *: (struct slab *)(folio))) - /** * slab_folio - The folio allocated for a slab * @s: The slab. @@ -188,12 +175,7 @@ static inline pg_data_t *slab_pgdat(const struct slab *slab) static inline struct slab *virt_to_slab(const void *addr) { - struct folio *folio = virt_to_folio(addr); - - if (!folio_test_slab(folio)) - return NULL; - - return folio_slab(folio); + return page_slab(virt_to_page(addr)); } static inline int slab_order(const struct slab *slab) -- 2.47.2 The page_has_type() call would have included slab since commit 46df8e73a4a3 and now we don't even get that far because slab pages have a zero refcount since commit 9aec2fb0fd5e. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand --- mm/memory.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index d9de6c056179..cc9ba448a7dc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2266,8 +2266,7 @@ static int validate_page_before_insert(struct vm_area_struct *vma, return -EINVAL; return 0; } - if (folio_test_anon(folio) || folio_test_slab(folio) || - page_has_type(page)) + if (folio_test_anon(folio) || page_has_type(page)) return -EINVAL; flush_dcache_folio(folio); return 0; -- 2.47.2
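As a closing illustration of the per-page LargeKmalloc tagging adopted for the usercopy patch above, a hedged sketch of how an interior pointer into a large kmalloc buffer can be bounds-checked (the function name is invented; vmalloc and kfence are ignored):

static bool example_range_fits(const void *ptr, unsigned long n)
{
	struct page *page = virt_to_page(ptr);
	unsigned long offset;

	/*
	 * Every page of the allocation is tagged, so an interior pointer
	 * is recognised without calling compound_head() first.
	 */
	if (!PageLargeKmalloc(page))
		return false;

	/* Only the size check needs the head page. */
	page = compound_head(page);
	offset = ptr - page_address(page);
	return n <= page_size(page) - offset;
}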