In order to separate slabs from folios, we need to convert from any page in a slab to the slab directly without going through a page to folio conversion first. Up to this point, page_slab() has followed the example of other memdesc converters (page_folio(), page_ptdesc() etc) and just cast the pointer to the requested type, regardless of whether the pointer is actually a pointer to the correct type or not. That changes with this commit; we check that the page actually belongs to a slab and return NULL if it does not. Other memdesc converters will adopt this convention in future. kfence was the only user of page_slab(), so adjust it to the new way of working. It will need to be touched again when we separate slab from page. Signed-off-by: Matthew Wilcox (Oracle) Cc: Alexander Potapenko Cc: Marco Elver Cc: kasan-dev@googlegroups.com --- include/linux/page-flags.h | 14 +------------- mm/kfence/core.c | 14 ++++++++------ mm/slab.h | 28 ++++++++++++++++------------ 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0091ad1986bf..6d5e44968eab 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1048,19 +1048,7 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) -FOLIO_TYPE_OPS(slab, slab) - -/** - * PageSlab - Determine if the page belongs to the slab allocator - * @page: The page to test. - * - * Context: Any context. - * Return: True for slab pages, false for any other kind of page. - */ -static inline bool PageSlab(const struct page *page) -{ - return folio_test_slab(page_folio(page)); -} +PAGE_TYPE_OPS(Slab, slab, slab) #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 727c20c94ac5..e62b5516bf48 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -612,14 +612,15 @@ static unsigned long kfence_init_pool(void) * enters __slab_free() slow-path. */ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab; + struct page *page; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); - __folio_set_slab(slab_folio(slab)); + page = pfn_to_page(start_pfn + i); + __SetPageSlab(page); #ifdef CONFIG_MEMCG + struct slab *slab = page_slab(page); slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts | MEMCG_DATA_OBJEXTS; #endif @@ -665,16 +666,17 @@ static unsigned long kfence_init_pool(void) reset_slab: for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab; + struct page *page; if (!i || (i % 2)) continue; - slab = page_slab(pfn_to_page(start_pfn + i)); + page = pfn_to_page(start_pfn + i); #ifdef CONFIG_MEMCG + struct slab *slab = page_slab(page); slab->obj_exts = 0; #endif - __folio_clear_slab(slab_folio(slab)); + __ClearPageSlab(page); } return addr; diff --git a/mm/slab.h b/mm/slab.h index f7b8df56727d..18cdb8e85273 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -146,20 +146,24 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t) struct slab *: (struct folio *)s)) /** - * page_slab - Converts from first struct page to slab. - * @p: The first (either head of compound or single) page of slab. + * page_slab - Converts from struct page to its slab. + * @page: A page which may or may not belong to a slab. * - * A temporary wrapper to convert struct page to struct slab in situations where - * we know the page is the compound head, or single order-0 page. 
- * - * Long-term ideally everything would work with struct slab directly or go - * through folio to struct slab. - * - * Return: The slab which contains this page + * Return: The slab which contains this page or NULL if the page does + * not belong to a slab. This includes pages returned from large kmalloc. */ -#define page_slab(p) (_Generic((p), \ - const struct page *: (const struct slab *)(p), \ - struct page *: (struct slab *)(p))) +static inline struct slab *page_slab(const struct page *page) +{ + unsigned long head; + + head = READ_ONCE(page->compound_head); + if (head & 1) + page = (struct page *)(head - 1); + if (data_race(page->page_type >> 24) != PGTY_slab) + page = NULL; + + return (struct slab *)page; +} /** * slab_page - The first struct page allocated for a slab -- 2.47.2 In the future, we will separate slab, folio and page from each other and calling virt_to_folio() on an address allocated from slab will return NULL. Delay the conversion from struct page to struct slab until we know we're not dealing with a large kmalloc allocation. There's a minor win for large kmalloc allocations as we avoid the compound_head() hidden in virt_to_folio(). This deprecates calling ksize() on memory allocated by alloc_pages(). Today it becomes a warning and support will be removed entirely in the future. Introduce large_kmalloc_size() to abstract how we represent the size of a large kmalloc allocation. For now, this is the same as page_size(), but it will change with separately allocated memdescs. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 2 +- mm/slab.h | 10 ++++++++++ mm/slab_common.c | 23 ++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6d5e44968eab..f7a0e4af0c73 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1064,7 +1064,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) * Serialized with zone lock. 
*/ PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) -FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc) +PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs diff --git a/mm/slab.h b/mm/slab.h index 18cdb8e85273..0422f2acf8c6 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -603,6 +603,16 @@ static inline size_t slab_ksize(const struct kmem_cache *s) return s->size; } +static inline unsigned int large_kmalloc_order(const struct page *page) +{ + return page[1].flags.f & 0xff; +} + +static inline size_t large_kmalloc_size(const struct page *page) +{ + return PAGE_SIZE << large_kmalloc_order(page); +} + #ifdef CONFIG_SLUB_DEBUG void dump_unreclaimable_slab(void); #else diff --git a/mm/slab_common.c b/mm/slab_common.c index d2824daa98cf..236b4e25fce0 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -997,26 +997,27 @@ void __init create_kmalloc_caches(void) */ size_t __ksize(const void *object) { - struct folio *folio; + const struct page *page; + const struct slab *slab; if (unlikely(object == ZERO_SIZE_PTR)) return 0; - folio = virt_to_folio(object); + page = virt_to_page(object); - if (unlikely(!folio_test_slab(folio))) { - if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE)) - return 0; - if (WARN_ON(object != folio_address(folio))) - return 0; - return folio_size(folio); - } + if (unlikely(PageLargeKmalloc(page))) + return large_kmalloc_size(page); + + slab = page_slab(page); + /* Delete this after we're sure there are no users */ + if (WARN_ON(!slab)) + return page_size(page); #ifdef CONFIG_SLUB_DEBUG - skip_orig_size_check(folio_slab(folio)->slab_cache, object); + skip_orig_size_check(slab->slab_cache, object); #endif - return slab_ksize(folio_slab(folio)->slab_cache); + return slab_ksize(slab->slab_cache); } gfp_t kmalloc_fix_flags(gfp_t flags) -- 2.47.2 This allows us to skip the compound_head() call for large kmalloc objects as the virt_to_page() call will always give us the head page for the large kmalloc case. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 7715c40672a7..ddfc9db4c6c1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2376,33 +2376,34 @@ bool memcg_slab_post_charge(void *p, gfp_t flags) { struct slabobj_ext *slab_exts; struct kmem_cache *s; - struct folio *folio; + struct page *page; struct slab *slab; unsigned long off; - folio = virt_to_folio(p); - if (!folio_test_slab(folio)) { + page = virt_to_page(p); + if (PageLargeKmalloc(page)) { + unsigned int order; int size; - if (folio_memcg_kmem(folio)) + if (PageMemcgKmem(page)) return true; - if (__memcg_kmem_charge_page(folio_page(folio, 0), flags, - folio_order(folio))) + order = large_kmalloc_order(page); + if (__memcg_kmem_charge_page(page, flags, order)) return false; /* - * This folio has already been accounted in the global stats but + * This page has already been accounted in the global stats but * not in the memcg stats. So, subtract from the global and use * the interface which adds to both global and memcg stats. 
*/ - size = folio_size(folio); - node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size); + size = PAGE_SIZE << order; + mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, -size); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, size); return true; } - slab = folio_slab(folio); + slab = page_slab(page); s = slab->slab_cache; /* -- 2.47.2 Use pages directly to further the split between slab and folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ddfc9db4c6c1..4a093140f985 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3074,24 +3074,24 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node, struct kmem_cache_order_objects oo, bool allow_spin) { - struct folio *folio; + struct page *page; struct slab *slab; unsigned int order = oo_order(oo); if (unlikely(!allow_spin)) - folio = (struct folio *)alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */, + page = alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */, node, order); else if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages(flags, order); + page = alloc_frozen_pages(flags, order); else - folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL); + page = __alloc_frozen_pages(flags, order, node, NULL); - if (!folio) + if (!page) return NULL; - slab = folio_slab(folio); - __folio_set_slab(folio); - if (folio_is_pfmemalloc(folio)) + __SetPageSlab(page); + slab = page_slab(page); + if (page_is_pfmemalloc(page)) slab_set_pfmemalloc(slab); return slab; @@ -3315,16 +3315,16 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) static void __free_slab(struct kmem_cache *s, struct slab *slab) { - struct folio *folio = slab_folio(slab); - int order = folio_order(folio); + struct page *page = slab_page(slab); + int order = compound_order(page); int pages = 1 << order; __slab_clear_pfmemalloc(slab); - folio->mapping = NULL; - __folio_clear_slab(folio); + page->mapping = NULL; + __ClearPageSlab(page); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); - free_frozen_pages(&folio->page, order); + free_frozen_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) -- 2.47.2 There's no need to use folio APIs here; just use a page directly. 
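As an illustrative sketch only (this helper is hypothetical and is not added by the series), the page-based marking and accounting that ___kmalloc_large_node() performs after this patch amounts to the following, assuming the helpers introduced earlier in the series:

/* Hypothetical helper, for illustration; mirrors the hunk below. */
static void set_large_kmalloc(struct page *page, unsigned int order)
{
	/* account as slab-unreclaimable memory, as kmalloc always has */
	mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
			      PAGE_SIZE << order);
	/* tag the page so PageLargeKmalloc()/page_slab() classify it later */
	__SetPageLargeKmalloc(page);
}
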
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 4a093140f985..90c5663f2b71 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5601,7 +5601,7 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) */ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) { - struct folio *folio; + struct page *page; void *ptr = NULL; unsigned int order = get_order(size); @@ -5611,15 +5611,15 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) flags |= __GFP_COMP; if (node == NUMA_NO_NODE) - folio = (struct folio *)alloc_frozen_pages_noprof(flags, order); + page = alloc_frozen_pages_noprof(flags, order); else - folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL); + page = __alloc_frozen_pages_noprof(flags, order, node, NULL); - if (folio) { - ptr = folio_address(folio); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + if (page) { + ptr = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, PAGE_SIZE << order); - __folio_set_large_kmalloc(folio); + __SetPageLargeKmalloc(page); } ptr = kasan_kmalloc_large(ptr, size, flags); -- 2.47.2 There's no need to use folio APIs here; just use a page directly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Harry Yoo --- mm/slub.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 90c5663f2b71..05cb7ec48f09 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6791,12 +6791,12 @@ void kmem_cache_free(struct kmem_cache *s, void *x) } EXPORT_SYMBOL(kmem_cache_free); -static void free_large_kmalloc(struct folio *folio, void *object) +static void free_large_kmalloc(struct page *page, void *object) { - unsigned int order = folio_order(folio); + unsigned int order = compound_order(page); - if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) { - dump_page(&folio->page, "Not a kmalloc allocation"); + if (WARN_ON_ONCE(!PageLargeKmalloc(page))) { + dump_page(page, "Not a kmalloc allocation"); return; } @@ -6807,10 +6807,10 @@ static void free_large_kmalloc(struct folio *folio, void *object) kasan_kfree_large(object); kmsan_kfree_large(object); - lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __folio_clear_large_kmalloc(folio); - free_frozen_pages(&folio->page, order); + __ClearPageLargeKmalloc(page); + free_frozen_pages(page, order); } /* @@ -6838,7 +6838,7 @@ void kvfree_rcu_cb(struct rcu_head *head) * consider folio order */ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj); - free_large_kmalloc(folio, obj); + free_large_kmalloc(&folio->page, obj); return; } @@ -6878,7 +6878,7 @@ void kfree(const void *object) folio = virt_to_folio(object); if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, (void *)object); + free_large_kmalloc(&folio->page, (void *)object); return; } @@ -7305,7 +7305,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); + free_large_kmalloc(&folio->page, object); df->slab = NULL; return size; } -- 2.47.2 Remove conversions from folio to page and folio to slab. This is preparation for separately allocated struct slab from struct page. 
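The idiom this and the following patches converge on is to ask page_slab() directly and treat a NULL return as "not a slab". As a sketch only (roughly the shape kfree() takes later in the series; kfree_sketch is an illustrative name, not a real function):

/* Sketch, not part of this patch: classify and free an object by its page. */
static void kfree_sketch(void *obj)
{
	struct page *page = virt_to_page(obj);
	struct slab *slab = page_slab(page);	/* NULL for non-slab pages */

	if (!slab) {
		/* a large kmalloc allocation: free the whole thing */
		free_large_kmalloc(page, obj);
		return;
	}
	slab_free(slab->slab_cache, slab, obj, _RET_IP_);
}
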
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 05cb7ec48f09..976e116580a6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6820,7 +6820,7 @@ static void free_large_kmalloc(struct page *page, void *object) void kvfree_rcu_cb(struct rcu_head *head) { void *obj = head; - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *slab_addr; @@ -6831,20 +6831,20 @@ void kvfree_rcu_cb(struct rcu_head *head) return; } - folio = virt_to_folio(obj); - if (!folio_test_slab(folio)) { + page = virt_to_page(obj); + slab = page_slab(page); + if (!slab) { /* * rcu_head offset can be only less than page size so no need to - * consider folio order + * consider allocation order */ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj); - free_large_kmalloc(&folio->page, obj); + free_large_kmalloc(page, obj); return; } - slab = folio_slab(folio); s = slab->slab_cache; - slab_addr = folio_address(folio); + slab_addr = slab_address(slab); if (is_kfence_address(obj)) { obj = kfence_object_start(obj); -- 2.47.2 This should generate identical code to the previous version, but without any dependency on how folios work. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 976e116580a6..184bab91e0af 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6866,7 +6866,7 @@ void kvfree_rcu_cb(struct rcu_head *head) */ void kfree(const void *object) { - struct folio *folio; + struct page *page; struct slab *slab; struct kmem_cache *s; void *x = (void *)object; @@ -6876,13 +6876,13 @@ void kfree(const void *object) if (unlikely(ZERO_OR_NULL_PTR(object))) return; - folio = virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(&folio->page, (void *)object); + page = virt_to_page(object); + slab = page_slab(page); + if (!slab) { + free_large_kmalloc(page, (void *)object); return; } - slab = folio_slab(folio); s = slab->slab_cache; slab_free(s, slab, x, _RET_IP_); } -- 2.47.2 One slight tweak I made is to calculate 'ks' earlier, which means we can reuse it in the warning rather than calculating the object size twice. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 184bab91e0af..4570d1dce271 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6977,16 +6977,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, if (is_kfence_address(p)) { ks = orig_size = kfence_ksize(p); } else { - struct folio *folio; + struct page *page = virt_to_page(p); + struct slab *slab = page_slab(page); - folio = virt_to_folio(p); - if (unlikely(!folio_test_slab(folio))) { + if (!slab) { /* Big kmalloc object */ - WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE); - WARN_ON(p != folio_address(folio)); - ks = folio_size(folio); + ks = page_size(page); + WARN_ON(ks <= KMALLOC_MAX_CACHE_SIZE); + WARN_ON(p != page_address(page)); } else { - s = folio_slab(folio)->slab_cache; + s = slab->slab_cache; orig_size = get_orig_size(s, (void *)p); ks = s->object_size; } -- 2.47.2 Use pages and slabs directly instead of converting to folios. 
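For context, build_detached_freelist() sits under the bulk free API and is called with a NULL cache when the objects came from kmalloc(); the page_slab() NULL return now distinguishes large kmalloc allocations there without a folio detour. A usage sketch of the (unchanged) caller-visible API follows; the sizes and the SZ_1M choice are illustrative assumptions, not part of this patch:

/* Usage sketch only; the bulk API itself is not changed by this patch. */
static void bulk_free_example(void)
{
	void *objs[2];

	objs[0] = kmalloc(64, GFP_KERNEL);	/* slab-backed object */
	objs[1] = kmalloc(SZ_1M, GFP_KERNEL);	/* large kmalloc on typical configs */

	if (objs[0] && objs[1]) {
		/* NULL cache: each object is classified from its page */
		kfree_bulk(ARRAY_SIZE(objs), objs);
	} else {
		kfree(objs[0]);
		kfree(objs[1]);
	}
}
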
Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 4570d1dce271..17b456f6fd4e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -7297,23 +7297,25 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, { int lookahead = 3; void *object; - struct folio *folio; + struct page *page; + struct slab *slab; size_t same; object = p[--size]; - folio = virt_to_folio(object); + page = virt_to_page(object); + slab = page_slab(page); if (!s) { /* Handle kalloc'ed objects */ - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(&folio->page, object); + if (!slab) { + free_large_kmalloc(page, object); df->slab = NULL; return size; } /* Derive kmem_cache from object */ - df->slab = folio_slab(folio); - df->s = df->slab->slab_cache; + df->slab = slab; + df->s = slab->slab_cache; } else { - df->slab = folio_slab(folio); + df->slab = slab; df->s = cache_from_obj(s, object); /* Support for memcg */ } -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, remove mentions of struct folio from this function. Since we don't need to handle large kmalloc objects specially here, we can just use virt_to_slab(). Signed-off-by: Matthew Wilcox (Oracle) --- mm/slab_common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 236b4e25fce0..b613533b29e7 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1615,17 +1615,15 @@ static void kfree_rcu_work(struct work_struct *work) static bool kfree_rcu_sheaf(void *obj) { struct kmem_cache *s; - struct folio *folio; struct slab *slab; if (is_vmalloc_addr(obj)) return false; - folio = virt_to_folio(obj); - if (unlikely(!folio_test_slab(folio))) + slab = virt_to_slab(obj); + if (unlikely(!slab)) return false; - slab = folio_slab(folio); s = slab->slab_cache; if (s->cpu_sheaves) { if (likely(!IS_ENABLED(CONFIG_NUMA) || -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, remove mentions of struct folio from this function. Since large kmalloc objects are not supported here, we can just use virt_to_slab(). Signed-off-by: Matthew Wilcox (Oracle) --- mm/slub.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 17b456f6fd4e..d79aa826d3e5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6899,7 +6899,6 @@ EXPORT_SYMBOL(kfree); */ void kfree_nolock(const void *object) { - struct folio *folio; struct slab *slab; struct kmem_cache *s; void *x = (void *)object; @@ -6907,13 +6906,12 @@ void kfree_nolock(const void *object) if (unlikely(ZERO_OR_NULL_PTR(object))) return; - folio = virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) { + slab = virt_to_slab(object); + if (unlikely(!slab)) { WARN_ONCE(1, "large_kmalloc is not supported by kfree_nolock()"); return; } - slab = folio_slab(folio); s = slab->slab_cache; memcg_slab_free_hook(s, slab, &x, 1); -- 2.47.2 Use page_slab() instead of virt_to_folio() followed by folio_slab(). We do end up calling compound_head() twice for non-slab copies, but that will not be a problem once we allocate memdescs separately. Signed-off-by: Matthew Wilcox (Oracle) Cc: Kees Cook Cc: "Gustavo A. R. 
Silva" Cc: linux-hardening@vger.kernel.org --- mm/usercopy.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index dbdcc43964fb..5de7a518b1b1 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -164,7 +164,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n, { unsigned long addr = (unsigned long)ptr; unsigned long offset; - struct folio *folio; + struct page *page; + struct slab *slab; if (is_kmap_addr(ptr)) { offset = offset_in_page(ptr); @@ -189,16 +190,23 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - folio = virt_to_folio(ptr); - - if (folio_test_slab(folio)) { + page = virt_to_page(ptr); + slab = page_slab(page); + if (slab) { /* Check slab allocator for flags and size. */ - __check_heap_object(ptr, n, folio_slab(folio), to_user); - } else if (folio_test_large(folio)) { - offset = ptr - folio_address(folio); - if (n > folio_size(folio) - offset) + __check_heap_object(ptr, n, slab, to_user); + } else if (PageCompound(page)) { + page = compound_head(page); + offset = ptr - page_address(page); + if (n > page_size(page) - offset) usercopy_abort("page alloc", NULL, to_user, offset, n); } + + /* + * We cannot check non-compound pages. They might be part of + * a large allocation, in which case crossing a page boundary + * is fine. + */ } DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, convert the pointer to a slab rather than a folio. This means we can end up passing a NULL slab pointer to mem_cgroup_from_obj_slab() if the pointer is not to a page allocated to slab, and we handle that appropriately by returning NULL. Signed-off-by: Matthew Wilcox (Oracle) Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Muchun Song Cc: cgroups@vger.kernel.org --- mm/memcontrol.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 025da46d9959..b239d8ad511a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2589,38 +2589,28 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, } static __always_inline -struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) +struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p) { /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in * slab->obj_exts. */ - if (folio_test_slab(folio)) { - struct slabobj_ext *obj_exts; - struct slab *slab; - unsigned int off; - - slab = folio_slab(folio); - obj_exts = slab_obj_exts(slab); - if (!obj_exts) - return NULL; + struct slabobj_ext *obj_exts; + unsigned int off; - off = obj_to_index(slab->slab_cache, slab, p); - if (obj_exts[off].objcg) - return obj_cgroup_memcg(obj_exts[off].objcg); + if (!slab) + return NULL; + obj_exts = slab_obj_exts(slab); + if (!obj_exts) return NULL; - } - /* - * folio_memcg_check() is used here, because in theory we can encounter - * a folio where the slab flag has been cleared already, but - * slab->obj_exts has not been freed yet - * folio_memcg_check() will guarantee that a proper memory - * cgroup pointer or NULL will be returned. 
- */ - return folio_memcg_check(folio); + off = obj_to_index(slab->slab_cache, slab, p); + if (obj_exts[off].objcg) + return obj_cgroup_memcg(obj_exts[off].objcg); + + return NULL; } /* @@ -2637,7 +2627,7 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) if (mem_cgroup_disabled()) return NULL; - return mem_cgroup_from_obj_folio(virt_to_folio(p), p); + return mem_cgroup_from_obj_slab(virt_to_slab(p), p); } static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) -- 2.47.2 In preparation for splitting struct slab from struct page and struct folio, remove mentions of struct folio from this function. There is a mild improvement for large kmalloc objects as we will avoid calling compound_head() for them. We can discard the comment as using PageLargeKmalloc() rather than !folio_test_slab() makes it obvious. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: kasan-dev --- mm/kasan/common.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 22e5d67ff064..1d27f1bd260b 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -517,24 +517,20 @@ void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, bool __kasan_mempool_poison_object(void *ptr, unsigned long ip) { - struct folio *folio = virt_to_folio(ptr); + struct page *page = virt_to_page(ptr); struct slab *slab; - /* - * This function can be called for large kmalloc allocation that get - * their memory from page_alloc. Thus, the folio might not be a slab. - */ - if (unlikely(!folio_test_slab(folio))) { + if (unlikely(PageLargeKmalloc(page))) { if (check_page_allocation(ptr, ip)) return false; - kasan_poison(ptr, folio_size(folio), KASAN_PAGE_FREE, false); + kasan_poison(ptr, page_size(page), KASAN_PAGE_FREE, false); return true; } if (is_kfence_address(ptr)) return true; - slab = folio_slab(folio); + slab = page_slab(page); if (check_slab_allocation(slab->slab_cache, ptr, ip)) return false; -- 2.47.2 Use page_slab() instead of virt_to_folio() which will work perfectly when struct slab is separated from struct folio. This was the last user of folio_slab(), so delete it. Signed-off-by: Matthew Wilcox (Oracle) --- mm/slab.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 0422f2acf8c6..8430e24bba3b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -117,19 +117,6 @@ static_assert(sizeof(struct slab) <= sizeof(struct page)); static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t))); #endif -/** - * folio_slab - Converts from folio to slab. - * @folio: The folio. - * - * Currently struct slab is a different representation of a folio where - * folio_test_slab() is true. - * - * Return: The slab which contains this folio. - */ -#define folio_slab(folio) (_Generic((folio), \ - const struct folio *: (const struct slab *)(folio), \ - struct folio *: (struct slab *)(folio))) - /** * slab_folio - The folio allocated for a slab * @s: The slab. @@ -192,12 +179,7 @@ static inline pg_data_t *slab_pgdat(const struct slab *slab) static inline struct slab *virt_to_slab(const void *addr) { - struct folio *folio = virt_to_folio(addr); - - if (!folio_test_slab(folio)) - return NULL; - - return folio_slab(folio); + return page_slab(virt_to_page(addr)); } static inline int slab_order(const struct slab *slab) -- 2.47.2
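
As a closing illustration (not part of any patch above): with the series applied, classifying a kmalloc-family pointer needs only struct page and struct slab, never a folio. A sketch using the helpers introduced in this series; ksize_sketch is an illustrative name, not a real function:

/* Sketch only; mirrors the logic __ksize() has after this series. */
static size_t ksize_sketch(const void *object)
{
	const struct page *page = virt_to_page(object);
	const struct slab *slab = page_slab(page);

	if (slab)
		return slab_ksize(slab->slab_cache);	/* slab-backed object */
	if (PageLargeKmalloc(page))
		return large_kmalloc_size(page);	/* large kmalloc */
	return 0;	/* page-allocator memory; ksize() on it is deprecated */
}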