When a cache has a high s->align value and s->object_size is not aligned
to it, each object ends up with some unused space because of alignment
padding. If this wasted space is big enough, we can use it to store the
slabobj_ext metadata there instead of wasting it. On my system, this
happens with caches like kmem_cache, mm_struct, pid, task_struct,
sighand_cache, xfs_inode, and others.

To place the slabobj_ext metadata within each object, the existing
slab_obj_ext() logic can still be used by setting:

  - slab->obj_exts = slab_address(slab) + s->red_left_pad + (slabobj_ext offset)
  - stride = s->size

slab_obj_ext() doesn't need to know where the metadata is stored, so this
method works without adding extra overhead to slab_obj_ext().
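To illustrate why the lookup path stays unchanged, here is a minimal
userspace sketch (not the kernel code; obj_ext_at(), struct
slabobj_ext_stub and the cache geometry below are made-up stand-ins) of
the base + index * stride addressing that slab_obj_ext() relies on. Only
the base and the stride differ between the two placements:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct slabobj_ext_stub {	/* simplified stand-in for struct slabobj_ext */
	unsigned long objcg;
};

/* The same lookup serves both placements: base + index * stride. */
static struct slabobj_ext_stub *obj_ext_at(uintptr_t base, size_t stride,
					   unsigned int index)
{
	return (struct slabobj_ext_stub *)(base + index * stride);
}

int main(void)
{
	/* Hypothetical xfs_inode-like geometry, no red zoning or debugging. */
	size_t object_size = 992, align = 64;
	size_t size = (object_size + align - 1) & ~(align - 1);	/* 1024 */
	unsigned char slab_memory[4096];			/* fake slab */

	/* Out-of-object placement: base is a separate vector, stride is the entry size. */
	struct slabobj_ext_stub vec[4];
	printf("vector    entry 2: %p\n",
	       (void *)obj_ext_at((uintptr_t)vec, sizeof(vec[0]), 2));

	/* In-object placement: base points at the padding inside object 0, stride is size. */
	uintptr_t base = (uintptr_t)slab_memory + object_size;
	printf("in-object entry 2: %p\n",
	       (void *)obj_ext_at(base, size, 2));
	return 0;
}

In both cases the lookup is the same pointer arithmetic; the in-object
case simply points the base at the padding inside the first object and
advances by s->size per object.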
A good example benefiting from this optimization is xfs_inode
(object_size: 992, align: 64): ALIGN(992, 64) = 1024, so each object
carries 32 bytes of alignment padding, which is enough to hold a
struct slabobj_ext.

To measure the memory savings, an XFS filesystem with 6 million files was
created, and updatedb was run within that filesystem.

[ MEMCG=y, MEM_ALLOC_PROFILING=y ]

Before patch (run updatedb):
  Slab:            8409212 kB
  SReclaimable:    7314864 kB
  SUnreclaim:      1094348 kB

After patch (run updatedb):
  Slab:            8313324 kB
  SReclaimable:    7318176 kB
  SUnreclaim:       995148 kB (-96.87 MiB)

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (run updatedb):
  Slab:            8081708 kB
  SReclaimable:    7314400 kB
  SUnreclaim:       767308 kB

After patch (run updatedb):
  Slab:            8034676 kB
  SReclaimable:    7314532 kB
  SUnreclaim:       720144 kB (-46.06 MiB)

Enjoy the memory savings!

Suggested-by: Vlastimil Babka
Signed-off-by: Harry Yoo
---
 include/linux/slab.h |  3 ++
 mm/slab_common.c     |  6 ++--
 mm/slub.c            | 69 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 680193356ac7..279d35b40e8e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -59,6 +59,7 @@ enum _slab_flag_bits {
 	_SLAB_CMPXCHG_DOUBLE,
 #ifdef CONFIG_SLAB_OBJ_EXT
 	_SLAB_NO_OBJ_EXT,
+	_SLAB_OBJ_EXT_IN_OBJ,
 #endif
 	_SLAB_FLAGS_LAST_BIT
 };
@@ -240,8 +241,10 @@ enum _slab_flag_bits {
 /* Slab created using create_boot_cache */
 #ifdef CONFIG_SLAB_OBJ_EXT
 #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
+#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
 #else
 #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
+#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_UNUSED
 #endif
 
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 08f5baee1309..cbd85eecd430 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -43,11 +43,13 @@ DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
 /*
- * Set of flags that will prevent slab merging
+ * Set of flags that will prevent slab merging.
+ * Any flag that adds per-object metadata should be included,
+ * since slab merging can update s->inuse, which affects the metadata layout.
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
 		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB | SLAB_NO_MERGE)
+		SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_OBJ_EXT_IN_OBJ)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
 		SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
diff --git a/mm/slub.c b/mm/slub.c
index ad9a1cae48b2..6689131761c5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -937,6 +937,26 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
 	obj_exts += obj_exts_offset_in_slab(s, slab);
 	return obj_exts == slab_obj_exts(slab);
 }
+
+static bool obj_exts_in_object(struct kmem_cache *s)
+{
+	return s->flags & SLAB_OBJ_EXT_IN_OBJ;
+}
+
+static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+	unsigned int offset = get_info_end(s);
+
+	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
+		offset += sizeof(struct track) * 2;
+
+	if (slub_debug_orig_size(s))
+		offset += ALIGN(sizeof(unsigned int), sizeof(unsigned long));
+
+	offset += kasan_metadata_size(s, false);
+
+	return offset;
+}
 #else
 static inline bool need_slab_obj_exts(struct kmem_cache *s)
 {
@@ -964,6 +984,17 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
 {
 	return false;
 }
+
+static inline bool obj_exts_in_object(struct kmem_cache *s)
+{
+	return false;
+}
+
+static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+	return 0;
+}
+
 #endif
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -1272,6 +1303,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
 	off += kasan_metadata_size(s, false);
 
+	if (obj_exts_in_object(s))
+		off += sizeof(struct slabobj_ext);
+
 	if (off != size_from_object(s))
 		/* Beginning of the filler is the free pointer */
 		print_section(KERN_ERR, "Padding ", p + off,
@@ -1441,7 +1475,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
  *	A. Free pointer (if we cannot overwrite object on free)
  *	B. Tracking data for SLAB_STORE_USER
  *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
- *	D. Padding to reach required alignment boundary or at minimum
+ *	D. KASAN alloc metadata (KASAN enabled)
+ *	E. struct slabobj_ext to store accounting metadata
+ *	   (SLAB_OBJ_EXT_IN_OBJ enabled)
+ *	F. Padding to reach required alignment boundary or at minimum
  *		one word if debugging is on to be able to detect writes
  *		before the word boundary.
 *
@@ -1470,6 +1507,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 	off += kasan_metadata_size(s, false);
 
+	if (obj_exts_in_object(s))
+		off += sizeof(struct slabobj_ext);
+
 	if (size_from_object(s) == off)
 		return 1;
 
@@ -2236,7 +2276,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
 	if (!obj_exts)
 		return;
 
-	if (obj_exts_in_slab(slab->slab_cache, slab)) {
+	if (obj_exts_in_slab(slab->slab_cache, slab) ||
+	    obj_exts_in_object(slab->slab_cache)) {
 		slab->obj_exts = 0;
 		return;
 	}
@@ -2273,6 +2314,21 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
 		if (IS_ENABLED(CONFIG_MEMCG))
 			slab->obj_exts |= MEMCG_DATA_OBJEXTS;
 		slab_set_stride(slab, sizeof(struct slabobj_ext));
+	} else if (obj_exts_in_object(s)) {
+		unsigned int offset = obj_exts_offset_in_object(s);
+
+		slab->obj_exts = (unsigned long)slab_address(slab);
+		slab->obj_exts += s->red_left_pad;
+		slab->obj_exts += obj_exts_offset_in_object(s);
+		if (IS_ENABLED(CONFIG_MEMCG))
+			slab->obj_exts |= MEMCG_DATA_OBJEXTS;
+		slab_set_stride(slab, s->size);
+
+		for_each_object(addr, s, slab_address(slab), slab->objects) {
+			kasan_unpoison_range(addr + offset,
+					     sizeof(struct slabobj_ext));
+			memset(addr + offset, 0, sizeof(struct slabobj_ext));
+		}
 	}
 }
 
@@ -7354,6 +7410,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 {
 	slab_flags_t flags = s->flags;
 	unsigned int size = s->object_size;
+	unsigned int aligned_size;
 	unsigned int order;
 
 	/*
@@ -7466,7 +7523,13 @@
 	 * offset 0. In order to align the objects we have to simply size
 	 * each object to conform to the alignment.
 	 */
-	size = ALIGN(size, s->align);
+	aligned_size = ALIGN(size, s->align);
+#ifdef CONFIG_SLAB_OBJ_EXT
+	if (aligned_size - size >= sizeof(struct slabobj_ext))
+		s->flags |= SLAB_OBJ_EXT_IN_OBJ;
+#endif
+	size = aligned_size;
+
 	s->size = size;
 	s->reciprocal_size = reciprocal_value(size);
 	order = calculate_order(size);
-- 
2.43.0