Adapt pidfs to use the rhashtable-based xattr path by switching from a dedicated slab cache to simple_xattrs_alloc(). Previously pidfs allocated its struct simple_xattrs from a custom kmem_cache (pidfs_xattr_cachep) and initialized it with simple_xattrs_init(). Replace this with simple_xattrs_alloc(), which combines kzalloc + rhashtable_init and is reached from pidfs_xattr_set() via simple_xattrs_lazy_alloc(), and drop the dedicated slab cache entirely. Use simple_xattr_free_rcu() for replaced xattr entries to allow concurrent RCU readers to finish. In addition, pidfs_xattr_get() now returns -ENODATA instead of 0 when no xattrs have been allocated, and pidfs_free_pid() defers freeing of a pidfs_attr that carries xattrs to a work item via an llist. The llist_node shares a union with the remaining attribute fields (attr_mask, exit and coredump info), which move into the new struct pidfs_anon_attr. Signed-off-by: Christian Brauner --- fs/pidfs.c | 65 +++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 1e20e36e0ed5..cb62000681df 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "internal.h" @@ -29,7 +30,6 @@ #define PIDFS_PID_DEAD ERR_PTR(-ESRCH) static struct kmem_cache *pidfs_attr_cachep __ro_after_init; -static struct kmem_cache *pidfs_xattr_cachep __ro_after_init; static struct path pidfs_root_path = {}; @@ -44,9 +44,8 @@ enum pidfs_attr_mask_bits { PIDFS_ATTR_BIT_COREDUMP = 1, }; -struct pidfs_attr { +struct pidfs_anon_attr { unsigned long attr_mask; - struct simple_xattrs *xattrs; struct /* exit info */ { __u64 cgroupid; __s32 exit_code; @@ -55,6 +54,14 @@ struct pidfs_attr { __u32 coredump_signal; }; +struct pidfs_attr { + struct simple_xattrs *xattrs; + union { + struct pidfs_anon_attr; + struct llist_node pidfs_llist; + }; +}; + static struct rb_root pidfs_ino_tree = RB_ROOT; #if BITS_PER_LONG == 32 @@ -147,10 +154,30 @@ void pidfs_remove_pid(struct pid *pid) write_seqcount_end(&pidmap_lock_seq); } +static LLIST_HEAD(pidfs_free_list); + +static void pidfs_free_attr_work(struct work_struct *work) +{ + struct pidfs_attr *attr, *next; + struct llist_node *head; + + head = llist_del_all(&pidfs_free_list); + llist_for_each_entry_safe(attr, next, head, pidfs_llist) { + struct simple_xattrs *xattrs = 
attr->xattrs; + + if (xattrs) { + simple_xattrs_free(xattrs, NULL); + kfree(xattrs); + } + kfree(attr); + } +} + +static DECLARE_WORK(pidfs_free_work, pidfs_free_attr_work); + void pidfs_free_pid(struct pid *pid) { - struct pidfs_attr *attr __free(kfree) = no_free_ptr(pid->attr); - struct simple_xattrs *xattrs __free(kfree) = NULL; + struct pidfs_attr *attr = pid->attr; /* * Any dentry must've been wiped from the pid by now. @@ -169,9 +196,10 @@ void pidfs_free_pid(struct pid *pid) if (IS_ERR(attr)) return; - xattrs = no_free_ptr(attr->xattrs); - if (xattrs) - simple_xattrs_free(xattrs, NULL); + if (likely(!attr->xattrs)) + kfree(attr); + else if (llist_add(&attr->pidfs_llist, &pidfs_free_list)) + schedule_work(&pidfs_free_work); } #ifdef CONFIG_PROC_FS @@ -998,7 +1026,7 @@ static int pidfs_xattr_get(const struct xattr_handler *handler, xattrs = READ_ONCE(attr->xattrs); if (!xattrs) - return 0; + return -ENODATA; name = xattr_full_name(handler, suffix); return simple_xattr_get(xattrs, name, value, size); @@ -1018,22 +1046,16 @@ static int pidfs_xattr_set(const struct xattr_handler *handler, /* Ensure we're the only one to set @attr->xattrs. 
*/ WARN_ON_ONCE(!inode_is_locked(inode)); - xattrs = READ_ONCE(attr->xattrs); - if (!xattrs) { - xattrs = kmem_cache_zalloc(pidfs_xattr_cachep, GFP_KERNEL); - if (!xattrs) - return -ENOMEM; - - simple_xattrs_init(xattrs); - smp_store_release(&pid->attr->xattrs, xattrs); - } + xattrs = simple_xattrs_lazy_alloc(&attr->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); name = xattr_full_name(handler, suffix); old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (IS_ERR(old_xattr)) return PTR_ERR(old_xattr); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); return 0; } @@ -1108,11 +1130,6 @@ void __init pidfs_init(void) (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT | SLAB_PANIC), NULL); - pidfs_xattr_cachep = kmem_cache_create("pidfs_xattr_cache", - sizeof(struct simple_xattrs), 0, - (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_ACCOUNT | SLAB_PANIC), NULL); - pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); -- 2.47.3