Introduce an rhashtable that will contain all inode connectors for a superblock. Because for some filesystems the inode number alone is not enough to identify an inode, provide enough flexibility for such filesystems to supply their own keys for the hash. Eventually we will use this hash table to track all inode connectors (and thus marks) for the superblock, and also to track inode marks for inodes that were evicted from memory (so that inode marks no longer have to pin inodes in memory). Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 12 ++- fs/notify/fsnotify.h | 4 +- fs/notify/mark.c | 176 +++++++++++++++++++++++++++---- include/linux/fs.h | 3 + include/linux/fsnotify.h | 9 ++ include/linux/fsnotify_backend.h | 6 +- 6 files changed, 187 insertions(+), 23 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 079b868552c2..46db712c83ec 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -110,9 +110,16 @@ void fsnotify_sb_delete(struct super_block *sb) FSNOTIFY_PRIO_PRE_CONTENT)); } +void fsnotify_free_sb_info(struct fsnotify_sb_info *sbinfo) +{ + rhashtable_destroy(&sbinfo->inode_conn_hash); + kfree(sbinfo); +} + void fsnotify_sb_free(struct super_block *sb) { - kfree(sb->s_fsnotify_info); + if (sb->s_fsnotify_info) + fsnotify_free_sb_info(sb->s_fsnotify_info); } /* @@ -770,8 +777,7 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); - fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, - SLAB_PANIC); + fsnotify_init_connector_caches(); return 0; } diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 860a07ada7fd..e9160c0e1a70 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -108,6 +108,8 @@ static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) */ extern void fsnotify_set_children_dentry_flags(struct inode *inode); -extern struct kmem_cache *fsnotify_mark_connector_cachep; +void fsnotify_free_sb_info(struct fsnotify_sb_info *sbinfo); + 
+void fsnotify_init_connector_caches(void); #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/mark.c b/fs/notify/mark.c index ecd2c3944051..fd1fe8d37c36 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -79,7 +79,8 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; -struct kmem_cache *fsnotify_mark_connector_cachep; +static struct kmem_cache *fsnotify_mark_connector_cachep; +static struct kmem_cache *fsnotify_inode_mark_connector_cachep; static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); @@ -91,6 +92,8 @@ static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); static void fsnotify_connector_destroy_workfn(struct work_struct *work); static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); +static void fsnotify_unhash_connector(struct fsnotify_mark_connector *conn); + void fsnotify_get_mark(struct fsnotify_mark *mark) { WARN_ON_ONCE(!refcount_read(&mark->refcnt)); @@ -323,7 +326,7 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) while (conn) { free = conn; conn = conn->destroy_next; - kmem_cache_free(fsnotify_mark_connector_cachep, free); + kfree(free); } } @@ -342,6 +345,7 @@ static void *fsnotify_detach_connector_from_object( if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + fsnotify_unhash_connector(conn); /* Unpin inode when detaching from connector */ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) @@ -384,6 +388,15 @@ static void fsnotify_drop_object(unsigned int type, void *objp) fsnotify_put_inode_ref(objp); } +static void fsnotify_free_connector(struct fsnotify_mark_connector *conn) +{ + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); +} + void fsnotify_put_mark(struct fsnotify_mark *mark) { struct 
fsnotify_mark_connector *conn = READ_ONCE(mark->connector); @@ -421,13 +434,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) fsnotify_drop_object(type, objp); - if (free_conn) { - spin_lock(&destroy_lock); - conn->destroy_next = connector_destroy_list; - connector_destroy_list = conn; - spin_unlock(&destroy_lock); - queue_work(system_unbound_wq, &connector_reaper_work); - } + if (free_conn) + fsnotify_free_connector(conn); /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from @@ -633,22 +641,136 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +/* + * Inode connector for filesystems where inode->i_ino uniquely identifies the + * inode. + */ +struct fsnotify_inode_mark_connector { + struct fsnotify_mark_connector common; + ino_t ino; + struct rhash_head hash_list; +}; + +/* Rhashtable parameters for filesystems using fsnotify_inode_mark_connector */ +static const struct rhashtable_params generic_inode_conn_hash_params = { + .key_len = sizeof(ino_t), + .key_offset = offsetof(struct fsnotify_inode_mark_connector, ino), + .head_offset = offsetof(struct fsnotify_inode_mark_connector, hash_list), +}; + +static const struct rhashtable_params * +fsnotify_get_conn_hash_params(const struct super_block *sb) +{ + if (sb->s_fsnotify_op) + return &sb->s_fsnotify_op->inode_conn_hash_params; + return &generic_inode_conn_hash_params; +} + +static struct rhash_head *fsnotify_conn_hash_head( + struct fsnotify_mark_connector *conn, + const struct rhashtable_params *params) +{ + return (void *)conn - + offsetof(struct fsnotify_inode_mark_connector, common) + + params->head_offset; +} + +static void fsnotify_init_connector(struct fsnotify_mark_connector *conn, + void *obj, unsigned int obj_type) +{ + spin_lock_init(&conn->lock); + INIT_HLIST_HEAD(&conn->list); + conn->flags = 0; + conn->prio = 0; + conn->type = obj_type; + conn->obj = obj; +} + +/* + * 
Initialize generic part of inode connector and insert the connector into + * the hash of inode connectors. + */ +int fsnotify_init_inode_connector(struct fsnotify_mark_connector *conn, + struct inode *inode) +{ + struct fsnotify_sb_info *info = fsnotify_sb_info(inode->i_sb); + const struct rhashtable_params *params = + fsnotify_get_conn_hash_params(inode->i_sb); + int ret; + + fsnotify_init_connector(conn, inode, FSNOTIFY_OBJ_TYPE_INODE); + ret = rhashtable_lookup_insert_fast(&info->inode_conn_hash, + fsnotify_conn_hash_head(conn, params), + *params); + if (!ret) + conn->flags |= FSNOTIFY_CONN_FLAG_HASHED; + return ret; +} +EXPORT_SYMBOL(fsnotify_init_inode_connector); + +/* + * Inode connector allocation function for filesystems using + * fsnotify_inode_mark_connector + */ +static struct fsnotify_mark_connector * +fsnotify_alloc_inode_connector(struct inode *inode) +{ + struct fsnotify_inode_mark_connector *iconn; + + iconn = kmem_cache_alloc(fsnotify_inode_mark_connector_cachep, + GFP_KERNEL); + if (!iconn) + return NULL; + iconn->ino = inode->i_ino; + if (fsnotify_init_inode_connector(&iconn->common, inode)) { + kfree(iconn); + return NULL; + } + + return &iconn->common; +} + +static void fsnotify_unhash_connector(struct fsnotify_mark_connector *conn) +{ + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HASHED)) + return; + + struct super_block *sb = fsnotify_connector_sb(conn); + struct fsnotify_sb_info *info = fsnotify_sb_info(sb); + const struct rhashtable_params *params = + fsnotify_get_conn_hash_params(sb); + + WARN_ON_ONCE(conn->type != FSNOTIFY_OBJ_TYPE_INODE && + conn->type != FSNOTIFY_OBJ_TYPE_DETACHED); + WARN_ON_ONCE(rhashtable_remove_fast(&info->inode_conn_hash, + fsnotify_conn_hash_head(conn, params), + *params)); + conn->flags &= ~FSNOTIFY_CONN_FLAG_HASHED; +} + static int fsnotify_attach_info_to_sb(struct super_block *sb) { struct fsnotify_sb_info *sbinfo; + int err; /* sb info is freed on fsnotify_sb_delete() */ sbinfo = kzalloc(sizeof(*sbinfo), 
GFP_KERNEL); if (!sbinfo) return -ENOMEM; + err = rhashtable_init(&sbinfo->inode_conn_hash, + fsnotify_get_conn_hash_params(sb)); + if (err) { + kfree(sbinfo); + return err; + } /* * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() * will observe an initialized structure */ if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { /* Someone else created sbinfo for us */ - kfree(sbinfo); + fsnotify_free_sb_info(sbinfo); } return 0; } @@ -658,15 +780,23 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, { struct fsnotify_mark_connector *conn; - conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { + struct inode *inode = obj; + const struct fsnotify_sb_operations *ops = + inode->i_sb->s_fsnotify_op; + + if (ops && ops->alloc_inode_connector) + conn = ops->alloc_inode_connector(inode); + else + conn = fsnotify_alloc_inode_connector(inode); + } else { + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, + GFP_KERNEL); + if (conn) + fsnotify_init_connector(conn, obj, obj_type); + } if (!conn) return -ENOMEM; - spin_lock_init(&conn->lock); - INIT_HLIST_HEAD(&conn->list); - conn->flags = 0; - conn->prio = 0; - conn->type = obj_type; - conn->obj = obj; /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -674,7 +804,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - kmem_cache_free(fsnotify_mark_connector_cachep, conn); + fsnotify_unhash_connector(conn); + fsnotify_free_connector(conn); } return 0; } @@ -1004,3 +1135,12 @@ void fsnotify_wait_marks_destroyed(void) flush_delayed_work(&reaper_work); } EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed); + +__init void fsnotify_init_connector_caches(void) +{ + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + fsnotify_inode_mark_connector_cachep = KMEM_CACHE( + 
fsnotify_inode_mark_connector, + SLAB_PANIC); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 601d036a6c78..98890bb1592a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1325,6 +1325,8 @@ struct sb_writers { struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; +struct fsnotify_sb_operations; + struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ @@ -1385,6 +1387,7 @@ struct super_block { #ifdef CONFIG_FSNOTIFY u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; + const struct fsnotify_sb_operations *s_fsnotify_op; #endif /* diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 28a9cb13fbfa..2f94809cca2a 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,6 +17,15 @@ #include #include +struct fsnotify_sb_operations { + struct fsnotify_mark_connector *(*alloc_inode_connector)(struct inode *inode); + + const struct rhashtable_params inode_conn_hash_params; +}; + +int fsnotify_init_inode_connector(struct fsnotify_mark_connector *conn, + struct inode *inode); + /* Are there any inode/mount/sb objects watched with priority prio or above? 
*/ static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, int prio) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d4034ddaf392..0a163c10b5e2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -544,6 +545,7 @@ struct fsnotify_mark_connector { unsigned char prio; /* Highest priority group */ #define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 +#define FSNOTIFY_CONN_FLAG_HASHED 0x04 unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ @@ -551,7 +553,7 @@ struct fsnotify_mark_connector { /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; - struct hlist_head list; + struct hlist_head list; /* List of marks */ }; /* @@ -560,6 +562,8 @@ struct fsnotify_mark_connector { */ struct fsnotify_sb_info { struct fsnotify_mark_connector __rcu *sb_marks; + /* Hash of connectors for inode marks */ + struct rhashtable inode_conn_hash; /* * Number of inode/mount/sb objects that are being watched in this sb. * Note that inodes objects are currently double-accounted. -- 2.51.0