__break_lease() currently overrides the flc_flags field in the lease after allocating it. A forthcoming patch will add the ability to request a FL_DELEG type lease. Instead of overriding the flags field, add a flags argument to lease_alloc() and lease_init() so it's set correctly after allocating. Signed-off-by: Jeff Layton --- fs/locks.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 04a3f0e2072461b6e2d3d1cd12f2b089d69a7db3..b33c327c21dcd49341fbeac47caeb72cdf7455db 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -585,7 +585,7 @@ static const struct lease_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, int type, struct file_lease *fl) +static int lease_init(struct file *filp, unsigned int flags, int type, struct file_lease *fl) { if (assign_type(&fl->c, type) != 0) return -EINVAL; @@ -594,13 +594,13 @@ static int lease_init(struct file *filp, int type, struct file_lease *fl) fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; - fl->c.flc_flags = FL_LEASE; + fl->c.flc_flags = flags; fl->fl_lmops = &lease_manager_ops; return 0; } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lease *lease_alloc(struct file *filp, int type) +static struct file_lease *lease_alloc(struct file *filp, unsigned int flags, int type) { struct file_lease *fl = locks_alloc_lease(); int error = -ENOMEM; @@ -608,7 +608,7 @@ static struct file_lease *lease_alloc(struct file *filp, int type) if (fl == NULL) return ERR_PTR(error); - error = lease_init(filp, type, fl); + error = lease_init(filp, flags, type, fl); if (error) { locks_free_lease(fl); return ERR_PTR(error); @@ -1548,10 +1548,9 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); - new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); + new_fl = lease_alloc(NULL, type, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); - new_fl->c.flc_flags = type; /* typically we will check that ctx is non-NULL before calling */ ctx = locks_inode_context(inode); @@ -2033,7 +2032,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) struct fasync_struct *new; int error; - fl = lease_alloc(filp, arg); + fl = lease_alloc(filp, FL_LEASE, arg); if (IS_ERR(fl)) return PTR_ERR(fl); -- 2.51.1 Currently __break_lease takes both a type and an openmode. With the addition of directory leases, that makes less sense. Declare a set of LEASE_BREAK_* flags that can be used to control how lease breaks work instead of requiring a type and an openmode. Signed-off-by: Jeff Layton --- fs/locks.c | 29 +++++++++++++++++---------- include/linux/filelock.h | 52 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 25 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index b33c327c21dcd49341fbeac47caeb72cdf7455db..3cdd84a0fbedc9bd1b47725a9cf963342aafbce9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1529,24 +1529,31 @@ any_leases_conflict(struct inode *inode, struct file_lease *breaker) /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return - * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: - * break all leases - * @type: FL_LEASE: break leases and delegations; FL_DELEG: break - * only delegations + * @flags: LEASE_BREAK_* flags * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on - * a call to open() or truncate(). This function can sleep unless you - * specified %O_NONBLOCK to your open(). + * a call to open() or truncate(). This function can block waiting for the + * lease break unless you specify LEASE_BREAK_NONBLOCK. */ -int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) +int __break_lease(struct inode *inode, unsigned int flags) { - int error = 0; - struct file_lock_context *ctx; struct file_lease *new_fl, *fl, *tmp; + struct file_lock_context *ctx; unsigned long break_time; - int want_write = (mode & O_ACCMODE) != O_RDONLY; + unsigned int type; LIST_HEAD(dispose); + bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY); + int error = 0; + + if (flags & LEASE_BREAK_LEASE) + type = FL_LEASE; + else if (flags & LEASE_BREAK_DELEG) + type = FL_DELEG; + else if (flags & LEASE_BREAK_LAYOUT) + type = FL_LAYOUT; + else + return -EINVAL; new_fl = lease_alloc(NULL, type, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) @@ -1595,7 +1602,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) if (list_empty(&ctx->flc_lease)) goto out; - if (mode & O_NONBLOCK) { + if (flags & LEASE_BREAK_NONBLOCK) { trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; diff --git a/include/linux/filelock.h b/include/linux/filelock.h index c2ce8ba05d068b451ecf8f513b7e532819a29944..47da6aa28d8dc9122618d02c6608deda0f3c4d3e 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -212,7 +212,14 @@ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); void locks_init_lease(struct file_lease *); void locks_free_lease(struct file_lease *fl); struct file_lease *locks_alloc_lease(void); -int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); + +#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations +#define LEASE_BREAK_DELEG BIT(1) // break delegations only +#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only +#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break +#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event + +int __break_lease(struct inode *inode, unsigned int flags); void lease_get_mtime(struct inode *, struct timespec64 *time); int generic_setlease(struct file *, int, struct file_lease **, void **priv); int kernel_setlease(struct file *, int, struct file_lease **, void **); @@ -367,7 +374,7 @@ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *f return -ENOLCK; } -static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) +static inline int __break_lease(struct inode *inode, unsigned int flags) { return 0; } @@ -428,6 +435,17 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) } #ifdef CONFIG_FILE_LOCKING +static inline unsigned int openmode_to_lease_flags(unsigned int mode) +{ + unsigned int flags = 0; + + if ((mode & O_ACCMODE) == O_RDONLY) + flags |= LEASE_BREAK_OPEN_RDONLY; + if (mode & O_NONBLOCK) + flags |= LEASE_BREAK_NONBLOCK; + return flags; +} + static inline int break_lease(struct inode *inode, unsigned int mode) { struct file_lock_context *flctx; @@ -443,11 +461,11 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return 0; smp_mb(); if (!list_empty_careful(&flctx->flc_lease)) - return __break_lease(inode, mode, FL_LEASE); + return __break_lease(inode, LEASE_BREAK_LEASE | openmode_to_lease_flags(mode)); return 0; } -static inline int break_deleg(struct inode *inode, unsigned int mode) +static inline int break_deleg(struct inode *inode, unsigned int flags) { struct file_lock_context *flctx; @@ -461,8 +479,10 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) if (!flctx) return 0; smp_mb(); - if (!list_empty_careful(&flctx->flc_lease)) - return __break_lease(inode, mode, FL_DELEG); + if (!list_empty_careful(&flctx->flc_lease)) { + flags |= LEASE_BREAK_DELEG; + return __break_lease(inode, flags); + } return 0; } @@ -470,7 +490,7 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_ { int ret; - ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); + ret = break_deleg(inode, LEASE_BREAK_NONBLOCK); if (ret == -EWOULDBLOCK && delegated_inode) { *delegated_inode = inode; ihold(inode); @@ -482,7 +502,7 @@ static inline int break_deleg_wait(struct inode **delegated_inode) { int ret; - ret = break_deleg(*delegated_inode, O_WRONLY); + ret = break_deleg(*delegated_inode, 0); iput(*delegated_inode); *delegated_inode = NULL; return ret; @@ -491,20 +511,24 @@ static inline int break_deleg_wait(struct inode **delegated_inode) static inline int break_layout(struct inode *inode, bool wait) { smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) - return __break_lease(inode, - wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, - FL_LAYOUT); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) { + unsigned int flags = LEASE_BREAK_LAYOUT; + + if (!wait) + flags |= LEASE_BREAK_NONBLOCK; + + return __break_lease(inode, flags); + } return 0; } #else /* !CONFIG_FILE_LOCKING */ -static inline int break_lease(struct inode *inode, unsigned int mode) +static inline int break_lease(struct inode *inode, bool wait) { return 0; } -static inline int break_deleg(struct inode *inode, unsigned int mode) +static inline int break_deleg(struct inode *inode, unsigned int flags) { return 0; } -- 2.51.1 The current API requires a pointer to an inode pointer. It's easy for callers to get this wrong. Add a new delegated_inode structure and use that to pass back any inode that needs to be waited on. Signed-off-by: Jeff Layton --- fs/attr.c | 2 +- fs/namei.c | 18 +++++++++--------- fs/open.c | 8 ++++---- fs/posix_acl.c | 8 ++++---- fs/utimes.c | 4 ++-- fs/xattr.c | 12 ++++++------ include/linux/filelock.h | 36 +++++++++++++++++++++++++++--------- include/linux/fs.h | 9 +++++---- include/linux/xattr.h | 4 ++-- 9 files changed, 60 insertions(+), 41 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 795f231d00e8eaaadf5b62f241655cb4b69cb507..b9ec6b47bab2fc2b561677b639633bd32994022f 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -415,7 +415,7 @@ EXPORT_SYMBOL(may_setattr); * performed on the raw inode simply pass @nop_mnt_idmap. */ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr, struct inode **delegated_inode) + struct iattr *attr, struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; diff --git a/fs/namei.c b/fs/namei.c index 7377020a2cba02501483020e0fc93c279fb38d3e..bf42f146f847a5330fc581595c7256af28d9db90 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4648,7 +4648,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, struct inode **delegated_inode) + struct dentry *dentry, struct delegated_inode *delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); @@ -4706,7 +4706,7 @@ int do_unlinkat(int dfd, struct filename *name) struct qstr last; int type; struct inode *inode = NULL; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); @@ -4743,7 +4743,7 @@ int do_unlinkat(int dfd, struct filename *name) if (inode) iput(inode); /* truncate the inode here */ inode = NULL; - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -4892,7 +4892,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, - struct inode **delegated_inode) + struct delegated_inode *delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4968,7 +4968,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int how = 0; int error; @@ -5012,7 +5012,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, new_dentry, &delegated_inode); out_dput: end_creating_path(&new_path, new_dentry); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); @@ -5098,7 +5098,7 @@ int vfs_rename(struct renamedata *rd) struct inode *new_dir = d_inode(rd->new_parent); struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; - struct inode **delegated_inode = rd->delegated_inode; + struct delegated_inode *delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; @@ -5261,7 +5261,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; @@ -5369,7 +5369,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, exit3: unlock_rename(new_path.dentry, old_path.dentry); exit_lock_rename: - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/open.c b/fs/open.c index 3d64372ecc675e4795eb0a0deda10f8f67b95640..fdaa6f08f6f4cac5c2fefd3eafa5e430e51f3979 100644 --- a/fs/open.c +++ b/fs/open.c @@ -631,7 +631,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; struct iattr newattrs; int error; @@ -651,7 +651,7 @@ int chmod_common(const struct path *path, umode_t mode) &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -756,7 +756,7 @@ int chown_common(const struct path *path, uid_t user, gid_t group) struct mnt_idmap *idmap; struct user_namespace *fs_userns; struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int error; struct iattr newattrs; kuid_t uid; @@ -791,7 +791,7 @@ int chown_common(const struct path *path, uid_t user, gid_t group) error = notify_change(idmap, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4050942ab52f95741da2df13d191ade5c5ca12a2..768f027c142811ea907fe8545155ba7abd016305 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -1091,7 +1091,7 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, int acl_type; int error; struct inode *inode = d_inode(dentry); - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; acl_type = posix_acl_type(acl_name); if (acl_type < 0) @@ -1141,7 +1141,7 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, out_inode_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -1212,7 +1212,7 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, int acl_type; int error; struct inode *inode = d_inode(dentry); - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; acl_type = posix_acl_type(acl_name); if (acl_type < 0) @@ -1249,7 +1249,7 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, out_inode_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/utimes.c b/fs/utimes.c index c7c7958e57b22f91646ca9f76d18781b64d371a3..bf9f45bdef54947de7ac55c9f873ae9d0336dafa 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -22,7 +22,7 @@ int vfs_utimes(const struct path *path, struct timespec64 *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; if (times) { if (!nsec_valid(times[0].tv_nsec) || @@ -66,7 +66,7 @@ int vfs_utimes(const struct path *path, struct timespec64 *times) error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/xattr.c b/fs/xattr.c index 8851a5ef34f5ab34383975dd4cef537de3f6391e..32d445fb60aaf2aaf4b16b62934dc99bad378067 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -274,7 +274,7 @@ int __vfs_setxattr_noperm(struct mnt_idmap *idmap, int __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, - int flags, struct inode **delegated_inode) + int flags, struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -305,7 +305,7 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; const void *orig_value = value; int error; @@ -322,7 +322,7 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, flags, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -533,7 +533,7 @@ EXPORT_SYMBOL(__vfs_removexattr); int __vfs_removexattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, - struct inode **delegated_inode) + struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -567,7 +567,7 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int error; retry_deleg: @@ -576,7 +576,7 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, name, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 47da6aa28d8dc9122618d02c6608deda0f3c4d3e..208d108df2d73a9df65e5dc9968d074af385f881 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -486,25 +486,35 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) return 0; } -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +struct delegated_inode { + struct inode *di_inode; +}; + +static inline bool is_delegated(struct delegated_inode *di) +{ + return di->di_inode; +} + +static inline int try_break_deleg(struct inode *inode, + struct delegated_inode *di) { int ret; ret = break_deleg(inode, LEASE_BREAK_NONBLOCK); - if (ret == -EWOULDBLOCK && delegated_inode) { - *delegated_inode = inode; + if (ret == -EWOULDBLOCK && di) { + di->di_inode = inode; ihold(inode); } return ret; } -static inline int break_deleg_wait(struct inode **delegated_inode) +static inline int break_deleg_wait(struct delegated_inode *di) { int ret; - ret = break_deleg(*delegated_inode, 0); - iput(*delegated_inode); - *delegated_inode = NULL; + ret = break_deleg(di->di_inode, 0); + iput(di->di_inode); + di->di_inode = NULL; return ret; } @@ -523,6 +533,13 @@ static inline int break_layout(struct inode *inode, bool wait) } #else /* !CONFIG_FILE_LOCKING */ +struct delegated_inode { }; + +static inline bool is_delegated(struct delegated_inode *di) +{ + return false; +} + static inline int break_lease(struct inode *inode, bool wait) { return 0; @@ -533,12 +550,13 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) return 0; } -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +static inline int try_break_deleg(struct inode *inode, + struct delegated_inode *delegated_inode) { return 0; } -static inline int break_deleg_wait(struct inode **delegated_inode) +static inline int break_deleg_wait(struct delegated_inode *delegated_inode) { BUG(); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444be36e0a779df55622cc38c9419ff..909a88e3979d4f1ba3104f3d05145e1096ed44d5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -80,6 +80,7 @@ struct fs_context; struct fs_parameter_spec; struct file_kattr; struct iomap_ops; +struct delegated_inode; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2119,10 +2120,10 @@ int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, - struct dentry *, struct inode **); + struct dentry *, struct delegated_inode *); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, - struct inode **); + struct delegated_inode *); /** * struct renamedata - contains all information required for renaming @@ -2140,7 +2141,7 @@ struct renamedata { struct dentry *old_dentry; struct dentry *new_parent; struct dentry *new_dentry; - struct inode **delegated_inode; + struct delegated_inode *delegated_inode; unsigned int flags; } __randomize_layout; @@ -3071,7 +3072,7 @@ static inline int bmap(struct inode *inode, sector_t *block) #endif int notify_change(struct mnt_idmap *, struct dentry *, - struct iattr *, struct inode **); + struct iattr *, struct delegated_inode *); int inode_permission(struct mnt_idmap *, struct inode *, int); int generic_permission(struct mnt_idmap *, struct inode *, int); static inline int file_permission(struct file *file, int mask) diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 86b0d47984a16d935dd1c45ca80a3b8bb5b7295b..64e9afe7d647dc38f686a4b5c6f765e061cde54c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -85,12 +85,12 @@ int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, - struct inode **); + struct delegated_inode *); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, - const char *, struct inode **); + const char *, struct delegated_inode *); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -- 2.51.1 When nfsd starts requesting directory delegations, setlease handlers may see requests for leases on directories. Push the !S_ISREG check down into the non-trivial setlease handlers, so we can selectively enable them where they're supported. FUSE is special: It's the only filesystem that supports atomic_open and allows kernel-internal leases. atomic_open is issued when the VFS doesn't know the state of the dentry being opened. If the file doesn't exist, it may be created, in which case the dir lease should be broken. The existing kernel-internal lease implementation has no provision for this. Ensure that we don't allow directory leases by default going forward by explicitly disabling them there. Reviewed-by: NeilBrown Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- fs/fuse/dir.c | 1 + fs/locks.c | 5 +++-- fs/nfs/nfs4file.c | 2 ++ fs/smb/client/cifsfs.c | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ecaec0fea3a132e7cbb88121e7db7fb504d57d3c..667774cc72a1d49796f531fcb342d2e4878beb85 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -2230,6 +2230,7 @@ static const struct file_operations fuse_dir_operations = { .fsync = fuse_dir_fsync, .unlocked_ioctl = fuse_dir_ioctl, .compat_ioctl = fuse_dir_compat_ioctl, + .setlease = simple_nosetlease, }; static const struct inode_operations fuse_common_inode_operations = { diff --git a/fs/locks.c b/fs/locks.c index 3cdd84a0fbedc9bd1b47725a9cf963342aafbce9..f5b210a2dc34c70ac36e972436c62482bbe32ca6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1935,6 +1935,9 @@ static int generic_delete_lease(struct file *filp, void *owner) int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { + if (!S_ISREG(file_inode(filp)->i_mode)) + return -EINVAL; + switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); @@ -2024,8 +2027,6 @@ vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; error = security_file_lock(filp, arg); if (error) return error; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 7f43e890d3564a000dab9365048a3e17dc96395c..7317f26892c5782a39660cae87ec1afea24e36c0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -431,6 +431,8 @@ void nfs42_ssc_unregister_ops(void) static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease, void **priv) { + if (!S_ISREG(file_inode(file)->i_mode)) + return -EINVAL; return nfs4_proc_setlease(file, arg, lease, priv); } diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 05b1fa76e8ccf1e86f0c174593cd6e1acb84608d..03c44c1d9bb631b87a8b67aa16e481d6bb3c7d14 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1149,6 +1149,9 @@ cifs_setlease(struct file *file, int arg, struct file_lease **lease, void **priv struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + /* Check if file is oplocked if this is request for new lease */ if (arg == F_UNLCK || ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. vfs_link, vfs_unlink, and vfs_rename all have existing delegation break handling for the children in the rename. Add the necessary calls for breaking delegations in the parent(s) as well. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/namei.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index bf42f146f847a5330fc581595c7256af28d9db90..5bcf3e93d350ffd290f72725c378d3dffeeae364 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4667,6 +4667,9 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, else { error = security_inode_unlink(dir, dentry); if (!error) { + error = try_break_deleg(dir, delegated_inode); + if (error) + goto out; error = try_break_deleg(target, delegated_inode); if (error) goto out; @@ -4936,7 +4939,9 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(dir, delegated_inode); + if (!error) + error = try_break_deleg(inode, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } @@ -5203,6 +5208,14 @@ int vfs_rename(struct renamedata *rd) old_dir->i_nlink >= max_links) goto out; } + error = try_break_deleg(old_dir, delegated_inode); + if (error) + goto out; + if (new_dir != old_dir) { + error = try_break_deleg(new_dir, delegated_inode); + if (error) + goto out; + } if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a new delegated_inode parameter to vfs_mkdir. All of the existing callers set that to NULL for now, except for do_mkdirat which will properly block until the lease is gone. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- drivers/base/devtmpfs.c | 2 +- fs/cachefiles/namei.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 24 ++++++++++++++++++------ fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 2 +- fs/xfs/scrub/orphanage.c | 2 +- include/linux/fs.h | 2 +- 11 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 9d4e46ad8352257a6a65d85526ebdbf9bf2d4b19..0e79621cb0f79870003b867ca384199171ded4e0 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -180,7 +180,7 @@ static int dev_mkdir(const char *name, umode_t mode) if (IS_ERR(dentry)) return PTR_ERR(dentry); - dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL); if (!IS_ERR(dentry)) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d1edb2ac38376c4f9d2a18026450bb3c774f7824..50c0f9c76d1fd4c05db90d7d0d1bad574523ead0 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -130,7 +130,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, goto mkdir_error; ret = cachefiles_inject_write_error(); if (ret == 0) - subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL); else subdir = ERR_PTR(ret); if (IS_ERR(subdir)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ed1394da8d6bd7065f2a074378331f13fcda17f9..35830b3144f8f71374a78b3e7463b864f4fc216e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -508,7 +508,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, goto out; lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, - lower_dentry, mode); + lower_dentry, mode, NULL); rc = PTR_ERR(lower_dentry); if (IS_ERR(lower_dentry)) goto out; diff --git a/fs/init.c b/fs/init.c index 07f592ccdba868509d0f3aaf9936d8d890fdbec5..895f8a09a71acfd03e11164e3b441a7d4e2de146 100644 --- a/fs/init.c +++ b/fs/init.c @@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode) error = security_path_mkdir(&path, dentry, mode); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode); + dentry, mode, NULL); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } diff --git a/fs/namei.c b/fs/namei.c index 5bcf3e93d350ffd290f72725c378d3dffeeae364..76c0587d991ff7307e3dde69497719d716c8d7b8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4407,10 +4407,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory returning correct dentry if possible - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory - * @mode: mode of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a directory. * @@ -4427,7 +4428,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d * In case of an error the dentry is dput() and an ERR_PTR() is returned. */ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct dentry *dentry, umode_t mode, + struct delegated_inode *delegated_inode) { int error; unsigned max_links = dir->i_sb->s_max_links; @@ -4450,6 +4452,10 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (max_links && dir->i_nlink >= max_links) goto err; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto err; + de = dir->i_op->mkdir(idmap, dir, dentry, mode); error = PTR_ERR(de); if (IS_ERR(de)) @@ -4473,6 +4479,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; + struct delegated_inode delegated_inode = { }; retry: dentry = filename_create(dfd, name, &path, lookup_flags); @@ -4484,11 +4491,16 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode); + dentry, mode, &delegated_inode); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e2b9472e5c78c9f03731090ffdfb26eb5de38fe0..1f56834b2072fcee1d0d400bbb554b0c949ecab4 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, 0700, NULL); if (IS_ERR(dentry)) status = PTR_ERR(dentry); out_put: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9cb20d4aeab159ef3ba3584d1a3a33ef16ba4dea..97aef140cbf5fca4c41738fdcaccba3b57886463 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1558,7 +1558,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, NULL); if (IS_ERR(dchild)) { host_err = PTR_ERR(dchild); } else if (d_is_negative(dchild)) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c8fd5951fc5ece1ae6b3e2a0801ca15f9faf7d72..0f65f9a5d54d4786b39e4f4f30f416d5b9016e70 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -248,7 +248,7 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, { struct dentry *ret; - ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, NULL); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); return ret; } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 891ed2dc2b7351a5cb14a2241d71095ffdd03f08..3d2190f26623b23ea79c63410905a3c3ad684048 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -230,7 +230,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; d = dentry; - dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); + dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode, NULL); if (IS_ERR(dentry)) err = PTR_ERR(dentry); else if (d_is_negative(dentry)) diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 9c12cb8442311ca26b169e4d1567939ae44a5be0..91c9d07b97f306f57aebb9b69ba564b0c2cb8c17 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -167,7 +167,7 @@ xrep_orphanage_create( */ if (d_really_is_negative(orphanage_dentry)) { orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode, - orphanage_dentry, 0750); + orphanage_dentry, 0750, NULL); error = PTR_ERR(orphanage_dentry); if (IS_ERR(orphanage_dentry)) goto out_unlock_root; diff --git a/include/linux/fs.h b/include/linux/fs.h index 909a88e3979d4f1ba3104f3d05145e1096ed44d5..20bb4c8a4e8e1be7e11047d228c05920ea6c388d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2114,7 +2114,7 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t); + struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a delegated_inode struct to vfs_rmdir() and populate that pointer with the parent inode if it's non-NULL. Most existing in-kernel callers pass in a NULL pointer. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- drivers/base/devtmpfs.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/namei.c | 22 +++++++++++++++++----- fs/nfsd/nfs4recover.c | 4 ++-- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 4 ++-- include/linux/fs.h | 3 ++- 8 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 0e79621cb0f79870003b867ca384199171ded4e0..104025104ef75381984fd94dfbd50feeaa8cdd22 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -261,7 +261,7 @@ static int dev_rmdir(const char *name) return PTR_ERR(dentry); if (d_inode(dentry)->i_private == &thread) err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), - dentry); + dentry, NULL); else err = -EPERM; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 35830b3144f8f71374a78b3e7463b864f4fc216e..88631291b32535f623a3fbe4ea9b6ed48a306ca0 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -540,7 +540,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) if (d_unhashed(lower_dentry)) rc = -EINVAL; else - rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry); + rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry, NULL); } if (!rc) { clear_nlink(d_inode(dentry)); diff --git a/fs/namei.c b/fs/namei.c index 76c0587d991ff7307e3dde69497719d716c8d7b8..9e0393a92091ac522b5324fcdad8c5592a948e8d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4522,9 +4522,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @delegated_inode: returns parent inode, if it's delegated. * * Remove a directory. * @@ -4535,7 +4536,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) * raw inode simply pass @nop_mnt_idmap. */ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, struct delegated_inode *delegated_inode) { int error = may_delete(idmap, dir, dentry, 1); @@ -4557,6 +4558,10 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, if (error) goto out; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto out; + error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -4583,6 +4588,7 @@ int do_rmdir(int dfd, struct filename *name) struct qstr last; int type; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) @@ -4612,7 +4618,8 @@ int do_rmdir(int dfd, struct filename *name) error = security_path_rmdir(&path, dentry); if (error) goto exit4; - error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); + error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); exit4: dput(dentry); exit3: @@ -4620,6 +4627,11 @@ int do_rmdir(int dfd, struct filename *name) mnt_drop_write(path.mnt); exit2: path_put(&path); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1f56834b2072fcee1d0d400bbb554b0c949ecab4..30bae93931d9c9f8900dfcf96f79403db4f3f458 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -337,7 +337,7 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) status = -ENOENT; if (d_really_is_negative(dentry)) goto out; - status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry, NULL); out: dput(dentry); out_unlock: @@ -427,7 +427,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL); if (status) printk("failed to remove client recovery directory %pd\n", child); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 97aef140cbf5fca4c41738fdcaccba3b57886463..c400ea94ff2e837fd59719bf2c4b79ef1d064743 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2108,7 +2108,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, break; } } else { - host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry); + host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry, NULL); } fh_fill_post_attrs(fhp); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0f65f9a5d54d4786b39e4f4f30f416d5b9016e70..d215d7349489686b66bb66e939b27046f7d836f6 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -206,7 +206,7 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs, static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); + int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 3d2190f26623b23ea79c63410905a3c3ad684048..c5f0f3170d586cb2dc4d416b80948c642797fb82 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -609,7 +609,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { - err = vfs_rmdir(idmap, d_inode(parent), path->dentry); + err = vfs_rmdir(idmap, d_inode(parent), path->dentry, NULL); if (err && err != -ENOTEMPTY) ksmbd_debug(VFS, "rmdir failed, err %d\n", err); } else { @@ -1090,7 +1090,7 @@ int ksmbd_vfs_unlink(struct file *filp) dget(dentry); if (S_ISDIR(d_inode(dentry)->i_mode)) - err = vfs_rmdir(idmap, d_inode(dir), dentry); + err = vfs_rmdir(idmap, d_inode(dir), dentry, NULL); else err = vfs_unlink(idmap, d_inode(dir), dentry, NULL); diff --git a/include/linux/fs.h b/include/linux/fs.h index 20bb4c8a4e8e1be7e11047d228c05920ea6c388d..12873214e1c7811735ea5d2dee3d57e2a5604d8f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2121,7 +2121,8 @@ int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); -int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); +int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *, + struct delegated_inode *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a delegated_inode parameter to lookup_open and have it break the delegation. Then, open_last_lookups can wait for the delegation break and retry the call to lookup_open once it's done. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/namei.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 9e0393a92091ac522b5324fcdad8c5592a948e8d..f439429bdfa271ccc64c937771ef4175597feb53 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3697,7 +3697,7 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, */ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, - bool got_write) + bool got_write, struct delegated_inode *delegated_inode) { struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; @@ -3786,6 +3786,11 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { + /* but break the directory lease first! */ + error = try_break_deleg(dir_inode, delegated_inode); + if (error) + goto out_dput; + file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { @@ -3848,6 +3853,7 @@ static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { + struct delegated_inode delegated_inode = { }; struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; @@ -3879,7 +3885,7 @@ static const char *open_last_lookups(struct nameidata *nd, return ERR_PTR(-ECHILD); } } - +retry: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { got_write = !mnt_want_write(nd->path.mnt); /* @@ -3892,7 +3898,7 @@ static const char *open_last_lookups(struct nameidata *nd, inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); - dentry = lookup_open(nd, file, op, got_write); + dentry = lookup_open(nd, file, op, got_write, &delegated_inode); if (!IS_ERR(dentry)) { if (file->f_mode & FMODE_CREATED) fsnotify_create(dir->d_inode, dentry); @@ -3907,8 +3913,16 @@ static const char *open_last_lookups(struct nameidata *nd, if (got_write) mnt_drop_write(nd->path.mnt); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + if (is_delegated(&delegated_inode)) { + int error = break_deleg_wait(&delegated_inode); + + if (!error) + goto retry; + return ERR_PTR(error); + } return ERR_CAST(dentry); + } if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) { dput(nd->path.dentry); -- 2.51.1 vfs_create() has grown an uncomfortably long argument list, and a following patch will add another. Convert it to take a new struct createdata pointer and fix up the callers to pass one in. Signed-off-by: Jeff Layton --- fs/ecryptfs/inode.c | 11 ++++++++--- fs/namei.c | 33 ++++++++++++++++++++------------- fs/nfsd/nfs3proc.c | 9 ++++++++- fs/nfsd/vfs.c | 19 ++++++++++++------- fs/open.c | 9 ++++++--- fs/overlayfs/overlayfs.h | 7 ++++++- fs/smb/server/vfs.c | 9 +++++++-- include/linux/fs.h | 13 +++++++++++-- 8 files changed, 78 insertions(+), 32 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 88631291b32535f623a3fbe4ea9b6ed48a306ca0..51accd166dbf515eb5221b6a39b204622a6b0f7c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -187,9 +187,14 @@ ecryptfs_do_create(struct inode *directory_inode, struct inode *inode; rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir); - if (!rc) - rc = vfs_create(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, true); + if (!rc) { + struct createdata args = { .idmap = &nop_mnt_idmap, + .dir = lower_dir, + .dentry = lower_dentry, + .mode = mode, + .excl = true }; + rc = vfs_create(&args); + } if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); diff --git a/fs/namei.c b/fs/namei.c index f439429bdfa271ccc64c937771ef4175597feb53..fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3460,23 +3460,22 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child file - * @mode: mode of the child file - * @want_excl: whether the file must not yet exist + * @args: struct createdata describing create to be done * * Create a new file. * * If the inode has been found through an idmapped mount the idmap of - * the vfsmount must be passed through @idmap. This function will then take - * care to map the inode according to @idmap before checking permissions. + * the vfsmount must be passed through @args->idmap. This function will then take + * care to map the inode according to @args->idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -int vfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool want_excl) +int vfs_create(struct createdata *args) { + struct mnt_idmap *idmap = args->idmap; + struct inode *dir = args->dir; + struct dentry *dentry = args->dentry; + umode_t mode = args->mode; int error; error = may_create(idmap, dir, dentry); @@ -3490,7 +3489,7 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); + error = dir->i_op->create(idmap, dir, dentry, mode, args->excl); if (!error) fsnotify_create(dir, dentry); return error; @@ -4382,12 +4381,20 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { - case 0: case S_IFREG: - error = vfs_create(idmap, path.dentry->d_inode, - dentry, mode, true); + case 0: + case S_IFREG: + { + struct createdata args = { .idmap = idmap, + .dir = path.dentry->d_inode, + .dentry = dentry, + .mode = mode, + .excl = true }; + + error = vfs_create(&args); if (!error) security_path_post_mknod(idmap, dentry); break; + } case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index b6d03e1ef5f7a5e8dd111b0d56c061f1e91abff7..dcd7de465e7e33d1c66ee0272c4f220d55e85928 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -258,6 +258,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs attrs = { .na_iattr = iap, }; + struct createdata cargs = { }; __u32 v_mtime, v_atime; struct inode *inode; __be32 status; @@ -344,7 +345,13 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = fh_fill_pre_attrs(fhp); if (status != nfs_ok) goto out; - host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true); + + cargs.idmap = &nop_mnt_idmap; + cargs.dir = inode; + cargs.dentry = child; + cargs.mode = iap->ia_mode; + cargs.excl = true; + host_err = vfs_create(&cargs); if (host_err < 0) { status = nfserrno(host_err); goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c400ea94ff2e837fd59719bf2c4b79ef1d064743..e4ed1952f02c0a66c64528e59453cc9b2352c18f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1527,11 +1527,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *resfhp) { - struct dentry *dentry, *dchild; - struct inode *dirp; - struct iattr *iap = attrs->na_iattr; - __be32 err; - int host_err = 0; + struct dentry *dentry, *dchild; + struct inode *dirp; + struct iattr *iap = attrs->na_iattr; + __be32 err; + int host_err = 0; + struct createdata cargs = { }; dentry = fhp->fh_dentry; dirp = d_inode(dentry); @@ -1552,8 +1553,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(&nop_mnt_idmap, dirp, dchild, - iap->ia_mode, true); + cargs.idmap = &nop_mnt_idmap; + cargs.dir = dirp; + cargs.dentry = dchild; + cargs.mode = iap->ia_mode; + cargs.excl = true; + host_err = vfs_create(&cargs); if (!host_err) nfsd_check_ignore_resizing(iap); break; diff --git a/fs/open.c b/fs/open.c index fdaa6f08f6f4cac5c2fefd3eafa5e430e51f3979..006cc2aeb1fbbb3db48b32db798108da120f75c2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1164,6 +1164,11 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred) { + struct createdata cargs = { .idmap = mnt_idmap(path->mnt), + .dir = d_inode(path->dentry->d_parent), + .dentry = path->dentry, + .mode = mode, + .excl = true }; struct file *f; int error; @@ -1171,9 +1176,7 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, if (IS_ERR(f)) return f; - error = vfs_create(mnt_idmap(path->mnt), - d_inode(path->dentry->d_parent), - path->dentry, mode, true); + error = vfs_create(&cargs); if (!error) error = vfs_open(path, f); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d215d7349489686b66bb66e939b27046f7d836f6..5fa939ac842ed04df8f0088233f4cba4ac703c05 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -235,7 +235,12 @@ static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); + struct createdata cargs = { .idmap = ovl_upper_mnt_idmap(ofs), + .dir = dir, + .dentry = dentry, + .mode = mode, + .excl = true }; + int err = vfs_create(&cargs); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index c5f0f3170d586cb2dc4d416b80948c642797fb82..fbc3c34e14b870f1750b945349335afb62d89d0d 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -173,6 +173,7 @@ void ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap, */ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) { + struct createdata cargs = { }; struct path path; struct dentry *dentry; int err; @@ -188,8 +189,12 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } mode |= S_IFREG; - err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), - dentry, mode, true); + cargs.idmap = mnt_idmap(path.mnt); + cargs.dir = d_inode(path.dentry); + cargs.dentry = dentry; + cargs.mode = mode; + cargs.excl = true; + err = vfs_create(&cargs); if (!err) { ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(dentry)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 12873214e1c7811735ea5d2dee3d57e2a5604d8f..b61873767b37591aecadd147623d7dfc866bef82 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2111,8 +2111,17 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, /* * VFS helper functions.. */ -int vfs_create(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t, bool); + +struct createdata { + struct mnt_idmap *idmap; // idmap of the mount the inode was found from + struct inode *dir; // inode of parent directory + struct dentry *dentry; // dentry of the child file + umode_t mode; // mode of the child file + bool excl; // whether the file must not yet exist +}; + +int vfs_create(struct createdata *); + struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a delegated_inode parameter to struct createdata. Most callers just leave that as a NULL pointer, but do_mknodat() is changed to wait for a delegation break if there is one. Signed-off-by: Jeff Layton --- fs/namei.c | 26 +++++++++++++++++--------- include/linux/fs.h | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53..e8973000a312fb05ebb63a0d9bd83b9a5f8f805d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3487,6 +3487,9 @@ int vfs_create(struct createdata *args) mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); + if (error) + return error; + error = try_break_deleg(dir, args->delegated_inode); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, args->excl); @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode) static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { + struct delegated_inode delegated_inode = { }; + struct createdata cargs = { }; struct mnt_idmap *idmap; struct dentry *dentry; struct path path; @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, switch (mode & S_IFMT) { case 0: case S_IFREG: - { - struct createdata args = { .idmap = idmap, - .dir = path.dentry->d_inode, - .dentry = dentry, - .mode = mode, - .excl = true }; - - error = vfs_create(&args); + cargs.idmap = idmap, + cargs.dir = path.dentry->d_inode, + cargs.dentry = dentry, + cargs.delegated_inode = &delegated_inode; + cargs.mode = mode, + cargs.excl = true, + error = vfs_create(&cargs); if (!error) security_path_post_mknod(idmap, dentry); break; - } case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); @@ -4406,6 +4409,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, } out2: end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/include/linux/fs.h b/include/linux/fs.h index b61873767b37591aecadd147623d7dfc866bef82..cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2116,12 +2116,12 @@ struct createdata { struct mnt_idmap *idmap; // idmap of the mount the inode was found from struct inode *dir; // inode of parent directory struct dentry *dentry; // dentry of the child file + struct delegated_inode *delegated_inode; // returns parent inode, if delegated umode_t mode; // mode of the child file bool excl; // whether the file must not yet exist }; int vfs_create(struct createdata *); - struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, -- 2.51.1 In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a new delegated_inode pointer to vfs_mknod() and have the appropriate callers wait when there is an outstanding delegation. All other callers just set the pointer to NULL. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- drivers/base/devtmpfs.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 25 +++++++++++++++++-------- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- include/linux/fs.h | 4 ++-- net/unix/af_unix.c | 2 +- 8 files changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 104025104ef75381984fd94dfbd50feeaa8cdd22..2f576ecf18324f767cd5ac6cbd28adbf9f46b958 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -231,7 +231,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, return PTR_ERR(dentry); err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, - dev->devt); + dev->devt, NULL); if (!err) { struct iattr newattrs; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 51accd166dbf515eb5221b6a39b204622a6b0f7c..42d00f5080b8c52622a5e57bd70a5659504da46e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -570,7 +570,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir, rc = lock_parent(dentry, &lower_dentry, &lower_dir); if (!rc) rc = vfs_mknod(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, dev); + lower_dentry, mode, dev, NULL); if (rc || d_really_is_negative(lower_dentry)) goto out; rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); diff --git a/fs/init.c b/fs/init.c index 895f8a09a71acfd03e11164e3b441a7d4e2de146..4f02260dd65b0dfcbfbf5812d2ec6a33444a3b1f 100644 --- a/fs/init.c +++ b/fs/init.c @@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) error = security_path_mknod(&path, dentry, mode, dev); if (!error) error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode, new_decode_dev(dev)); + dentry, mode, new_decode_dev(dev), NULL); end_creating_path(&path, dentry); return error; } diff --git a/fs/namei.c b/fs/namei.c index e8973000a312fb05ebb63a0d9bd83b9a5f8f805d..8851f0870f63ba1d3789300dbacc8b9cce534900 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4294,13 +4294,15 @@ inline struct dentry *start_creating_user_path( } EXPORT_SYMBOL(start_creating_user_path); + /** * vfs_mknod - create device node or file - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child device node - * @mode: mode of the child device node - * @dev: device number of device to create + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node + * @dev: device number of device to create + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a device node or file. * @@ -4311,7 +4313,8 @@ EXPORT_SYMBOL(start_creating_user_path); * raw inode simply pass @nop_mnt_idmap. */ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, dev_t dev) + struct dentry *dentry, umode_t mode, dev_t dev, + struct delegated_inode *delegated_inode) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); @@ -4335,6 +4338,10 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); @@ -4400,11 +4407,13 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, new_decode_dev(dev)); + dentry, mode, new_decode_dev(dev), + &delegated_inode); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, 0); + dentry, mode, 0, + &delegated_inode); break; } out2: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e4ed1952f02c0a66c64528e59453cc9b2352c18f..ce8c653f7a4edab81a7f1b231a4ae77dc82c073a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1579,7 +1579,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFIFO: case S_IFSOCK: host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild, - iap->ia_mode, rdev); + iap->ia_mode, rdev, NULL); break; default: printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 5fa939ac842ed04df8f0088233f4cba4ac703c05..958531448cb85aeb3bfa174e9dbf25469384b53f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -262,7 +262,7 @@ static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); + int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev, NULL); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9..47031dbb0b026c0b59661719df551979717907a5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2125,7 +2125,7 @@ int vfs_create(struct createdata *); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, - umode_t, dev_t); + umode_t, dev_t, struct delegated_inode *); int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, @@ -2161,7 +2161,7 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, - WHITEOUT_DEV); + WHITEOUT_DEV, NULL); } struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 768098dec2310008632558ae928703b37c3cc8ef..db1fd8d6a84c2c7c0d45b43d9c5a936b3d491b7b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1399,7 +1399,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, idmap = mnt_idmap(parent.mnt); err = security_path_mknod(&parent, dentry, mode, 0); if (!err) - err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); + err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL); if (err) goto out_path; err = mutex_lock_interruptible(&u->bindlock); -- 2.51.1 In order to add directory delegation support, we must break delegations on the parent on any change to the directory. Add a delegated_inode parameter to vfs_symlink() and have it break the delegation. do_symlinkat() can then wait on the delegation break before proceeding. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 16 ++++++++++++++-- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- include/linux/fs.h | 2 +- 6 files changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 42d00f5080b8c52622a5e57bd70a5659504da46e..c52bd187f21437fa1a0deb8f3dcd60c8930154ef 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -485,7 +485,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap, if (rc) goto out_lock; rc = vfs_symlink(&nop_mnt_idmap, lower_dir, lower_dentry, - encoded_symname); + encoded_symname, NULL); kfree(encoded_symname); if (rc || d_really_is_negative(lower_dentry)) goto out_lock; diff --git a/fs/init.c b/fs/init.c index 4f02260dd65b0dfcbfbf5812d2ec6a33444a3b1f..e0f5429c0a49d046bd3f231a260954ed0f90ef44 100644 --- a/fs/init.c +++ b/fs/init.c @@ -209,7 +209,7 @@ int __init init_symlink(const char *oldname, const char *newname) error = security_path_symlink(&path, dentry, oldname); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, oldname); + dentry, oldname, NULL); end_creating_path(&path, dentry); return error; } diff --git a/fs/namei.c b/fs/namei.c index 8851f0870f63ba1d3789300dbacc8b9cce534900..7071cdbfc587c108ebd777746f736d854feb68e7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4854,6 +4854,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * @dir: inode of the parent directory * @dentry: dentry of the child symlink file * @oldname: name of the file to link to + * @delegated_inode: returns victim inode, if the inode is delegated. * * Create a symlink. * @@ -4864,7 +4865,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, const char *oldname) + struct dentry *dentry, const char *oldname, + struct delegated_inode *delegated_inode) { int error; @@ -4879,6 +4881,10 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); @@ -4892,6 +4898,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to) struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; if (IS_ERR(from)) { error = PTR_ERR(from); @@ -4906,8 +4913,13 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to) error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, from->name); + dentry, from->name, &delegated_inode); end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ce8c653f7a4edab81a7f1b231a4ae77dc82c073a..edcc8c05e435a4abba27dd2eb07facf4b5ed3243 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1748,7 +1748,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = fh_fill_pre_attrs(fhp); if (err != nfs_ok) goto out_unlock; - host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path); + host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path, NULL); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 958531448cb85aeb3bfa174e9dbf25469384b53f..3e4774a979976c65f2984bbc212bd403e440644e 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -272,7 +272,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { - int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); + int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname, NULL); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index 47031dbb0b026c0b59661719df551979717907a5..8cd1804a2c3dcfa0153b1bc1c0250f5a989c857c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2127,7 +2127,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t, struct delegated_inode *); int vfs_symlink(struct mnt_idmap *, struct inode *, - struct dentry *, const char *); + struct dentry *, const char *, struct delegated_inode *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *, -- 2.51.1 With the addition of the try_break_lease calls in directory changing operations, allow generic_setlease to hand them out. Write leases on directories are never allowed however, so continue to reject them. For now, there is no API for requesting delegations from userland, so ensure that userland is prevented from acquiring a lease on a directory. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/locks.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index f5b210a2dc34c70ac36e972436c62482bbe32ca6..dd290a87f58eb5d522f03fa99d612fbad84dacf3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1935,14 +1935,19 @@ static int generic_delete_lease(struct file *filp, void *owner) int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { - if (!S_ISREG(file_inode(filp)->i_mode)) + struct inode *inode = file_inode(filp); + + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return -EINVAL; switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); - case F_RDLCK: case F_WRLCK: + if (S_ISDIR(inode->i_mode)) + return -EINVAL; + fallthrough; + case F_RDLCK: if (!(*flp)->fl_lmops->lm_break) { WARN_ON_ONCE(1); return -ENOLCK; @@ -2071,6 +2076,9 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) */ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { + if (S_ISDIR(file_inode(filp)->i_mode)) + return -EINVAL; + if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg); -- 2.51.1 The filecache infrastructure will only handle S_IFREG files at the moment. Directory delegations will require adding support for opening S_IFDIR inodes. Plumb a "type" argument into nfsd_file_do_acquire() and have all of the existing callers set it to S_IFREG. Add a new nfsd_file_acquire_dir() wrapper that nfsd can call to request a nfsd_file that holds a directory open. For now, there is no need for a fsnotify_mark for directories, as CB_NOTIFY is not yet supported. Change nfsd_file_do_acquire() to avoid allocating one for non-S_IFREG inodes. Reviewed-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/nfsd/filecache.c | 57 ++++++++++++++++++++++++++++++++++++++++------------- fs/nfsd/filecache.h | 2 ++ fs/nfsd/vfs.c | 5 +++-- fs/nfsd/vfs.h | 2 +- 4 files changed, 49 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a238b6725008a5c2988bd3da874d1f34ee778437..93798575b8075c63f95cd415b6d24df706ada0f6 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1086,7 +1086,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, struct auth_domain *client, struct svc_fh *fhp, unsigned int may_flags, struct file *file, - struct nfsd_file **pnf, bool want_gc) + umode_t type, bool want_gc, struct nfsd_file **pnf) { unsigned char need = may_flags & NFSD_FILE_MAY_MASK; struct nfsd_file *new, *nf; @@ -1097,13 +1097,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, int ret; retry: - if (rqstp) { - status = fh_verify(rqstp, fhp, S_IFREG, + if (rqstp) + status = fh_verify(rqstp, fhp, type, may_flags|NFSD_MAY_OWNER_OVERRIDE); - } else { - status = fh_verify_local(net, cred, client, fhp, S_IFREG, + else + status = fh_verify_local(net, cred, client, fhp, type, may_flags|NFSD_MAY_OWNER_OVERRIDE); - } + if (status != nfs_ok) return status; inode = d_inode(fhp->fh_dentry); @@ -1176,15 +1176,18 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(inode); - if (nf->nf_mark) { + + if (type == S_IFREG) + nf->nf_mark = nfsd_file_mark_find_or_create(inode); + + if (type != S_IFREG || nf->nf_mark) { if (file) { get_file(file); nf->nf_file = file; status = nfs_ok; trace_nfsd_file_opened(nf, status); } else { - ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file); + ret = nfsd_open_verified(fhp, type, may_flags, &nf->nf_file); if (ret == -EOPENSTALE && stale_retry) { stale_retry = false; nfsd_file_unhash(nf); @@ -1246,7 +1249,7 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, - fhp, may_flags, NULL, pnf, true); + fhp, may_flags, NULL, S_IFREG, true, pnf); } /** @@ -1271,7 +1274,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, - fhp, may_flags, NULL, pnf, false); + fhp, may_flags, NULL, S_IFREG, false, pnf); } /** @@ -1314,8 +1317,8 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, const struct cred *save_cred = get_current_cred(); __be32 beres; - beres = nfsd_file_do_acquire(NULL, net, cred, client, - fhp, may_flags, NULL, pnf, false); + beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags, + NULL, S_IFREG, false, pnf); put_cred(revert_creds(save_cred)); return beres; } @@ -1344,7 +1347,33 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file **pnf) { return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, - fhp, may_flags, file, pnf, false); + fhp, may_flags, file, S_IFREG, false, pnf); +} + +/** + * nfsd_file_acquire_dir - Get a struct nfsd_file with an open directory + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). This opens directories only, and only + * in O_RDONLY mode. + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, fhp, + NFSD_MAY_READ|NFSD_MAY_64BIT_COOKIE, + NULL, S_IFDIR, false, pnf); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index e3d6ca2b60308e5e91ba4bb32d935f54527d8bda..b383dbc5b9218d21a29b852572f80fab08de9fa9 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -82,5 +82,7 @@ __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, struct auth_domain *client, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf); +__be32 nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file **pnf); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index edcc8c05e435a4abba27dd2eb07facf4b5ed3243..e94747ae5897b1a33e2d09b7a3cbf6f5ad1ca417 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -959,15 +959,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, /** * nfsd_open_verified - Open a regular file for the filecache * @fhp: NFS filehandle of the file to open + * @type: S_IFMT inode type allowed (0 means any type is allowed) * @may_flags: internal permission flags * @filp: OUT: open "struct file *" * * Returns zero on success, or a negative errno value. */ int -nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp) +nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { - return __nfsd_open(fhp, S_IFREG, may_flags, filp); + return __nfsd_open(fhp, type, may_flags, filp); } /* diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 0c0292611c6de3daf6f3ed51e2c61c0ad2751de4..09de48c50cbef8e7c4828b38dcb663b529514a30 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -114,7 +114,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -int nfsd_open_verified(struct svc_fh *fhp, int may_flags, +int nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, -- 2.51.1 As Trond pointed out: "...provided that the presented stateid is actually valid, it is also sufficient to uniquely identify the file to which it is associated (see RFC8881 Section 8.2.4), so the filehandle should be considered mostly irrelevant for operations like DELEGRETURN." Don't ask fh_verify to filter on file type. Reviewed-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/nfsd/nfs4state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81fa7cc6c77b3cdc5ff22bc60ab0654f95dc258d..da66798023aba4c36c38208cec7333db237e46e0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7828,7 +7828,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) + status = fh_verify(rqstp, &cstate->current_fh, 0, 0); + if (status) return status; status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); -- 2.51.1 Add a new routine for acquiring a read delegation on a directory. These are recallable-only delegations with no support for CB_NOTIFY. That will be added in a later phase. Since the same CB_RECALL/DELEGRETURN infrastructure is used for regular and directory delegations, a normal nfs4_delegation is used to represent a directory delegation. Reviewed-by: NeilBrown Reviewed-by: Chuck Lever Signed-off-by: Jeff Layton --- fs/nfsd/nfs4proc.c | 22 +++++++++++- fs/nfsd/nfs4state.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/state.h | 5 +++ 3 files changed, 126 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e466cf52d7d7e1a78c3a469613a85ab3546d6d17..517968dddf4a33651313658d300f9f929f83c5af 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2341,6 +2341,13 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp, union nfsd4_op_u *u) { struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + struct nfs4_delegation *dd; + struct nfsd_file *nf; + __be32 status; + + status = nfsd_file_acquire_dir(rqstp, &cstate->current_fh, &nf); + if (status != nfs_ok) + return status; /* * RFC 8881, section 18.39.3 says: @@ -2354,7 +2361,20 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp, * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this * situation. */ - gdd->gddrnf_status = GDD4_UNAVAIL; + dd = nfsd_get_dir_deleg(cstate, gdd, nf); + nfsd_file_put(nf); + if (IS_ERR(dd)) { + int err = PTR_ERR(dd); + + if (err != -EAGAIN) + return nfserrno(err); + gdd->gddrnf_status = GDD4_UNAVAIL; + return nfs_ok; + } + + gdd->gddrnf_status = GDD4_OK; + memcpy(&gdd->gddr_stateid, &dd->dl_stid.sc_stateid, sizeof(gdd->gddr_stateid)); + nfs4_put_stid(&dd->dl_stid); return nfs_ok; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da66798023aba4c36c38208cec7333db237e46e0..8f8c9385101e15b64883eabec71775f26b14f890 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -9347,3 +9347,103 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, nfs4_put_stid(&dp->dl_stid); return status; } + +/** + * nfsd_get_dir_deleg - attempt to get a directory delegation + * @cstate: compound state + * @gdd: GET_DIR_DELEGATION arg/resp structure + * @nf: nfsd_file opened on the directory + * + * Given a GET_DIR_DELEGATION request @gdd, attempt to acquire a delegation + * on the directory to which @nf refers. Note that this does not set up any + * sort of async notifications for the delegation. + */ +struct nfs4_delegation * +nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, + struct nfsd4_get_dir_delegation *gdd, + struct nfsd_file *nf) +{ + struct nfs4_client *clp = cstate->clp; + struct nfs4_delegation *dp; + struct file_lease *fl; + struct nfs4_file *fp, *rfp; + int status = 0; + + fp = nfsd4_alloc_file(); + if (!fp) + return ERR_PTR(-ENOMEM); + + nfsd4_file_init(&cstate->current_fh, fp); + + rfp = nfsd4_file_hash_insert(fp, &cstate->current_fh); + if (unlikely(!rfp)) { + put_nfs4_file(fp); + return ERR_PTR(-ENOMEM); + } + + if (rfp != fp) { + put_nfs4_file(fp); + fp = rfp; + } + + /* if this client already has one, return that it's unavailable */ + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + /* existing delegation? */ + if (nfs4_delegation_exists(clp, fp)) { + status = -EAGAIN; + } else if (!fp->fi_deleg_file) { + fp->fi_deleg_file = nfsd_file_get(nf); + fp->fi_delegees = 1; + } else { + ++fp->fi_delegees; + } + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) { + put_nfs4_file(fp); + return ERR_PTR(status); + } + + /* Try to set up the lease */ + status = -ENOMEM; + dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ); + if (!dp) + goto out_delegees; + + fl = nfs4_alloc_init_lease(dp); + if (!fl) + goto out_put_stid; + + status = kernel_setlease(nf->nf_file, + fl->c.flc_type, &fl, NULL); + if (fl) + locks_free_lease(fl); + if (status) + goto out_put_stid; + + /* + * Now, try to hash it. This can fail if we race another nfsd task + * trying to set a delegation on the same file. If that happens, + * then just say UNAVAIL. + */ + spin_lock(&state_lock); + spin_lock(&clp->cl_lock); + spin_lock(&fp->fi_lock); + status = hash_delegation_locked(dp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&clp->cl_lock); + spin_unlock(&state_lock); + + if (!status) + return dp; + + /* Something failed. Drop the lease and clean up the stid */ + kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp); +out_put_stid: + nfs4_put_stid(&dp->dl_stid); +out_delegees: + put_deleg_file(fp); + return ERR_PTR(status); +} diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 1e736f4024263ffa9c93bcc9ec48f44566a8cc77..b052c1effdc5356487c610db9728df8ecfe851d4 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -867,4 +867,9 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_delegation **pdp); + +struct nfsd4_get_dir_delegation; +struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, + struct nfsd4_get_dir_delegation *gdd, + struct nfsd_file *nf); #endif /* NFSD4_STATE_H */ -- 2.51.1 Now that support for recallable directory delegations is available, expose this functionality to userland with new F_SETDELEG and F_GETDELEG commands for fcntl(). Note that this also allows userland to request a FL_DELEG type lease on files too. Userland applications that do will get signalled when there are metadata changes in addition to just data changes (which is a limitation of FL_LEASE leases). These commands accept a new "struct delegation" argument that contains a flags field for future expansion. Signed-off-by: Jeff Layton --- fs/fcntl.c | 14 ++++++++++++++ fs/locks.c | 42 +++++++++++++++++++++++++++++++++++++----- include/linux/filelock.h | 12 ++++++++++++ include/uapi/linux/fcntl.h | 10 ++++++++++ 4 files changed, 73 insertions(+), 5 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 72f8433d9109889eecef56b32d20a85b4e12ea44..8d57a6e34076d68228b6d8d9bb381f78a751b151 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -445,6 +445,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; + struct delegation deleg; int argi = (int)arg; struct flock flock; long err = -EINVAL; @@ -550,6 +551,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SET_RW_HINT: err = fcntl_set_rw_hint(filp, arg); break; + case F_GETDELEG: + if (copy_from_user(&deleg, argp, sizeof(deleg))) + return -EFAULT; + fcntl_getdeleg(filp, &deleg); + if (copy_to_user(argp, &deleg, sizeof(deleg))) + return -EFAULT; + err = 0; + break; + case F_SETDELEG: + if (copy_from_user(&deleg, argp, sizeof(deleg))) + return -EFAULT; + err = fcntl_setdeleg(fd, filp, &deleg); + break; default: break; } diff --git a/fs/locks.c b/fs/locks.c index dd290a87f58eb5d522f03fa99d612fbad84dacf3..1e29aecf79b8df66a6c67ca5f59cec40a376b9bc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1703,7 +1703,7 @@ EXPORT_SYMBOL(lease_get_mtime); * XXX: sfr & willy disagree over whether F_INPROGRESS * should be returned to userspace. */ -int fcntl_getlease(struct file *filp) +static int __fcntl_getlease(struct file *filp, unsigned int flavor) { struct file_lease *fl; struct inode *inode = file_inode(filp); @@ -1719,7 +1719,8 @@ int fcntl_getlease(struct file *filp) list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file != filp) continue; - type = target_leasetype(fl); + if (fl->c.flc_flags & flavor) + type = target_leasetype(fl); break; } spin_unlock(&ctx->flc_lock); @@ -1730,6 +1731,16 @@ int fcntl_getlease(struct file *filp) return type; } +int fcntl_getlease(struct file *filp) +{ + return __fcntl_getlease(filp, FL_LEASE); +} + +void fcntl_getdeleg(struct file *filp, struct delegation *deleg) +{ + deleg->d_type = __fcntl_getlease(filp, FL_DELEG); +} + /** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the @@ -2039,13 +2050,13 @@ vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) } EXPORT_SYMBOL_GPL(vfs_setlease); -static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) +static int do_fcntl_add_lease(unsigned int fd, struct file *filp, unsigned int flavor, int arg) { struct file_lease *fl; struct fasync_struct *new; int error; - fl = lease_alloc(filp, FL_LEASE, arg); + fl = lease_alloc(filp, flavor, arg); if (IS_ERR(fl)) return PTR_ERR(fl); @@ -2081,7 +2092,28 @@ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); - return do_fcntl_add_lease(fd, filp, arg); + return do_fcntl_add_lease(fd, filp, FL_LEASE, arg); +} + +/** + * fcntl_setdeleg - sets a delegation on an open file + * @fd: open file descriptor + * @filp: file pointer + * @deleg: delegation request from userland + * + * Call this fcntl to establish a delegation on the file. + * Note that you also need to call %F_SETSIG to + * receive a signal when the lease is broken. + */ +int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg) +{ + /* For now, no flags are supported */ + if (deleg->d_flags != 0) + return -EINVAL; + + if (deleg->d_type == F_UNLCK) + return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); + return do_fcntl_add_lease(fd, filp, FL_DELEG, deleg->d_type); } /** diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 208d108df2d73a9df65e5dc9968d074af385f881..4384c6f61fad8f21d15974fac1d0f77747155f0f 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -159,6 +159,8 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg); +void fcntl_getdeleg(struct file *filp, struct delegation *deleg); static inline bool lock_is_unlock(struct file_lock *fl) { @@ -278,6 +280,16 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg) +{ + return -EINVAL; +} + +static inline int fcntl_getdeleg(struct file *filp, struct delegation *deleg) +{ + return F_UNLCK; +} + static inline bool lock_is_unlock(struct file_lock *fl) { return false; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 3741ea1b73d8500061567b6590ccf5fb4c6770f0..aae88f4b5c05205b2b28ae46b21bca9817197e04 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -79,6 +79,16 @@ */ #define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET +/* Set/Get delegations */ +#define F_GETDELEG (F_LINUX_SPECIFIC_BASE + 15) +#define F_SETDELEG (F_LINUX_SPECIFIC_BASE + 16) + +/* Argument structure for F_GETDELEG and F_SETDELEG */ +struct delegation { + short d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + unsigned int d_flags; +}; + /* * Types of directory notifications that may be requested. */ -- 2.51.1