From: NeilBrown By not using the generic end_creating() name here we are free to use it more globally for a more generic function. This should have been done when start_creating() was renamed. For consistency, also rename failed_creating(). Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Acked-by: Greg Kroah-Hartman Signed-off-by: NeilBrown --- fs/debugfs/inode.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 661a99a7dfbe..f241b9df642a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -403,7 +403,7 @@ static struct dentry *debugfs_start_creating(const char *name, return dentry; } -static struct dentry *failed_creating(struct dentry *dentry) +static struct dentry *debugfs_failed_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); dput(dentry); @@ -411,7 +411,7 @@ static struct dentry *failed_creating(struct dentry *dentry) return ERR_PTR(-ENOMEM); } -static struct dentry *end_creating(struct dentry *dentry) +static struct dentry *debugfs_end_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); return dentry; @@ -435,7 +435,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); + debugfs_failed_creating(dentry); return ERR_PTR(-EPERM); } @@ -443,7 +443,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", name); - return failed_creating(dentry); + return debugfs_failed_creating(dentry); } inode->i_mode = mode; @@ -458,7 +458,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return debugfs_end_creating(dentry); } struct dentry *debugfs_create_file_full(const char *name, umode_t mode, @@ -585,7 +585,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); + debugfs_failed_creating(dentry); return ERR_PTR(-EPERM); } @@ -593,7 +593,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", name); - return failed_creating(dentry); + return debugfs_failed_creating(dentry); } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; @@ -605,7 +605,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return debugfs_end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_dir); @@ -632,7 +632,7 @@ struct dentry *debugfs_create_automount(const char *name, return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); + debugfs_failed_creating(dentry); return ERR_PTR(-EPERM); } @@ -640,7 +640,7 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); - return failed_creating(dentry); + return debugfs_failed_creating(dentry); } make_empty_dir_inode(inode); @@ -652,7 +652,7 @@ struct dentry *debugfs_create_automount(const char *name, d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return debugfs_end_creating(dentry); } EXPORT_SYMBOL(debugfs_create_automount); @@ -699,13 +699,13 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, pr_err("out of free dentries, can not create symlink '%s'\n", name); kfree(link); - return failed_creating(dentry); + return debugfs_failed_creating(dentry); } inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; d_instantiate(dentry, inode); - return end_creating(dentry); + return debugfs_end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown The fact that directory operations (create,remove,rename) are protected by a lock on the parent is known widely throughout the kernel. In order to change this - to instead lock the target dentry - it is best to centralise this knowledge so it can be changed in one place. This patch introduces start_dirop() which is local to VFS code. It performs the required locking for create and remove. Rename will be handled separately. Various functions with names like start_creating() or start_removing_path(), some of which already exist, will export this functionality beyond the VFS. end_dirop() is the partner of start_dirop(). It drops the lock and releases the reference on the dentry. It *is* exported so that various end_creating etc functions can be inline. As vfs_mkdir() drops the dentry on error we cannot use end_dirop() as that won't unlock when the dentry IS_ERR(). For now we need an explicit unlock when dentry IS_ERR(). I hope to change vfs_mkdir() to unlock when it drops a dentry so that explicit unlock can go away. end_dirop() can always be called on the result of start_dirop(), but not after vfs_mkdir(). After a vfs_mkdir() we still may need the explicit unlock as seen in end_creating_path(). As well as adding start_dirop() and end_dirop() this patch uses them in: - simple_start_creating (which requires sharing lookup_noperm_common() with libfs.c) - start_removing_path / start_removing_user_path_at - filename_create / end_creating_path() - do_rmdir(), do_unlinkat() Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/internal.h | 3 ++ fs/libfs.c | 36 ++++++++--------- fs/namei.c | 98 ++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 2 + 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 9b2b4d116880..d08d5e2235e9 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -67,6 +67,9 @@ int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode); struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +struct dentry *start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags); +int lookup_noperm_common(struct qstr *qname, struct dentry *base); /* * namespace.c diff --git a/fs/libfs.c b/fs/libfs.c index 1661dcb7d983..2d6657947abd 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -2290,27 +2290,25 @@ void stashed_dentry_prune(struct dentry *dentry) cmpxchg(stashed, dentry, NULL); } -/* parent must be held exclusive */ +/** + * simple_start_creating - prepare to create a given name + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Required lock is taken and a lookup in performed prior to creating an + * object in a directory. No permission checking is performed. + * + * Returns: a negative dentry on which vfs_create() or similar may + * be attempted, or an error. + */ struct dentry *simple_start_creating(struct dentry *parent, const char *name) { - struct dentry *dentry; - struct inode *dir = d_inode(parent); + struct qstr qname = QSTR(name); + int err; - inode_lock(dir); - if (unlikely(IS_DEADDIR(dir))) { - inode_unlock(dir); - return ERR_PTR(-ENOENT); - } - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) { - inode_unlock(dir); - return dentry; - } - if (dentry->d_inode) { - dput(dentry); - inode_unlock(dir); - return ERR_PTR(-EEXIST); - } - return dentry; + err = lookup_noperm_common(&qname, parent); + if (err) + return ERR_PTR(err); + return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL); } EXPORT_SYMBOL(simple_start_creating); diff --git a/fs/namei.c b/fs/namei.c index 39c4d52f5b54..231e1ffd4b8d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2765,6 +2765,48 @@ static int filename_parentat(int dfd, struct filename *name, return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } +/** + * start_dirop - begin a create or remove dirop, performing locking and lookup + * @parent: the dentry of the parent in which the operation will occur + * @name: a qstr holding the name within that parent + * @lookup_flags: intent and other lookup flags. + * + * The lookup is performed and necessary locks are taken so that, on success, + * the returned dentry can be operated on safely. + * The qstr must already have the hash value calculated. + * + * Returns: a locked dentry, or an error. + * + */ +struct dentry *start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags) +{ + struct dentry *dentry; + struct inode *dir = d_inode(parent); + + inode_lock_nested(dir, I_MUTEX_PARENT); + dentry = lookup_one_qstr_excl(name, parent, lookup_flags); + if (IS_ERR(dentry)) + inode_unlock(dir); + return dentry; +} + +/** + * end_dirop - signal completion of a dirop + * @de: the dentry which was returned by start_dirop or similar. + * + * If the de is an error, nothing happens. Otherwise any lock taken to + * protect the dentry is dropped and the dentry itself is release (dput()). + */ +void end_dirop(struct dentry *de) +{ + if (!IS_ERR(de)) { + inode_unlock(de->d_parent->d_inode); + dput(de); + } +} +EXPORT_SYMBOL(end_dirop); + /* does lookup, returns the object with parent locked */ static struct dentry *__start_removing_path(int dfd, struct filename *name, struct path *path) @@ -2781,10 +2823,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name, return ERR_PTR(-EINVAL); /* don't fail immediately if it's r/o, at least try to report other errors */ error = mnt_want_write(parent_path.mnt); - inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); + d = start_dirop(parent_path.dentry, &last, 0); if (IS_ERR(d)) - goto unlock; + goto drop; if (error) goto fail; path->dentry = no_free_ptr(parent_path.dentry); @@ -2792,10 +2833,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name, return d; fail: - dput(d); + end_dirop(d); d = ERR_PTR(error); -unlock: - inode_unlock(parent_path.dentry->d_inode); +drop: if (!error) mnt_drop_write(parent_path.mnt); return d; @@ -2910,7 +2950,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -static int lookup_noperm_common(struct qstr *qname, struct dentry *base) +int lookup_noperm_common(struct qstr *qname, struct dentry *base) { const char *name = qname->name; u32 len = qname->len; @@ -4223,21 +4263,18 @@ static struct dentry *filename_create(int dfd, struct filename *name, */ if (last.name[last.len] && !want_dir) create_flags &= ~LOOKUP_CREATE; - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path->dentry, - reval_flag | create_flags); + dentry = start_dirop(path->dentry, &last, reval_flag | create_flags); if (IS_ERR(dentry)) - goto unlock; + goto out_drop_write; if (unlikely(error)) goto fail; return dentry; fail: - dput(dentry); + end_dirop(dentry); dentry = ERR_PTR(error); -unlock: - inode_unlock(path->dentry->d_inode); +out_drop_write: if (!error) mnt_drop_write(path->mnt); out: @@ -4256,11 +4293,26 @@ struct dentry *start_creating_path(int dfd, const char *pathname, } EXPORT_SYMBOL(start_creating_path); +/** + * end_creating_path - finish a code section started by start_creating_path() + * @path: the path instantiated by start_creating_path() + * @dentry: the dentry returned by start_creating_path() + * + * end_creating_path() will unlock and locks taken by start_creating_path() + * and drop an references that were taken. It should only be called + * if start_creating_path() returned a non-error. + * If vfs_mkdir() was called and it returned an error, that error *should* + * be passed to end_creating_path() together with the path. + */ void end_creating_path(const struct path *path, struct dentry *dentry) { - if (!IS_ERR(dentry)) - dput(dentry); - inode_unlock(path->dentry->d_inode); + if (IS_ERR(dentry)) + /* The parent is still locked despite the error from + * vfs_mkdir() - must unlock it. + */ + inode_unlock(path->dentry->d_inode); + else + end_dirop(dentry); mnt_drop_write(path->mnt); path_put(path); } @@ -4592,8 +4644,7 @@ int do_rmdir(int dfd, struct filename *name) if (error) goto exit2; - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry = start_dirop(path.dentry, &last, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; @@ -4602,9 +4653,8 @@ int do_rmdir(int dfd, struct filename *name) goto exit4; error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: - dput(dentry); + end_dirop(dentry); exit3: - inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); @@ -4721,8 +4771,7 @@ int do_unlinkat(int dfd, struct filename *name) if (error) goto exit2; retry_deleg: - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry = start_dirop(path.dentry, &last, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -4737,9 +4786,8 @@ int do_unlinkat(int dfd, struct filename *name) error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, &delegated_inode); exit3: - dput(dentry); + end_dirop(dentry); } - inode_unlock(path.dentry->d_inode); if (inode) iput(inode); /* truncate the inode here */ inode = NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 03e450dd5211..9e7556e79d19 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3196,6 +3196,8 @@ extern void iterate_supers_type(struct file_system_type *, void filesystems_freeze(void); void filesystems_thaw(void); +void end_dirop(struct dentry *de); + extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown The simplification of locking in the previous patch opens up some room for tidying up do_unlinkat() - change all "exit" labels to describe what will happen at the label. - always goto an exit label on an error - unwrap the "if (!IS_ERR())" branch. - Move the "slashes" handing inline, but mark it as unlikely() - simplify use of the "inode" variable - we no longer need to test for NULL. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/namei.c | 55 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 231e1ffd4b8d..93c5fce2d814 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4755,65 +4755,62 @@ int do_unlinkat(int dfd, struct filename *name) struct path path; struct qstr last; int type; - struct inode *inode = NULL; + struct inode *inode; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) - goto exit1; + goto exit_putname; error = -EISDIR; if (type != LAST_NORM) - goto exit2; + goto exit_path_put; error = mnt_want_write(path.mnt); if (error) - goto exit2; + goto exit_path_put; retry_deleg: dentry = start_dirop(path.dentry, &last, lookup_flags); error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { + if (IS_ERR(dentry)) + goto exit_drop_write; - /* Why not before? Because we want correct error value */ - if (last.name[last.len]) - goto slashes; - inode = dentry->d_inode; - ihold(inode); - error = security_path_unlink(&path, dentry); - if (error) - goto exit3; - error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, &delegated_inode); -exit3: + /* Why not before? Because we want correct error value */ + if (unlikely(last.name[last.len])) { + if (d_is_dir(dentry)) + error = -EISDIR; + else + error = -ENOTDIR; end_dirop(dentry); + goto exit_drop_write; } - if (inode) - iput(inode); /* truncate the inode here */ - inode = NULL; + inode = dentry->d_inode; + ihold(inode); + error = security_path_unlink(&path, dentry); + if (error) + goto exit_end_dirop; + error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); +exit_end_dirop: + end_dirop(dentry); + iput(inode); /* truncate the inode here */ if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } +exit_drop_write: mnt_drop_write(path.mnt); -exit2: +exit_path_put: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; - inode = NULL; goto retry; } -exit1: +exit_putname: putname(name); return error; - -slashes: - if (d_is_dir(dentry)) - error = -EISDIR; - else - error = -ENOTDIR; - goto exit3; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown start_creating() is similar to simple_start_creating() but is not so simple. It takes a qstr for the name, includes permission checking, and does NOT report an error if the name already exists, returning a positive dentry instead. This is currently used by nfsd, cachefiles, and overlayfs. end_creating() is called after the dentry has been used. end_creating() drops the reference to the dentry as it is generally no longer needed. This is exactly the first section of end_creating_path() so that function is changed to call the new end_creating() These calls help encapsulate locking rules so that directory locking can be changed. Occasionally this change means that the parent lock is held for a shorter period of time, for example in cachefiles_commit_tmpfile(). As this function now unlocks after an unlink and before the following lookup, it is possible that the lookup could again find a positive dentry, so a while loop is introduced there. In overlayfs the ovl_lookup_temp() function has ovl_tempname() split out to be used in ovl_start_creating_temp(). The other use of ovl_lookup_temp() is preparing for a rename. When rename handling is updated, ovl_lookup_temp() will be removed. Reviewed-by: Jeff Layton Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown --- changes since v4 Change "PTR_ERR(whiteout)" to "err" in ovl_whiteout() - thanks to Dan Carpenter. --- fs/cachefiles/namei.c | 41 ++++++++-------- fs/namei.c | 35 +++++++++++--- fs/nfsd/nfs3proc.c | 14 ++---- fs/nfsd/nfs4proc.c | 14 ++---- fs/nfsd/nfs4recover.c | 16 +++---- fs/nfsd/nfsproc.c | 11 ++--- fs/nfsd/vfs.c | 52 ++++++++------------ fs/overlayfs/copy_up.c | 19 ++++---- fs/overlayfs/dir.c | 100 +++++++++++++++++++++++---------------- fs/overlayfs/overlayfs.h | 8 ++++ fs/overlayfs/super.c | 32 +++++++------ include/linux/namei.h | 33 +++++++++++++ 12 files changed, 215 insertions(+), 160 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d1edb2ac3837..0a136eb434da 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -93,12 +93,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, _enter(",,%s", dirname); /* search the current directory for the element name */ - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); retry: ret = cachefiles_inject_read_error(); if (ret == 0) - subdir = lookup_one(&nop_mnt_idmap, &QSTR(dirname), dir); + subdir = start_creating(&nop_mnt_idmap, dir, &QSTR(dirname)); else subdir = ERR_PTR(ret); trace_cachefiles_lookup(NULL, dir, subdir); @@ -141,7 +140,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, trace_cachefiles_mkdir(dir, subdir); if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) { - dput(subdir); + end_creating(subdir, dir); goto retry; } ASSERT(d_backing_inode(subdir)); @@ -154,7 +153,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* Tell rmdir() it's not allowed to delete the subdir */ inode_lock(d_inode(subdir)); - inode_unlock(d_inode(dir)); + dget(subdir); + end_creating(subdir, dir); if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) { pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", @@ -196,14 +196,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, return ERR_PTR(-EBUSY); mkdir_error: - inode_unlock(d_inode(dir)); - if (!IS_ERR(subdir)) - dput(subdir); + end_creating(subdir, dir); pr_err("mkdir %s failed with error %d\n", dirname, ret); return ERR_PTR(ret); lookup_error: - inode_unlock(d_inode(dir)); ret = PTR_ERR(subdir); pr_err("Lookup %s failed with error %d\n", dirname, ret); return ERR_PTR(ret); @@ -679,36 +676,41 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, _enter(",%pD", object->file); - inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); ret = cachefiles_inject_read_error(); if (ret == 0) - dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan); + dentry = start_creating(&nop_mnt_idmap, fan, &QSTR(object->d_name)); else dentry = ERR_PTR(ret); if (IS_ERR(dentry)) { trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), cachefiles_trace_lookup_error); _debug("lookup fail %ld", PTR_ERR(dentry)); - goto out_unlock; + goto out; } - if (!d_is_negative(dentry)) { + /* + * This loop will only execute more than once if some other thread + * races to create the object we are trying to create. + */ + while (!d_is_negative(dentry)) { ret = cachefiles_unlink(volume->cache, object, fan, dentry, FSCACHE_OBJECT_IS_STALE); if (ret < 0) - goto out_dput; + goto out_end; + + end_creating(dentry, fan); - dput(dentry); ret = cachefiles_inject_read_error(); if (ret == 0) - dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan); + dentry = start_creating(&nop_mnt_idmap, fan, + &QSTR(object->d_name)); else dentry = ERR_PTR(ret); if (IS_ERR(dentry)) { trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), cachefiles_trace_lookup_error); _debug("lookup fail %ld", PTR_ERR(dentry)); - goto out_unlock; + goto out; } } @@ -729,10 +731,9 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, success = true; } -out_dput: - dput(dentry); -out_unlock: - inode_unlock(d_inode(fan)); +out_end: + end_creating(dentry, fan); +out: _leave(" = %u", success); return success; } diff --git a/fs/namei.c b/fs/namei.c index 93c5fce2d814..8873ad0f05b0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3221,6 +3221,33 @@ struct dentry *lookup_noperm_positive_unlocked(struct qstr *name, } EXPORT_SYMBOL(lookup_noperm_positive_unlocked); +/** + * start_creating - prepare to create a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup is performed prior to creating + * an object in a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name already exists, a positive dentry is returned, so + * behaviour is similar to O_CREAT without O_EXCL, which doesn't fail + * with -EEXIST. + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, LOOKUP_CREATE); +} +EXPORT_SYMBOL(start_creating); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { @@ -4306,13 +4333,7 @@ EXPORT_SYMBOL(start_creating_path); */ void end_creating_path(const struct path *path, struct dentry *dentry) { - if (IS_ERR(dentry)) - /* The parent is still locked despite the error from - * vfs_mkdir() - must unlock it. - */ - inode_unlock(path->dentry->d_inode); - else - end_dirop(dentry); + end_creating(dentry, path->dentry); mnt_drop_write(path->mnt); path_put(path); } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index b6d03e1ef5f7..e2aac0def2cb 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -281,14 +281,11 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - inode_lock_nested(inode, I_MUTEX_PARENT); - - child = lookup_one(&nop_mnt_idmap, - &QSTR_LEN(argp->name, argp->len), - parent); + child = start_creating(&nop_mnt_idmap, parent, + &QSTR_LEN(argp->name, argp->len)); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); - goto out; + goto out_write; } if (d_really_is_negative(child)) { @@ -367,9 +364,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: - inode_unlock(inode); - if (child && !IS_ERR(child)) - dput(child); + end_creating(child, parent); +out_write: fh_drop_write(fhp); return status; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e466cf52d7d7..b2c95e8e7c68 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -264,14 +264,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (is_create_with_attrs(open)) nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); - inode_lock_nested(inode, I_MUTEX_PARENT); - - child = lookup_one(&nop_mnt_idmap, - &QSTR_LEN(open->op_fname, open->op_fnamelen), - parent); + child = start_creating(&nop_mnt_idmap, parent, + &QSTR_LEN(open->op_fname, open->op_fnamelen)); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); - goto out; + goto out_write; } if (d_really_is_negative(child)) { @@ -379,10 +376,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (attrs.na_aclerr) open->op_bmval[0] &= ~FATTR4_WORD0_ACL; out: - inode_unlock(inode); + end_creating(child, parent); nfsd_attrs_free(&attrs); - if (child && !IS_ERR(child)) - dput(child); +out_write: fh_drop_write(fhp); return status; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e2b9472e5c78..c247a7c3291c 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -195,13 +195,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) goto out_creds; dir = nn->rec_file->f_path.dentry; - /* lock the parent */ - inode_lock(d_inode(dir)); - dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir); + dentry = start_creating(&nop_mnt_idmap, dir, &QSTR(dname)); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); - goto out_unlock; + goto out; } if (d_really_is_positive(dentry)) /* @@ -212,15 +210,13 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * In the 4.0 case, we should never get here; but we may * as well be forgiving and just succeed silently. */ - goto out_put; + goto out_end; dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); if (IS_ERR(dentry)) status = PTR_ERR(dentry); -out_put: - if (!status) - dput(dentry); -out_unlock: - inode_unlock(d_inode(dir)); +out_end: + end_creating(dentry, dir); +out: if (status == 0) { if (nn->in_grace) __nfsd4_create_reclaim_record_grace(clp, dname, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 8f71f5748c75..ee1b16e921fd 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -306,18 +306,16 @@ nfsd_proc_create(struct svc_rqst *rqstp) goto done; } - inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len), - dirfhp->fh_dentry); + dchild = start_creating(&nop_mnt_idmap, dirfhp->fh_dentry, + &QSTR_LEN(argp->name, argp->len)); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); - goto out_unlock; + goto out_write; } fh_init(newfhp, NFS_FHSIZE); resp->status = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp); if (!resp->status && d_really_is_negative(dchild)) resp->status = nfserr_noent; - dput(dchild); if (resp->status) { if (resp->status != nfserr_noent) goto out_unlock; @@ -423,7 +421,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) } out_unlock: - inode_unlock(dirfhp->fh_dentry->d_inode); + end_creating(dchild, dirfhp->fh_dentry); +out_write: fh_drop_write(dirfhp); done: fh_put(dirfhp); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cf4062ac092a..24e501abad0e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1521,7 +1521,7 @@ nfsd_check_ignore_resizing(struct iattr *iap) iap->ia_valid &= ~ATTR_SIZE; } -/* The parent directory should already be locked: */ +/* The parent directory should already be locked - we will unlock */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs *attrs, @@ -1587,8 +1587,9 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: - if (!IS_ERR(dchild)) - dput(dchild); + if (!err) + fh_fill_post_attrs(fhp); + end_creating(dchild, dentry); return err; out_nfserr: @@ -1626,28 +1627,26 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); + dchild = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen)); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) { - err = nfserrno(host_err); - goto out_unlock; - } + if (IS_ERR(dchild)) + return nfserrno(host_err); + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have * already grabbed its own ref for it. */ - dput(dchild); if (err) goto out_unlock; err = fh_fill_pre_attrs(fhp); if (err != nfs_ok) goto out_unlock; err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); - fh_fill_post_attrs(fhp); + return err; + out_unlock: - inode_unlock(dentry->d_inode); + end_creating(dchild, dentry); return err; } @@ -1733,11 +1732,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, } dentry = fhp->fh_dentry; - inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); + dnew = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen)); if (IS_ERR(dnew)) { err = nfserrno(PTR_ERR(dnew)); - inode_unlock(dentry->d_inode); goto out_drop_write; } err = fh_fill_pre_attrs(fhp); @@ -1750,11 +1747,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_create_setattr(rqstp, fhp, resfhp, attrs); fh_fill_post_attrs(fhp); out_unlock: - inode_unlock(dentry->d_inode); + end_creating(dnew, dentry); if (!err) err = nfserrno(commit_metadata(fhp)); - dput(dnew); - if (err==0) err = cerr; + if (!err) + err = cerr; out_drop_write: fh_drop_write(fhp); out: @@ -1809,32 +1806,31 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, ddir = ffhp->fh_dentry; dirp = d_inode(ddir); - inode_lock_nested(dirp, I_MUTEX_PARENT); + dnew = start_creating(&nop_mnt_idmap, ddir, &QSTR_LEN(name, len)); - dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir); if (IS_ERR(dnew)) { host_err = PTR_ERR(dnew); - goto out_unlock; + goto out_drop_write; } dold = tfhp->fh_dentry; err = nfserr_noent; if (d_really_is_negative(dold)) - goto out_dput; + goto out_unlock; err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) - goto out_dput; + goto out_unlock; host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL); fh_fill_post_attrs(ffhp); - inode_unlock(dirp); +out_unlock: + end_creating(dnew, ddir); if (!host_err) { host_err = commit_metadata(ffhp); if (!host_err) host_err = commit_metadata(tfhp); } - dput(dnew); out_drop_write: fh_drop_write(tfhp); if (host_err == -EBUSY) { @@ -1849,12 +1845,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, } out: return err != nfs_ok ? err : nfserrno(host_err); - -out_dput: - dput(dnew); -out_unlock: - inode_unlock(dirp); - goto out_drop_write; } static void diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 604a82acd164..e2bdac4317e7 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -613,9 +613,9 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) if (err) goto out; - inode_lock_nested(udir, I_MUTEX_PARENT); - upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, - c->dentry->d_name.len); + upper = ovl_start_creating_upper(ofs, upperdir, + &QSTR_LEN(c->dentry->d_name.name, + c->dentry->d_name.len)); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); @@ -626,9 +626,8 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } - dput(upper); + end_creating(upper, upperdir); } - inode_unlock(udir); if (err) goto out; @@ -894,16 +893,14 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (err) goto out; - inode_lock_nested(udir, I_MUTEX_PARENT); - - upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, - c->destname.len); + upper = ovl_start_creating_upper(ofs, c->destdir, + &QSTR_LEN(c->destname.name, + c->destname.len)); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, temp, udir, upper); - dput(upper); + end_creating(upper, c->destdir); } - inode_unlock(udir); if (err) goto out; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 83b955a1d55c..b9160fefbd00 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -59,15 +59,21 @@ int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, return 0; } -struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) +#define OVL_TEMPNAME_SIZE 20 +static void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) { - struct dentry *temp; - char name[20]; static atomic_t temp_id = ATOMIC_INIT(0); /* counter is allowed to wrap, since temp dentries are ephemeral */ - snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id)); + snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id)); +} + +struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) +{ + struct dentry *temp; + char name[OVL_TEMPNAME_SIZE]; + ovl_tempname(name); temp = ovl_lookup_upper(ofs, name, workdir, strlen(name)); if (!IS_ERR(temp) && temp->d_inode) { pr_err("workdir/%s already exists\n", name); @@ -78,48 +84,52 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) return temp; } +static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs, + struct dentry *workdir) +{ + char name[OVL_TEMPNAME_SIZE]; + + ovl_tempname(name); + return start_creating(ovl_upper_mnt_idmap(ofs), workdir, + &QSTR(name)); +} + static struct dentry *ovl_whiteout(struct ovl_fs *ofs) { int err; - struct dentry *whiteout; + struct dentry *whiteout, *link; struct dentry *workdir = ofs->workdir; struct inode *wdir = workdir->d_inode; guard(mutex)(&ofs->whiteout_lock); if (!ofs->whiteout) { - inode_lock_nested(wdir, I_MUTEX_PARENT); - whiteout = ovl_lookup_temp(ofs, workdir); - if (!IS_ERR(whiteout)) { - err = ovl_do_whiteout(ofs, wdir, whiteout); - if (err) { - dput(whiteout); - whiteout = ERR_PTR(err); - } - } - inode_unlock(wdir); + whiteout = ovl_start_creating_temp(ofs, workdir); if (IS_ERR(whiteout)) return whiteout; - ofs->whiteout = whiteout; + err = ovl_do_whiteout(ofs, wdir, whiteout); + if (!err) + ofs->whiteout = dget(whiteout); + end_creating(whiteout, workdir); + if (err) + return ERR_PTR(err); } if (!ofs->no_shared_whiteout) { - inode_lock_nested(wdir, I_MUTEX_PARENT); - whiteout = ovl_lookup_temp(ofs, workdir); - if (!IS_ERR(whiteout)) { - err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); - if (err) { - dput(whiteout); - whiteout = ERR_PTR(err); - } - } - inode_unlock(wdir); - if (!IS_ERR(whiteout)) - return whiteout; - if (PTR_ERR(whiteout) != -EMLINK) { - pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%lu)\n", + link = ovl_start_creating_temp(ofs, workdir); + if (IS_ERR(link)) + return link; + err = ovl_do_link(ofs, ofs->whiteout, wdir, link); + if (!err) + whiteout = dget(link); + end_creating(link, workdir); + if (!err) + return whiteout;; + + if (err != -EMLINK) { + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%u)\n", ofs->whiteout->d_inode->i_nlink, - PTR_ERR(whiteout)); + err); ofs->no_shared_whiteout = true; } } @@ -252,10 +262,13 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { struct dentry *ret; - inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT); - ret = ovl_create_real(ofs, workdir, - ovl_lookup_temp(ofs, workdir), attr); - inode_unlock(workdir->d_inode); + ret = ovl_start_creating_temp(ofs, workdir); + if (IS_ERR(ret)) + return ret; + ret = ovl_create_real(ofs, workdir, ret, attr); + if (!IS_ERR(ret)) + dget(ret); + end_creating(ret, workdir); return ret; } @@ -354,18 +367,21 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; struct dentry *newdentry; int err; - inode_lock_nested(udir, I_MUTEX_PARENT); - newdentry = ovl_create_real(ofs, upperdir, - ovl_lookup_upper(ofs, dentry->d_name.name, - upperdir, dentry->d_name.len), - attr); - inode_unlock(udir); + newdentry = ovl_start_creating_upper(ofs, upperdir, + &QSTR_LEN(dentry->d_name.name, + dentry->d_name.len)); if (IS_ERR(newdentry)) return PTR_ERR(newdentry); + newdentry = ovl_create_real(ofs, upperdir, newdentry, attr); + if (IS_ERR(newdentry)) { + end_creating(newdentry, upperdir); + return PTR_ERR(newdentry); + } + dget(newdentry); + end_creating(newdentry, upperdir); if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && !ovl_allow_offline_changes(ofs)) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c8fd5951fc5e..beeba96cfcb2 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -415,6 +415,14 @@ static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs, &QSTR_LEN(name, len), base); } +static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs, + struct dentry *parent, + struct qstr *name) +{ + return start_creating(ovl_upper_mnt_idmap(ofs), + parent, name); +} + static inline bool ovl_open_flags_need_copy_up(int flags) { if (!flags) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 43ee4c7296a7..6e0816c1147a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -310,8 +310,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, bool retried = false; retry: - inode_lock_nested(dir, I_MUTEX_PARENT); - work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name)); + work = ovl_start_creating_upper(ofs, ofs->workbasedir, &QSTR(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -320,14 +319,13 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, }; if (work->d_inode) { + dget(work); + end_creating(work, ofs->workbasedir); + if (persist) + return work; err = -EEXIST; - inode_unlock(dir); if (retried) goto out_dput; - - if (persist) - return work; - retried = true; err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0); dput(work); @@ -338,7 +336,9 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, } work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); - inode_unlock(dir); + if (!IS_ERR(work)) + dget(work); + end_creating(work, ofs->workbasedir); err = PTR_ERR(work); if (IS_ERR(work)) goto out_err; @@ -376,7 +376,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, if (err) goto out_dput; } else { - inode_unlock(dir); err = PTR_ERR(work); goto out_err; } @@ -626,14 +625,17 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, struct dentry *parent, const char *name, umode_t mode) { - size_t len = strlen(name); struct dentry *child; - inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - child = ovl_lookup_upper(ofs, name, parent, len); - if (!IS_ERR(child) && !child->d_inode) - child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode)); - inode_unlock(parent->d_inode); + child = ovl_start_creating_upper(ofs, parent, &QSTR(name)); + if (!IS_ERR(child)) { + if (!child->d_inode) + child = ovl_create_real(ofs, parent, child, + OVL_CATTR(mode)); + if (!IS_ERR(child)) + dget(child); + end_creating(child, parent); + } dput(parent); return child; diff --git a/include/linux/namei.h b/include/linux/namei.h index b0679c7420a8..37b72f4a64f0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -89,6 +89,39 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); +struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name); + +/** + * end_creating - finish action started with start_creating + * @child: dentry returned by start_creating() or vfs_mkdir() + * @parent: dentry given to start_creating(), + * + * Unlock and release the child. + * + * Unlike end_dirop() this can only be called if start_creating() succeeded. + * It handles @child being and error as vfs_mkdir() might have converted the + * dentry to an error - in that case the parent still needs to be unlocked. + * + * If vfs_mkdir() was called then the value returned from that function + * should be given for @child rather than the original dentry, as vfs_mkdir() + * may have provided a new dentry. Even if vfs_mkdir() returns an error + * it must be given to end_creating(). + * + * If vfs_mkdir() was not called, then @child will be a valid dentry and + * @parent will be ignored. + */ +static inline void end_creating(struct dentry *child, struct dentry *parent) +{ + if (IS_ERR(child)) + /* The parent is still locked despite the error from + * vfs_mkdir() - must unlock it. + */ + inode_unlock(parent->d_inode); + else + end_dirop(child); +} + extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown start_removing() is similar to start_creating() but will only return a positive dentry with the expectation that it will be removed. This is used by nfsd, cachefiles, and overlayfs. They are changed to also use end_removing() to terminate the action begun by start_removing(). This is a simple alias for end_dirop(). Apart from changes to the error paths, as we no longer need to unlock on a lookup error, an effect on callers is that they don't need to test if the found dentry is positive or negative - they can be sure it is positive. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/cachefiles/namei.c | 32 ++++++++++++++------------------ fs/namei.c | 27 +++++++++++++++++++++++++++ fs/nfsd/nfs4recover.c | 18 +++++------------- fs/nfsd/vfs.c | 26 ++++++++++---------------- fs/overlayfs/dir.c | 15 +++++++-------- fs/overlayfs/overlayfs.h | 8 ++++++++ include/linux/namei.h | 18 ++++++++++++++++++ 7 files changed, 89 insertions(+), 55 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0a136eb434da..c7f0c6ab9b88 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache, * - File backed objects are unlinked * - Directory backed objects are stuffed into the graveyard for userspace to * delete + * On entry dir must be locked. It will be unlocked on exit. */ int cachefiles_bury_object(struct cachefiles_cache *cache, struct cachefiles_object *object, @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache, _enter(",'%pd','%pd'", dir, rep); + /* end_removing() will dput() @rep but we need to keep + * a ref, so take one now. This also stops the dentry + * being negated when unlinked which we need. + */ + dget(rep); + if (rep->d_parent != dir) { - inode_unlock(d_inode(dir)); + end_removing(rep); _leave(" = -ESTALE"); return -ESTALE; } /* non-directories can just be unlinked */ if (!d_is_dir(rep)) { - dget(rep); /* Stop the dentry being negated if it's only pinned - * by a file struct. - */ ret = cachefiles_unlink(cache, object, dir, rep, why); - dput(rep); + end_removing(rep); - inode_unlock(d_inode(dir)); _leave(" = %d", ret); return ret; } /* directories have to be moved to the graveyard */ _debug("move stale object to graveyard"); - inode_unlock(d_inode(dir)); + end_removing(rep); try_again: /* first step is to make up a grave dentry in the graveyard */ @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache, struct dentry *victim; int ret = -ENOENT; - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename)); - victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir); if (IS_ERR(victim)) goto lookup_error; - if (d_is_negative(victim)) - goto lookup_put; if (d_inode(victim)->i_flags & S_KERNEL_FILE) goto lookup_busy; return victim; lookup_busy: ret = -EBUSY; -lookup_put: - inode_unlock(d_inode(dir)); - dput(victim); + end_removing(victim); return ERR_PTR(ret); lookup_error: - inode_unlock(d_inode(dir)); ret = PTR_ERR(victim); if (ret == -ENOENT) return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */ @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, ret = cachefiles_bury_object(cache, NULL, dir, victim, FSCACHE_OBJECT_WAS_CULLED); + dput(victim); if (ret < 0) goto error; fscache_count_culled(); - dput(victim); _leave(" = 0"); return 0; error_unlock: - inode_unlock(d_inode(dir)); + end_removing(victim); error: - dput(victim); if (ret == -ENOENT) return -ESTALE; /* Probably got retired by the netfs */ diff --git a/fs/namei.c b/fs/namei.c index 8873ad0f05b0..38dda29552f6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, } EXPORT_SYMBOL(start_creating); +/** + * start_removing - prepare to remove a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, 0); +} +EXPORT_SYMBOL(start_removing); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c247a7c3291c..3eefaa2202e3 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name); dir = nn->rec_file->f_path.dentry; - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir); - if (IS_ERR(dentry)) { - status = PTR_ERR(dentry); - goto out_unlock; - } - status = -ENOENT; - if (d_really_is_negative(dentry)) - goto out; + dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name)); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); -out: - dput(dentry); -out_unlock: - inode_unlock(d_inode(dir)); + end_removing(dentry); return status; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 24e501abad0e..6291c371caa7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, { struct dentry *dentry, *rdentry; struct inode *dirp; - struct inode *rinode; + struct inode *rinode = NULL; __be32 err; int host_err; @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dentry = fhp->fh_dentry; dirp = d_inode(dentry); - inode_lock_nested(dirp, I_MUTEX_PARENT); - rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); + rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen)); + host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_unlock; + goto out_drop_write; - if (d_really_is_negative(rdentry)) { - dput(rdentry); - host_err = -ENOENT; - goto out_unlock; - } - rinode = d_inode(rdentry); err = fh_fill_pre_attrs(fhp); if (err != nfs_ok) goto out_unlock; + rinode = d_inode(rdentry); + /* Prevent truncation until after locks dropped */ ihold(rinode); + if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } fh_fill_post_attrs(fhp); - inode_unlock(dirp); - if (!host_err) +out_unlock: + end_removing(rdentry); + if (!err && !host_err) host_err = commit_metadata(fhp); - dput(rdentry); iput(rinode); /* truncate the inode here */ out_drop_write: @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } out: return err != nfs_ok ? err : nfserrno(host_err); -out_unlock: - inode_unlock(dirp); - goto out_drop_write; } /* diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index b9160fefbd00..20682afdbd20 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, goto out; } - inode_lock_nested(dir, I_MUTEX_PARENT); - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_start_removing_upper(ofs, upperdir, + &QSTR_LEN(dentry->d_name.name, + dentry->d_name.len)); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out_unlock; + goto out_dput; err = -ESTALE; if ((opaquedir && upper != opaquedir) || (!opaquedir && !ovl_matches_upper(dentry, upper))) - goto out_dput_upper; + goto out_unlock; if (is_dir) err = ovl_do_rmdir(ofs, dir, upper); @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, */ if (!err) d_drop(dentry); -out_dput_upper: - dput(upper); out_unlock: - inode_unlock(dir); + end_removing(upper); +out_dput: dput(opaquedir); out: return err; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index beeba96cfcb2..49ad65f829dc 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs, parent, name); } +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs, + struct dentry *parent, + struct qstr *name) +{ + return start_removing(ovl_upper_mnt_idmap(ofs), + parent, name); +} + static inline bool ovl_open_flags_need_copy_up(int flags) { if (!flags) diff --git a/include/linux/namei.h b/include/linux/namei.h index 37b72f4a64f0..6d1069f93ebf 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name); /** * end_creating - finish action started with start_creating @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent) end_dirop(child); } +/** + * end_removing - finish action started with start_removing + * @child: dentry returned by start_removing() + * @parent: dentry given to start_removing() + * + * Unlock and release the child. + * + * This is identical to end_dirop(). It can be passed the result of + * start_removing() whether that was successful or not, but it not needed + * if start_removing() failed. + */ +static inline void end_removing(struct dentry *child) +{ + end_dirop(child); +} + extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown xfs, fuse, ipc/mqueue need variants of start_creating or start_removing which do not check permissions. This patch adds _noperm versions of these functions. Note that do_mq_open() was only calling mntget() so it could call path_put() - it didn't really need an extra reference on the mnt. Now it doesn't call mntget() and uses end_creating() which does the dput() half of path_put(). Also mq_unlink() previously passed d_inode(dentry->d_parent) as the dir inode to vfs_unlink(). This is after locking d_inode(mnt->mnt_root) These two inodes are the same, but normally calls use the textual parent. So I've changes the vfs_unlink() call to be given d_inode(mnt->mnt_root). Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown -- changes since v2: - dir arg passed to vfs_unlink() in mq_unlink() changed to match the dir passed to lookup_noperm() - restore assignment to path->mnt even though the mntget() is removed. --- fs/fuse/dir.c | 19 +++++++--------- fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/orphanage.c | 11 ++++----- include/linux/namei.h | 2 ++ ipc/mqueue.c | 32 ++++++++++----------------- 5 files changed, 74 insertions(+), 38 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 316922d5dd13..a0d5b302bcc2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, if (!parent) return -ENOENT; - inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) - goto unlock; + goto put_parent; err = -ENOENT; dir = d_find_alias(parent); if (!dir) - goto unlock; + goto put_parent; - name->hash = full_name_hash(dir, name->name, name->len); - entry = d_lookup(dir, name); + entry = start_removing_noperm(dir, name); dput(dir); - if (!entry) - goto unlock; + if (IS_ERR(entry)) + goto put_parent; fuse_dir_changed(parent); if (!(flags & FUSE_EXPIRE_ONLY)) d_invalidate(entry); fuse_invalidate_entry_cache(entry); - if (child_nodeid != 0 && d_really_is_positive(entry)) { + if (child_nodeid != 0) { inode_lock(d_inode(entry)); if (get_node_id(d_inode(entry)) != child_nodeid) { err = -ENOENT; @@ -1445,10 +1443,9 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, } else { err = 0; } - dput(entry); - unlock: - inode_unlock(parent); + end_removing(entry); + put_parent: iput(parent); return err; } diff --git a/fs/namei.c b/fs/namei.c index 38dda29552f6..da01b828ede6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3275,6 +3275,54 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, } EXPORT_SYMBOL(start_removing); +/** + * start_creating_noperm - prepare to create a given name without permission checking + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup in performed prior to creating + * an object in a directory. + * + * If the name already exists, a positive dentry is returned. + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating_noperm(struct dentry *parent, + struct qstr *name) +{ + int err = lookup_noperm_common(name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, LOOKUP_CREATE); +} +EXPORT_SYMBOL(start_creating_noperm); + +/** + * start_removing_noperm - prepare to remove a given name without permission checking + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing_noperm(struct dentry *parent, + struct qstr *name) +{ + int err = lookup_noperm_common(name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, 0); +} +EXPORT_SYMBOL(start_removing_noperm); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 9c12cb844231..e732605924a1 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -152,11 +152,10 @@ xrep_orphanage_create( } /* Try to find the orphanage directory. */ - inode_lock_nested(root_inode, I_MUTEX_PARENT); - orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry); + orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE)); if (IS_ERR(orphanage_dentry)) { error = PTR_ERR(orphanage_dentry); - goto out_unlock_root; + goto out_dput_root; } /* @@ -170,7 +169,7 @@ xrep_orphanage_create( orphanage_dentry, 0750); error = PTR_ERR(orphanage_dentry); if (IS_ERR(orphanage_dentry)) - goto out_unlock_root; + goto out_dput_orphanage; } /* Not a directory? Bail out. */ @@ -200,9 +199,7 @@ xrep_orphanage_create( sc->orphanage_ilock_flags = 0; out_dput_orphanage: - dput(orphanage_dentry); -out_unlock_root: - inode_unlock(VFS_I(sc->mp->m_rootip)); + end_creating(orphanage_dentry, root_dentry); out_dput_root: dput(root_dentry); out: diff --git a/include/linux/namei.h b/include/linux/namei.h index 6d1069f93ebf..0441f5921f87 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -93,6 +93,8 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); +struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name); +struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name); /** * end_creating - finish action started with start_creating diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 093551fe66a7..6d7610310003 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -913,13 +913,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ - inode_lock(d_inode(root)); - path.dentry = lookup_noperm(&QSTR(name->name), root); + path.dentry = start_creating_noperm(root, &QSTR(name->name)); if (IS_ERR(path.dentry)) { error = PTR_ERR(path.dentry); goto out_putfd; } - path.mnt = mntget(mnt); + path.mnt = mnt; error = prepare_open(path.dentry, oflag, ro, mode, name, attr); if (!error) { struct file *file = dentry_open(&path, oflag, current_cred()); @@ -928,13 +927,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, else error = PTR_ERR(file); } - path_put(&path); out_putfd: if (error) { put_unused_fd(fd); fd = error; } - inode_unlock(d_inode(root)); + end_creating(path.dentry, root); if (!ro) mnt_drop_write(mnt); out_putname: @@ -957,7 +955,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) int err; struct filename *name; struct dentry *dentry; - struct inode *inode = NULL; + struct inode *inode; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; @@ -969,26 +967,20 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = mnt_want_write(mnt); if (err) goto out_name; - inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); - dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root); + dentry = start_removing_noperm(mnt->mnt_root, &QSTR(name->name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); - goto out_unlock; + goto out_drop_write; } inode = d_inode(dentry); - if (!inode) { - err = -ENOENT; - } else { - ihold(inode); - err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent), - dentry, NULL); - } - dput(dentry); - -out_unlock: - inode_unlock(d_inode(mnt->mnt_root)); + ihold(inode); + err = vfs_unlink(&nop_mnt_idmap, d_inode(mnt->mnt_root), + dentry, NULL); + end_removing(dentry); iput(inode); + +out_drop_write: mnt_drop_write(mnt); out_name: putname(name); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown Sometimes smb2_create_link() needs to remove the target before creating the link. It uses ksmbd_vfs_kern_locked(), and is the only user of that interface. To match the new naming, that function is changed to ksmbd_vfs_kern_start_removing(), and related functions or flags are also renamed. The lock actually happens in ksmbd_vfs_path_lookup() and that is changed to use start_removing_noperm() - permission to perform lookup in the parent was already checked in vfs_path_parent_lookup(). Signed-off-by: NeilBrown --- fs/smb/server/smb2pdu.c | 6 +++--- fs/smb/server/vfs.c | 27 ++++++++++++--------------- fs/smb/server/vfs.h | 8 ++++---- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f901ae18e68a..94454e8826b0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6092,8 +6092,8 @@ static int smb2_create_link(struct ksmbd_work *work, } ksmbd_debug(SMB, "target name is %s\n", target_name); - rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS, - &path, 0); + rc = ksmbd_vfs_kern_path_start_removing(work, link_name, LOOKUP_NO_SYMLINKS, + &path, 0); if (rc) { if (rc != -ENOENT) goto out; @@ -6111,7 +6111,7 @@ static int smb2_create_link(struct ksmbd_work *work, ksmbd_debug(SMB, "link already exists\n"); goto out; } - ksmbd_vfs_kern_path_unlock(&path); + ksmbd_vfs_kern_path_end_removing(&path); } rc = ksmbd_vfs_link(work, target_name, link_name); if (rc) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 891ed2dc2b73..ea0a06b0ae44 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -69,7 +69,7 @@ int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child) static int ksmbd_vfs_path_lookup(struct ksmbd_share_config *share_conf, char *pathname, unsigned int flags, - struct path *path, bool do_lock) + struct path *path, bool for_remove) { struct qstr last; struct filename *filename __free(putname) = NULL; @@ -99,22 +99,20 @@ static int ksmbd_vfs_path_lookup(struct ksmbd_share_config *share_conf, return -ENOENT; } - if (do_lock) { + if (for_remove) { err = mnt_want_write(path->mnt); if (err) { path_put(path); return -ENOENT; } - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, path->dentry, 0); + d = start_removing_noperm(path->dentry, &last); if (!IS_ERR(d)) { dput(path->dentry); path->dentry = d; return 0; } - inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); return -ENOENT; @@ -1207,7 +1205,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, static int __ksmbd_vfs_kern_path(struct ksmbd_work *work, char *filepath, unsigned int flags, - struct path *path, bool caseless, bool do_lock) + struct path *path, bool caseless, bool for_remove) { struct ksmbd_share_config *share_conf = work->tcon->share_conf; struct path parent_path; @@ -1215,7 +1213,7 @@ int __ksmbd_vfs_kern_path(struct ksmbd_work *work, char *filepath, int err; retry: - err = ksmbd_vfs_path_lookup(share_conf, filepath, flags, path, do_lock); + err = ksmbd_vfs_path_lookup(share_conf, filepath, flags, path, for_remove); if (!err || !caseless) return err; @@ -1286,7 +1284,7 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *filepath, } /** - * ksmbd_vfs_kern_path_locked() - lookup a file and get path info + * ksmbd_vfs_kern_path_start_remove() - lookup a file and get path info prior to removal * @work: work * @filepath: file path that is relative to share * @flags: lookup flags @@ -1298,20 +1296,19 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *filepath, * filesystem will have been gained. * Return: 0 on if file was found, otherwise error */ -int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *filepath, - unsigned int flags, - struct path *path, bool caseless) +int ksmbd_vfs_kern_path_start_removing(struct ksmbd_work *work, char *filepath, + unsigned int flags, + struct path *path, bool caseless) { return __ksmbd_vfs_kern_path(work, filepath, flags, path, caseless, true); } -void ksmbd_vfs_kern_path_unlock(const struct path *path) +void ksmbd_vfs_kern_path_end_removing(const struct path *path) { - /* While lock is still held, ->d_parent is safe */ - inode_unlock(d_inode(path->dentry->d_parent)); + end_removing(path->dentry); mnt_drop_write(path->mnt); - path_put(path); + mntput(path->mnt); } struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index df6421b4590b..16ca29ee16e5 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -120,10 +120,10 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name, unsigned int flags, struct path *path, bool caseless); -int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, - unsigned int flags, - struct path *path, bool caseless); -void ksmbd_vfs_kern_path_unlock(const struct path *path); +int ksmbd_vfs_kern_path_start_removing(struct ksmbd_work *work, char *name, + unsigned int flags, + struct path *path, bool caseless); +void ksmbd_vfs_kern_path_end_removing(const struct path *path); struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown start_removing_dentry() is similar to start_removing() but instead of providing a name for lookup, the target dentry is given. start_removing_dentry() checks that the dentry is still hashed and in the parent, and if so it locks and increases the refcount so that end_removing() can be used to finish the operation. This is used in cachefiles, overlayfs, smb/server, and apparmor. There will be other users including ecryptfs. As start_removing_dentry() takes an extra reference to the dentry (to be put by end_removing()), there is no need to explicitly take an extra reference to stop d_delete() from using dentry_unlink_inode() to negate the dentry - as in cachefiles_delete_object(), and ksmbd_vfs_unlink(). cachefiles_bury_object() now gets an extra ref to the victim, which is drops. As it includes the needed end_removing() calls, the caller doesn't need them. Reviewed-by: Amir Goldstein Reviewed-by: Namjae Jeon (for ksmbd part) Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- Changes since v4 Callers of cachefiles_bury_object() were incorrectly calling end_removing() after that call. The dput() was needed, the unlock wasn't. The dput() has been effectively moved into cachefiles_bury_object() by removing a dget() which is now not needed. --- fs/cachefiles/interface.c | 11 +++++++---- fs/cachefiles/namei.c | 30 ++++++++++++++---------------- fs/cachefiles/volume.c | 9 ++++++--- fs/namei.c | 33 +++++++++++++++++++++++++++++++++ fs/overlayfs/dir.c | 10 ++++------ fs/overlayfs/readdir.c | 8 ++++---- fs/smb/server/vfs.c | 27 ++++----------------------- include/linux/namei.h | 2 ++ security/apparmor/apparmorfs.c | 8 ++++---- 9 files changed, 78 insertions(+), 60 deletions(-) diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 3e63cfe15874..a08250d244ea 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -428,11 +429,13 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie) if (!old_tmpfile) { struct cachefiles_volume *volume = object->volume; struct dentry *fan = volume->fanout[(u8)cookie->key_hash]; + struct dentry *obj; - inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); - cachefiles_bury_object(volume->cache, object, fan, - old_file->f_path.dentry, - FSCACHE_OBJECT_INVALIDATED); + obj = start_removing_dentry(fan, old_file->f_path.dentry); + if (!IS_ERR(obj)) + cachefiles_bury_object(volume->cache, object, + fan, obj, + FSCACHE_OBJECT_INVALIDATED); } fput(old_file); } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index c7f0c6ab9b88..0104ac00485d 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -261,6 +261,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache, * - Directory backed objects are stuffed into the graveyard for userspace to * delete * On entry dir must be locked. It will be unlocked on exit. + * On entry there must be at least 2 refs on rep, one will be dropped on exit. */ int cachefiles_bury_object(struct cachefiles_cache *cache, struct cachefiles_object *object, @@ -275,12 +276,6 @@ int cachefiles_bury_object(struct cachefiles_cache *cache, _enter(",'%pd','%pd'", dir, rep); - /* end_removing() will dput() @rep but we need to keep - * a ref, so take one now. This also stops the dentry - * being negated when unlinked which we need. - */ - dget(rep); - if (rep->d_parent != dir) { end_removing(rep); _leave(" = -ESTALE"); @@ -425,13 +420,12 @@ int cachefiles_delete_object(struct cachefiles_object *object, _enter(",OBJ%x{%pD}", object->debug_id, object->file); - /* Stop the dentry being negated if it's only pinned by a file struct. */ - dget(dentry); - - inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT); - ret = cachefiles_unlink(volume->cache, object, fan, dentry, why); - inode_unlock(d_backing_inode(fan)); - dput(dentry); + dentry = start_removing_dentry(fan, dentry); + if (IS_ERR(dentry)) + ret = PTR_ERR(dentry); + else + ret = cachefiles_unlink(volume->cache, object, fan, dentry, why); + end_removing(dentry); return ret; } @@ -644,9 +638,13 @@ bool cachefiles_look_up_object(struct cachefiles_object *object) if (!d_is_reg(dentry)) { pr_err("%pd is not a file\n", dentry); - inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); - ret = cachefiles_bury_object(volume->cache, object, fan, dentry, - FSCACHE_OBJECT_IS_WEIRD); + struct dentry *de = start_removing_dentry(fan, dentry); + if (IS_ERR(de)) + ret = PTR_ERR(de); + else + ret = cachefiles_bury_object(volume->cache, object, + fan, de, + FSCACHE_OBJECT_IS_WEIRD); dput(dentry); if (ret < 0) return false; diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c index 781aac4ef274..90ba926f488e 100644 --- a/fs/cachefiles/volume.c +++ b/fs/cachefiles/volume.c @@ -7,6 +7,7 @@ #include #include +#include #include "internal.h" #include @@ -58,9 +59,11 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie) if (ret < 0) { if (ret != -ESTALE) goto error_dir; - inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT); - cachefiles_bury_object(cache, NULL, cache->store, vdentry, - FSCACHE_VOLUME_IS_WEIRD); + vdentry = start_removing_dentry(cache->store, vdentry); + if (!IS_ERR(vdentry)) + cachefiles_bury_object(cache, NULL, cache->store, + vdentry, + FSCACHE_VOLUME_IS_WEIRD); cachefiles_put_directory(volume->dentry); cond_resched(); goto retry; diff --git a/fs/namei.c b/fs/namei.c index da01b828ede6..729b42fb143b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3323,6 +3323,39 @@ struct dentry *start_removing_noperm(struct dentry *parent, } EXPORT_SYMBOL(start_removing_noperm); +/** + * start_removing_dentry - prepare to remove a given dentry + * @parent: directory from which dentry should be removed + * @child: the dentry to be removed + * + * A lock is taken to protect the dentry again other dirops and + * the validity of the dentry is checked: correct parent and still hashed. + * + * If the dentry is valid and positive, a reference is taken and + * returned. If not an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: the valid dentry, or an error. + */ +struct dentry *start_removing_dentry(struct dentry *parent, + struct dentry *child) +{ + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + if (unlikely(IS_DEADDIR(parent->d_inode) || + child->d_parent != parent || + d_unhashed(child))) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EINVAL); + } + if (d_is_negative(child)) { + inode_unlock(parent->d_inode); + return ERR_PTR(-ENOENT); + } + return dget(child); +} +EXPORT_SYMBOL(start_removing_dentry); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 20682afdbd20..6d1d0e94e287 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -47,14 +47,12 @@ static int ovl_cleanup_locked(struct ovl_fs *ofs, struct inode *wdir, int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *wdentry) { - int err; - - err = ovl_parent_lock(workdir, wdentry); - if (err) - return err; + wdentry = start_removing_dentry(workdir, wdentry); + if (IS_ERR(wdentry)) + return PTR_ERR(wdentry); ovl_cleanup_locked(ofs, workdir->d_inode, wdentry); - ovl_parent_unlock(workdir); + end_removing(wdentry); return 0; } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 1e9792cc557b..77ecc39fc33a 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1242,11 +1242,11 @@ int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, if (!d_is_dir(dentry) || level > 1) return ovl_cleanup(ofs, parent, dentry); - err = ovl_parent_lock(parent, dentry); - if (err) - return err; + dentry = start_removing_dentry(parent, dentry); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); err = ovl_do_rmdir(ofs, parent->d_inode, dentry); - ovl_parent_unlock(parent); + end_removing(dentry); if (err) { struct path path = { .mnt = mnt, .dentry = dentry }; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index ea0a06b0ae44..148c65d59e42 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -49,24 +49,6 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work, i_uid_write(inode, i_uid_read(parent_inode)); } -/** - * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable - * @parent: parent dentry - * @child: child dentry - * - * Returns: %0 on success, %-ENOENT if the parent dentry is not stable - */ -int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child) -{ - inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - if (child->d_parent != parent) { - inode_unlock(d_inode(parent)); - return -ENOENT; - } - - return 0; -} - static int ksmbd_vfs_path_lookup(struct ksmbd_share_config *share_conf, char *pathname, unsigned int flags, struct path *path, bool for_remove) @@ -1082,18 +1064,17 @@ int ksmbd_vfs_unlink(struct file *filp) return err; dir = dget_parent(dentry); - err = ksmbd_vfs_lock_parent(dir, dentry); - if (err) + dentry = start_removing_dentry(dir, dentry); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) goto out; - dget(dentry); if (S_ISDIR(d_inode(dentry)->i_mode)) err = vfs_rmdir(idmap, d_inode(dir), dentry); else err = vfs_unlink(idmap, d_inode(dir), dentry, NULL); - dput(dentry); - inode_unlock(d_inode(dir)); + end_removing(dentry); if (err) ksmbd_debug(VFS, "failed to delete, err %d\n", err); out: diff --git a/include/linux/namei.h b/include/linux/namei.h index 0441f5921f87..d089e4e8fdd0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -95,6 +95,8 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name); struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name); +struct dentry *start_removing_dentry(struct dentry *parent, + struct dentry *child); /** * end_creating - finish action started with start_creating diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 391a586d0557..9d08d103f142 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -355,17 +355,17 @@ static void aafs_remove(struct dentry *dentry) if (!dentry || IS_ERR(dentry)) return; + /* ->d_parent is stable as rename is not supported */ dir = d_inode(dentry->d_parent); - inode_lock(dir); - if (simple_positive(dentry)) { + dentry = start_removing_dentry(dentry->d_parent, dentry); + if (!IS_ERR(dentry) && simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(dir, dentry); else simple_unlink(dir, dentry); d_delete(dentry); - dput(dentry); } - inode_unlock(dir); + end_removing(dentry); simple_release_fs(&aafs_mnt, &aafs_count); } -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown These are similar to start_creating() and start_removing(), but allow a fatal signal to abort waiting for the lock. They are used in btrfs for subvol creation and removal. btrfs_may_create() no longer needs IS_DEADDIR() and start_creating_killable() includes that check. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/btrfs/ioctl.c | 41 +++++++--------------- fs/namei.c | 80 +++++++++++++++++++++++++++++++++++++++++-- include/linux/namei.h | 6 ++++ 3 files changed, 95 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8cb7d5a462ef..d0c3bb0423bb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent, struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len); int ret; - ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); - if (ret == -EINTR) - return ret; - - dentry = lookup_one(idmap, qname, parent); - ret = PTR_ERR(dentry); + dentry = start_creating_killable(idmap, parent, qname); if (IS_ERR(dentry)) - goto out_unlock; + return PTR_ERR(dentry); ret = btrfs_may_create(idmap, dir, dentry); if (ret) @@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent, out_up_read: up_read(&fs_info->subvol_sem); out_dput: - dput(dentry); -out_unlock: - btrfs_inode_unlock(BTRFS_I(dir), 0); + end_creating(dentry, parent); return ret; } @@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto free_subvol_name; } - ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); - if (ret == -EINTR) - goto free_subvol_name; - dentry = lookup_one(idmap, &QSTR(subvol_name), parent); + dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name)); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); - goto out_unlock_dir; - } - - if (d_really_is_negative(dentry)) { - ret = -ENOENT; - goto out_dput; + goto out_end_removing; } inode = d_inode(dentry); @@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, */ ret = -EPERM; if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED)) - goto out_dput; + goto out_end_removing; /* * Do not allow deletion if the parent dir is the same @@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, */ ret = -EINVAL; if (root == dest) - goto out_dput; + goto out_end_removing; ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC); if (ret) - goto out_dput; + goto out_end_removing; } /* check if subvolume may be deleted by a user */ ret = btrfs_may_delete(idmap, dir, dentry, 1); if (ret) - goto out_dput; + goto out_end_removing; if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; - goto out_dput; + goto out_end_removing; } btrfs_inode_lock(BTRFS_I(inode), 0); @@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, if (!ret) d_delete_notify(dir, dentry); -out_dput: - dput(dentry); -out_unlock_dir: - btrfs_inode_unlock(BTRFS_I(dir), 0); +out_end_removing: + end_removing(dentry); free_subvol_name: kfree(subvol_name_ptr); free_parent: diff --git a/fs/namei.c b/fs/namei.c index 729b42fb143b..e70d056b9543 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2778,19 +2778,33 @@ static int filename_parentat(int dfd, struct filename *name, * Returns: a locked dentry, or an error. * */ -struct dentry *start_dirop(struct dentry *parent, struct qstr *name, - unsigned int lookup_flags) +static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags, + unsigned int state) { struct dentry *dentry; struct inode *dir = d_inode(parent); - inode_lock_nested(dir, I_MUTEX_PARENT); + if (state == TASK_KILLABLE) { + int ret = down_write_killable_nested(&dir->i_rwsem, + I_MUTEX_PARENT); + if (ret) + return ERR_PTR(ret); + } else { + inode_lock_nested(dir, I_MUTEX_PARENT); + } dentry = lookup_one_qstr_excl(name, parent, lookup_flags); if (IS_ERR(dentry)) inode_unlock(dir); return dentry; } +struct dentry *start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags) +{ + return __start_dirop(parent, name, lookup_flags, TASK_NORMAL); +} + /** * end_dirop - signal completion of a dirop * @de: the dentry which was returned by start_dirop or similar. @@ -3275,6 +3289,66 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, } EXPORT_SYMBOL(start_removing); +/** + * start_creating_killable - prepare to create a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup in performed prior to creating + * an object in a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name already exists, a positive dentry is returned. + * + * If a signal is received or was already pending, the function aborts + * with -EINTR; + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE); +} +EXPORT_SYMBOL(start_creating_killable); + +/** + * start_removing_killable - prepare to remove a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * If a signal is received or was already pending, the function aborts + * with -EINTR; + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return __start_dirop(parent, name, 0, TASK_KILLABLE); +} +EXPORT_SYMBOL(start_removing_killable); + /** * start_creating_noperm - prepare to create a given name without permission checking * @parent: directory in which to prepare to create the name diff --git a/include/linux/namei.h b/include/linux/namei.h index d089e4e8fdd0..196c66156a8a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -93,6 +93,12 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); +struct dentry *start_creating_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name); +struct dentry *start_removing_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name); struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name); struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name); struct dentry *start_removing_dentry(struct dentry *parent, -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown start_renaming() combines name lookup and locking to prepare for rename. It is used when two names need to be looked up as in nfsd and overlayfs - cases where one or both dentries are already available will be handled separately. __start_renaming() avoids the inode_permission check and hash calculation and is suitable after filename_parentat() in do_renameat2(). It subsumes quite a bit of code from that function. start_renaming() does calculate the hash and check X permission and is suitable elsewhere: - nfsd_rename() - ovl_rename() In ovl, ovl_do_rename_rd() is factored out of ovl_do_rename(), which itself will be gone by the end of the series. Acked-by: Chuck Lever (for nfsd parts) Reviewed-by: Jeff Layton Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown -- Changes since v3: - added missig dput() in ovl_rename when "whiteout" is not-NULL. Changes since v2: - in __start_renaming() some label have been renamed, and err is always set before a "goto out_foo" rather than passing the error in a dentry*. - ovl_do_rename() changed to call the new ovl_do_rename_rd() rather than keeping duplicate code - code around ovl_cleanup() call in ovl_rename() restructured. --- fs/namei.c | 197 ++++++++++++++++++++++++++++----------- fs/nfsd/vfs.c | 73 +++++---------- fs/overlayfs/dir.c | 74 +++++++-------- fs/overlayfs/overlayfs.h | 23 +++-- include/linux/namei.h | 3 + 5 files changed, 218 insertions(+), 152 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index e70d056b9543..bad6c9d540f9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3667,6 +3667,129 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } EXPORT_SYMBOL(unlock_rename); +/** + * __start_renaming - lookup and lock names for rename + * @rd: rename data containing parent and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_last: name of object in @rd.old_parent + * @new_last: name of object in @rd.new_parent + * + * Look up two names and ensure locks are in place for + * rename. + * + * On success the found dentries are stored in @rd.old_dentry, + * @rd.new_dentry. These references and the lock are dropped by + * end_renaming(). + * + * The passed in qstrs must have the hash calculated, and no permission + * checking is performed. + * + * Returns: zero or an error. + */ +static int +__start_renaming(struct renamedata *rd, int lookup_flags, + struct qstr *old_last, struct qstr *new_last) +{ + struct dentry *trap; + struct dentry *d1, *d2; + int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + int err; + + if (rd->flags & RENAME_EXCHANGE) + target_flags = 0; + if (rd->flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; + + trap = lock_rename(rd->old_parent, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + + d1 = lookup_one_qstr_excl(old_last, rd->old_parent, + lookup_flags); + err = PTR_ERR(d1); + if (IS_ERR(d1)) + goto out_unlock; + + d2 = lookup_one_qstr_excl(new_last, rd->new_parent, + lookup_flags | target_flags); + err = PTR_ERR(d2); + if (IS_ERR(d2)) + goto out_dput_d1; + + if (d1 == trap) { + /* source is an ancestor of target */ + err = -EINVAL; + goto out_dput_d2; + } + + if (d2 == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_dput_d2; + } + + rd->old_dentry = d1; + rd->new_dentry = d2; + return 0; + +out_dput_d2: + dput(d2); +out_dput_d1: + dput(d1); +out_unlock: + unlock_rename(rd->old_parent, rd->new_parent); + return err; +} + +/** + * start_renaming - lookup and lock names for rename with permission checking + * @rd: rename data containing parent and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_last: name of object in @rd.old_parent + * @new_last: name of object in @rd.new_parent + * + * Look up two names and ensure locks are in place for + * rename. + * + * On success the found dentries are stored in @rd.old_dentry, + * @rd.new_dentry. These references and the lock are dropped by + * end_renaming(). + * + * The passed in qstrs need not have the hash calculated, and basic + * eXecute permission checking is performed against @rd.mnt_idmap. + * + * Returns: zero or an error. + */ +int start_renaming(struct renamedata *rd, int lookup_flags, + struct qstr *old_last, struct qstr *new_last) +{ + int err; + + err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent); + if (err) + return err; + err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent); + if (err) + return err; + return __start_renaming(rd, lookup_flags, old_last, new_last); +} +EXPORT_SYMBOL(start_renaming); + +void end_renaming(struct renamedata *rd) +{ + unlock_rename(rd->old_parent, rd->new_parent); + dput(rd->old_dentry); + dput(rd->new_dentry); +} +EXPORT_SYMBOL(end_renaming); + /** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from @@ -5504,14 +5627,11 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; - struct dentry *old_dentry, *new_dentry; - struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0, target_flags = - LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + unsigned int lookup_flags = 0; bool should_retry = false; int error = -EINVAL; @@ -5522,11 +5642,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, (flags & RENAME_EXCHANGE)) goto put_names; - if (flags & RENAME_EXCHANGE) - target_flags = 0; - if (flags & RENAME_NOREPLACE) - target_flags |= LOOKUP_EXCL; - retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, &old_last, &old_type); @@ -5556,66 +5671,40 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, goto exit2; retry_deleg: - trap = lock_rename(new_path.dentry, old_path.dentry); - if (IS_ERR(trap)) { - error = PTR_ERR(trap); + rd.old_parent = old_path.dentry; + rd.mnt_idmap = mnt_idmap(old_path.mnt); + rd.new_parent = new_path.dentry; + rd.delegated_inode = &delegated_inode; + rd.flags = flags; + + error = __start_renaming(&rd, lookup_flags, &old_last, &new_last); + if (error) goto exit_lock_rename; - } - old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, - lookup_flags); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | target_flags); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; if (flags & RENAME_EXCHANGE) { - if (!d_is_dir(new_dentry)) { + if (!d_is_dir(rd.new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) - goto exit5; + goto exit_unlock; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_dir(old_dentry)) { + if (!d_is_dir(rd.old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) - goto exit5; + goto exit_unlock; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) - goto exit5; - } - /* source should not be ancestor of target */ - error = -EINVAL; - if (old_dentry == trap) - goto exit5; - /* target should not be an ancestor of source */ - if (!(flags & RENAME_EXCHANGE)) - error = -ENOTEMPTY; - if (new_dentry == trap) - goto exit5; + goto exit_unlock; + } - error = security_path_rename(&old_path, old_dentry, - &new_path, new_dentry, flags); + error = security_path_rename(&old_path, rd.old_dentry, + &new_path, rd.new_dentry, flags); if (error) - goto exit5; + goto exit_unlock; - rd.old_parent = old_path.dentry; - rd.old_dentry = old_dentry; - rd.mnt_idmap = mnt_idmap(old_path.mnt); - rd.new_parent = new_path.dentry; - rd.new_dentry = new_dentry; - rd.delegated_inode = &delegated_inode; - rd.flags = flags; error = vfs_rename(&rd); -exit5: - dput(new_dentry); -exit4: - dput(old_dentry); -exit3: - unlock_rename(new_path.dentry, old_path.dentry); +exit_unlock: + end_renaming(&rd); exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6291c371caa7..a993f1e54182 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1885,11 +1885,12 @@ __be32 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct svc_fh *tfhp, char *tname, int tlen) { - struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; + struct dentry *fdentry, *tdentry; int type = S_IFDIR; + struct renamedata rd = {}; __be32 err; int host_err; - bool close_cached = false; + struct dentry *close_cached; trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen); @@ -1915,15 +1916,22 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out; retry: + close_cached = NULL; host_err = fh_want_write(ffhp); if (host_err) { err = nfserrno(host_err); goto out; } - trap = lock_rename(tdentry, fdentry); - if (IS_ERR(trap)) { - err = nfserr_xdev; + rd.mnt_idmap = &nop_mnt_idmap; + rd.old_parent = fdentry; + rd.new_parent = tdentry; + + host_err = start_renaming(&rd, 0, &QSTR_LEN(fname, flen), + &QSTR_LEN(tname, tlen)); + + if (host_err) { + err = nfserrno(host_err); goto out_want_write; } err = fh_fill_pre_attrs(ffhp); @@ -1933,48 +1941,23 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (err != nfs_ok) goto out_unlock; - odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry); - host_err = PTR_ERR(odentry); - if (IS_ERR(odentry)) - goto out_nfserr; + type = d_inode(rd.old_dentry)->i_mode & S_IFMT; + + if (d_inode(rd.new_dentry)) + type = d_inode(rd.new_dentry)->i_mode & S_IFMT; - host_err = -ENOENT; - if (d_really_is_negative(odentry)) - goto out_dput_old; - host_err = -EINVAL; - if (odentry == trap) - goto out_dput_old; - type = d_inode(odentry)->i_mode & S_IFMT; - - ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry); - host_err = PTR_ERR(ndentry); - if (IS_ERR(ndentry)) - goto out_dput_old; - if (d_inode(ndentry)) - type = d_inode(ndentry)->i_mode & S_IFMT; - host_err = -ENOTEMPTY; - if (ndentry == trap) - goto out_dput_new; - - if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && - nfsd_has_cached_files(ndentry)) { - close_cached = true; - goto out_dput_old; + if ((rd.new_dentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && + nfsd_has_cached_files(rd.new_dentry)) { + close_cached = dget(rd.new_dentry); + goto out_unlock; } else { - struct renamedata rd = { - .mnt_idmap = &nop_mnt_idmap, - .old_parent = fdentry, - .old_dentry = odentry, - .new_parent = tdentry, - .new_dentry = ndentry, - }; int retries; for (retries = 1;;) { host_err = vfs_rename(&rd); if (host_err != -EAGAIN || !retries--) break; - if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry))) + if (!nfsd_wait_for_delegreturn(rqstp, d_inode(rd.old_dentry))) break; } if (!host_err) { @@ -1983,11 +1966,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = commit_metadata(ffhp); } } - out_dput_new: - dput(ndentry); - out_dput_old: - dput(odentry); - out_nfserr: if (host_err == -EBUSY) { /* * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME @@ -2006,7 +1984,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fh_fill_post_attrs(tfhp); } out_unlock: - unlock_rename(tdentry, fdentry); + end_renaming(&rd); out_want_write: fh_drop_write(ffhp); @@ -2017,9 +1995,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, * until this point and then reattempt the whole shebang. */ if (close_cached) { - close_cached = false; - nfsd_close_cached_files(ndentry); - dput(ndentry); + nfsd_close_cached_files(close_cached); + dput(close_cached); goto retry; } out: diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6d1d0e94e287..cf6fc48459f3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1124,9 +1124,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, int err; struct dentry *old_upperdir; struct dentry *new_upperdir; - struct dentry *olddentry = NULL; - struct dentry *newdentry = NULL; - struct dentry *trap, *de; + struct renamedata rd = {}; bool old_opaque; bool new_opaque; bool cleanup_whiteout = false; @@ -1136,6 +1134,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, bool new_is_dir = d_is_dir(new); bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; + struct dentry *whiteout = NULL; const struct cred *old_cred = NULL; struct ovl_fs *ofs = OVL_FS(old->d_sb); LIST_HEAD(list); @@ -1233,29 +1232,21 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } - trap = lock_rename(new_upperdir, old_upperdir); - if (IS_ERR(trap)) { - err = PTR_ERR(trap); - goto out_revert_creds; - } + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = old_upperdir; + rd.new_parent = new_upperdir; + rd.flags = flags; - de = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir, - old->d_name.len); - err = PTR_ERR(de); - if (IS_ERR(de)) - goto out_unlock; - olddentry = de; + err = start_renaming(&rd, 0, + &QSTR_LEN(old->d_name.name, old->d_name.len), + &QSTR_LEN(new->d_name.name, new->d_name.len)); - err = -ESTALE; - if (!ovl_matches_upper(old, olddentry)) - goto out_unlock; + if (err) + goto out_revert_creds; - de = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir, - new->d_name.len); - err = PTR_ERR(de); - if (IS_ERR(de)) + err = -ESTALE; + if (!ovl_matches_upper(old, rd.old_dentry)) goto out_unlock; - newdentry = de; old_opaque = ovl_dentry_is_opaque(old); new_opaque = ovl_dentry_is_opaque(new); @@ -1263,15 +1254,15 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, err = -ESTALE; if (d_inode(new) && ovl_dentry_upper(new)) { if (opaquedir) { - if (newdentry != opaquedir) + if (rd.new_dentry != opaquedir) goto out_unlock; } else { - if (!ovl_matches_upper(new, newdentry)) + if (!ovl_matches_upper(new, rd.new_dentry)) goto out_unlock; } } else { - if (!d_is_negative(newdentry)) { - if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry)) + if (!d_is_negative(rd.new_dentry)) { + if (!new_opaque || !ovl_upper_is_whiteout(ofs, rd.new_dentry)) goto out_unlock; } else { if (flags & RENAME_EXCHANGE) @@ -1279,19 +1270,14 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } - if (olddentry == trap) - goto out_unlock; - if (newdentry == trap) - goto out_unlock; - - if (olddentry->d_inode == newdentry->d_inode) + if (rd.old_dentry->d_inode == rd.new_dentry->d_inode) goto out_unlock; err = 0; if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); + err = ovl_set_opaque_xerr(old, rd.old_dentry, -EXDEV); if (err) goto out_unlock; @@ -1299,18 +1285,24 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, err = ovl_set_redirect(new, samedir); else if (!overwrite && new_is_dir && !new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); + err = ovl_set_opaque_xerr(new, rd.new_dentry, -EXDEV); if (err) goto out_unlock; - err = ovl_do_rename(ofs, old_upperdir, olddentry, - new_upperdir, newdentry, flags); - unlock_rename(new_upperdir, old_upperdir); + err = ovl_do_rename_rd(&rd); + + if (!err && cleanup_whiteout) + whiteout = dget(rd.new_dentry); + + end_renaming(&rd); + if (err) goto out_revert_creds; - if (cleanup_whiteout) - ovl_cleanup(ofs, old_upperdir, newdentry); + if (whiteout) { + ovl_cleanup(ofs, old_upperdir, whiteout); + dput(whiteout); + } if (overwrite && d_inode(new)) { if (new_is_dir) @@ -1336,14 +1328,12 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, else ovl_drop_write(old); out: - dput(newdentry); - dput(olddentry); dput(opaquedir); ovl_cache_free(&list); return err; out_unlock: - unlock_rename(new_upperdir, old_upperdir); + end_renaming(&rd); goto out_revert_creds; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 49ad65f829dc..3cc85a893b5c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -355,11 +355,24 @@ static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); } +static inline int ovl_do_rename_rd(struct renamedata *rd) +{ + int err; + + pr_debug("rename(%pd2, %pd2, 0x%x)\n", rd->old_dentry, rd->new_dentry, + rd->flags); + err = vfs_rename(rd); + if (err) { + pr_debug("...rename(%pd2, %pd2, ...) = %i\n", + rd->old_dentry, rd->new_dentry, err); + } + return err; +} + static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir, struct dentry *olddentry, struct dentry *newdir, struct dentry *newdentry, unsigned int flags) { - int err; struct renamedata rd = { .mnt_idmap = ovl_upper_mnt_idmap(ofs), .old_parent = olddir, @@ -369,13 +382,7 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir, .flags = flags, }; - pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); - err = vfs_rename(&rd); - if (err) { - pr_debug("...rename(%pd2, %pd2, ...) = %i\n", - olddentry, newdentry, err); - } - return err; + return ovl_do_rename_rd(&rd); } static inline int ovl_do_whiteout(struct ovl_fs *ofs, diff --git a/include/linux/namei.h b/include/linux/namei.h index 196c66156a8a..7fdd9fdcbd2b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -157,6 +157,9 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +int start_renaming(struct renamedata *rd, int lookup_flags, + struct qstr *old_last, struct qstr *new_last); +void end_renaming(struct renamedata *rd); /** * mode_strip_umask - handle vfs umask stripping -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown Several callers perform a rename on a dentry they already have, and only require lookup for the target name. This includes smb/server and a few different places in overlayfs. start_renaming_dentry() performs the required lookup and takes the required lock using lock_rename_child() It is used in three places in overlayfs and in ksmbd_vfs_rename(). In the ksmbd case, the parent of the source is not important - the source must be renamed from wherever it is. So start_renaming_dentry() allows rd->old_parent to be NULL and only checks it if it is non-NULL. On success rd->old_parent will be the parent of old_dentry with an extra reference taken. Other start_renaming function also now take the extra reference and end_renaming() now drops this reference as well. ovl_lookup_temp(), ovl_parent_lock(), and ovl_parent_unlock() are all removed as they are no longer needed. OVL_TEMPNAME_SIZE and ovl_tempname() are now declared in overlayfs.h so that ovl_check_rename_whiteout() can access them. ovl_copy_up_workdir() now always cleans up on error. Reviewed-by: Namjae Jeon (for ksmbd part) Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/namei.c | 108 ++++++++++++++++++++++++++++++++++++--- fs/overlayfs/copy_up.c | 54 +++++++++----------- fs/overlayfs/dir.c | 19 +------ fs/overlayfs/overlayfs.h | 8 +-- fs/overlayfs/super.c | 22 ++++---- fs/overlayfs/util.c | 11 ---- fs/smb/server/vfs.c | 60 ++++------------------ include/linux/namei.h | 2 + 8 files changed, 150 insertions(+), 134 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index bad6c9d540f9..4b740048df97 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3669,7 +3669,7 @@ EXPORT_SYMBOL(unlock_rename); /** * __start_renaming - lookup and lock names for rename - * @rd: rename data containing parent and flags, and + * @rd: rename data containing parents and flags, and * for receiving found dentries * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, * LOOKUP_NO_SYMLINKS etc). @@ -3680,8 +3680,8 @@ EXPORT_SYMBOL(unlock_rename); * rename. * * On success the found dentries are stored in @rd.old_dentry, - * @rd.new_dentry. These references and the lock are dropped by - * end_renaming(). + * @rd.new_dentry and an extra ref is taken on @rd.old_parent. + * These references and the lock are dropped by end_renaming(). * * The passed in qstrs must have the hash calculated, and no permission * checking is performed. @@ -3735,6 +3735,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags, rd->old_dentry = d1; rd->new_dentry = d2; + dget(rd->old_parent); return 0; out_dput_d2: @@ -3748,7 +3749,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags, /** * start_renaming - lookup and lock names for rename with permission checking - * @rd: rename data containing parent and flags, and + * @rd: rename data containing parents and flags, and * for receiving found dentries * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, * LOOKUP_NO_SYMLINKS etc). @@ -3759,8 +3760,8 @@ __start_renaming(struct renamedata *rd, int lookup_flags, * rename. * * On success the found dentries are stored in @rd.old_dentry, - * @rd.new_dentry. These references and the lock are dropped by - * end_renaming(). + * @rd.new_dentry. Also the refcount on @rd->old_parent is increased. + * These references and the lock are dropped by end_renaming(). * * The passed in qstrs need not have the hash calculated, and basic * eXecute permission checking is performed against @rd.mnt_idmap. @@ -3782,11 +3783,106 @@ int start_renaming(struct renamedata *rd, int lookup_flags, } EXPORT_SYMBOL(start_renaming); +static int +__start_renaming_dentry(struct renamedata *rd, int lookup_flags, + struct dentry *old_dentry, struct qstr *new_last) +{ + struct dentry *trap; + struct dentry *d2; + int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + int err; + + if (rd->flags & RENAME_EXCHANGE) + target_flags = 0; + if (rd->flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; + + /* Already have the dentry - need to be sure to lock the correct parent */ + trap = lock_rename_child(old_dentry, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + if (d_unhashed(old_dentry) || + (rd->old_parent && rd->old_parent != old_dentry->d_parent)) { + /* dentry was removed, or moved and explicit parent requested */ + err = -EINVAL; + goto out_unlock; + } + + d2 = lookup_one_qstr_excl(new_last, rd->new_parent, + lookup_flags | target_flags); + err = PTR_ERR(d2); + if (IS_ERR(d2)) + goto out_unlock; + + if (old_dentry == trap) { + /* source is an ancestor of target */ + err = -EINVAL; + goto out_dput_d2; + } + + if (d2 == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_dput_d2; + } + + rd->old_dentry = dget(old_dentry); + rd->new_dentry = d2; + rd->old_parent = dget(old_dentry->d_parent); + return 0; + +out_dput_d2: + dput(d2); +out_unlock: + unlock_rename(old_dentry->d_parent, rd->new_parent); + return err; +} + +/** + * start_renaming_dentry - lookup and lock name for rename with permission checking + * @rd: rename data containing parents and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_dentry: dentry of name to move + * @new_last: name of target in @rd.new_parent + * + * Look up target name and ensure locks are in place for + * rename. + * + * On success the found dentry is stored in @rd.new_dentry and + * @rd.old_parent is confirmed to be the parent of @old_dentry. If it + * was originally %NULL, it is set. In either case a reference is taken + * so that end_renaming() can have a stable reference to unlock. + * + * References and the lock can be dropped with end_renaming() + * + * The passed in qstr need not have the hash calculated, and basic + * eXecute permission checking is performed against @rd.mnt_idmap. + * + * Returns: zero or an error. + */ +int start_renaming_dentry(struct renamedata *rd, int lookup_flags, + struct dentry *old_dentry, struct qstr *new_last) +{ + int err; + + err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent); + if (err) + return err; + return __start_renaming_dentry(rd, lookup_flags, old_dentry, new_last); +} +EXPORT_SYMBOL(start_renaming_dentry); + void end_renaming(struct renamedata *rd) { unlock_rename(rd->old_parent, rd->new_parent); dput(rd->old_dentry); dput(rd->new_dentry); + dput(rd->old_parent); } EXPORT_SYMBOL(end_renaming); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e2bdac4317e7..9911a346b477 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -523,8 +523,8 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); - struct dentry *index = NULL; struct dentry *temp = NULL; + struct renamedata rd = {}; struct qstr name = { }; int err; @@ -556,17 +556,15 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, if (err) goto out; - err = ovl_parent_lock(indexdir, temp); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = indexdir; + rd.new_parent = indexdir; + err = start_renaming_dentry(&rd, 0, temp, &name); if (err) goto out; - index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); - if (IS_ERR(index)) { - err = PTR_ERR(index); - } else { - err = ovl_do_rename(ofs, indexdir, temp, indexdir, index, 0); - dput(index); - } - ovl_parent_unlock(indexdir); + + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); out: if (err) ovl_cleanup(ofs, indexdir, temp); @@ -763,7 +761,8 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *inode; struct path path = { .mnt = ovl_upper_mnt(ofs) }; - struct dentry *temp, *upper, *trap; + struct renamedata rd = {}; + struct dentry *temp; struct ovl_cu_creds cc; int err; struct ovl_cattr cattr = { @@ -807,29 +806,24 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) * ovl_copy_up_data(), so lock workdir and destdir and make sure that * temp wasn't moved before copy up completion or cleanup. */ - trap = lock_rename(c->workdir, c->destdir); - if (trap || temp->d_parent != c->workdir) { - /* temp or workdir moved underneath us? abort without cleanup */ - dput(temp); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = c->workdir; + rd.new_parent = c->destdir; + rd.flags = 0; + err = start_renaming_dentry(&rd, 0, temp, + &QSTR_LEN(c->destname.name, c->destname.len)); + if (err) { + /* temp or workdir moved underneath us? map to -EIO */ err = -EIO; - if (!IS_ERR(trap)) - unlock_rename(c->workdir, c->destdir); - goto out; } - - err = ovl_copy_up_metadata(c, temp); if (err) - goto cleanup; + goto cleanup_unlocked; - upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, - c->destname.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto cleanup; + err = ovl_copy_up_metadata(c, temp); + if (!err) + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); - err = ovl_do_rename(ofs, c->workdir, temp, c->destdir, upper, 0); - unlock_rename(c->workdir, c->destdir); - dput(upper); if (err) goto cleanup_unlocked; @@ -850,8 +844,6 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) return err; -cleanup: - unlock_rename(c->workdir, c->destdir); cleanup_unlocked: ovl_cleanup(ofs, c->workdir, temp); dput(temp); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index cf6fc48459f3..6b2f88edb497 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -57,8 +57,7 @@ int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, return 0; } -#define OVL_TEMPNAME_SIZE 20 -static void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) +void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) { static atomic_t temp_id = ATOMIC_INIT(0); @@ -66,22 +65,6 @@ static void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id)); } -struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) -{ - struct dentry *temp; - char name[OVL_TEMPNAME_SIZE]; - - ovl_tempname(name); - temp = ovl_lookup_upper(ofs, name, workdir, strlen(name)); - if (!IS_ERR(temp) && temp->d_inode) { - pr_err("workdir/%s already exists\n", name); - dput(temp); - temp = ERR_PTR(-EIO); - } - - return temp; -} - static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs, struct dentry *workdir) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3cc85a893b5c..746bc4ad7b37 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -447,11 +447,6 @@ static inline bool ovl_open_flags_need_copy_up(int flags) } /* util.c */ -int ovl_parent_lock(struct dentry *parent, struct dentry *child); -static inline void ovl_parent_unlock(struct dentry *parent) -{ - inode_unlock(parent->d_inode); -} int ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); void ovl_start_write(struct dentry *dentry); @@ -888,7 +883,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry); -struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir); +#define OVL_TEMPNAME_SIZE 20 +void ovl_tempname(char name[OVL_TEMPNAME_SIZE]); struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6e0816c1147a..a721ef2b90e8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -566,9 +566,10 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) { struct dentry *workdir = ofs->workdir; struct dentry *temp; - struct dentry *dest; struct dentry *whiteout; struct name_snapshot name; + struct renamedata rd = {}; + char name2[OVL_TEMPNAME_SIZE]; int err; temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0)); @@ -576,23 +577,21 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) if (IS_ERR(temp)) return err; - err = ovl_parent_lock(workdir, temp); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = workdir; + rd.flags = RENAME_WHITEOUT; + ovl_tempname(name2); + err = start_renaming_dentry(&rd, 0, temp, &QSTR(name2)); if (err) { dput(temp); return err; } - dest = ovl_lookup_temp(ofs, workdir); - err = PTR_ERR(dest); - if (IS_ERR(dest)) { - dput(temp); - ovl_parent_unlock(workdir); - return err; - } /* Name is inline and stable - using snapshot as a copy helper */ take_dentry_name_snapshot(&name, temp); - err = ovl_do_rename(ofs, workdir, temp, workdir, dest, RENAME_WHITEOUT); - ovl_parent_unlock(workdir); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) { if (err == -EINVAL) err = 0; @@ -616,7 +615,6 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) ovl_cleanup(ofs, workdir, temp); release_dentry_name_snapshot(&name); dput(temp); - dput(dest); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 2da1c035f716..fffc22859211 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1548,14 +1548,3 @@ void ovl_copyattr(struct inode *inode) i_size_write(inode, i_size_read(realinode)); spin_unlock(&inode->i_lock); } - -int ovl_parent_lock(struct dentry *parent, struct dentry *child) -{ - inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - if (!child || - (!d_unhashed(child) && child->d_parent == parent)) - return 0; - - inode_unlock(parent->d_inode); - return -EINVAL; -} diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 148c65d59e42..ea5ab5b0adb1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -661,7 +661,6 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, char *newname, int flags) { - struct dentry *old_parent, *new_dentry, *trap; struct dentry *old_child = old_path->dentry; struct path new_path; struct qstr new_last; @@ -671,7 +670,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, struct ksmbd_file *parent_fp; int new_type; int err, lookup_flags = LOOKUP_NO_SYMLINKS; - int target_lookup_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; if (ksmbd_override_fsids(work)) return -ENOMEM; @@ -682,14 +680,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto revert_fsids; } - /* - * explicitly handle file overwrite case, for compatibility with - * filesystems that may not support rename flags (e.g: fuse) - */ - if (flags & RENAME_NOREPLACE) - target_lookup_flags |= LOOKUP_EXCL; - flags &= ~(RENAME_NOREPLACE); - retry: err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH, &new_path, &new_last, &new_type, @@ -706,17 +696,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, if (err) goto out2; - trap = lock_rename_child(old_child, new_path.dentry); - if (IS_ERR(trap)) { - err = PTR_ERR(trap); + rd.mnt_idmap = mnt_idmap(old_path->mnt); + rd.old_parent = NULL; + rd.new_parent = new_path.dentry; + rd.flags = flags; + rd.delegated_inode = NULL, + err = start_renaming_dentry(&rd, lookup_flags, old_child, &new_last); + if (err) goto out_drop_write; - } - - old_parent = dget(old_child->d_parent); - if (d_unhashed(old_child)) { - err = -EINVAL; - goto out3; - } parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent); if (parent_fp) { @@ -729,44 +716,17 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, ksmbd_fd_put(work, parent_fp); } - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | target_lookup_flags); - if (IS_ERR(new_dentry)) { - err = PTR_ERR(new_dentry); - goto out3; - } - - if (d_is_symlink(new_dentry)) { + if (d_is_symlink(rd.new_dentry)) { err = -EACCES; - goto out4; - } - - if (old_child == trap) { - err = -EINVAL; - goto out4; - } - - if (new_dentry == trap) { - err = -ENOTEMPTY; - goto out4; + goto out3; } - rd.mnt_idmap = mnt_idmap(old_path->mnt), - rd.old_parent = old_parent, - rd.old_dentry = old_child, - rd.new_parent = new_path.dentry, - rd.new_dentry = new_dentry, - rd.flags = flags, - rd.delegated_inode = NULL, err = vfs_rename(&rd); if (err) ksmbd_debug(VFS, "vfs_rename failed err %d\n", err); -out4: - dput(new_dentry); out3: - dput(old_parent); - unlock_rename(old_parent, new_path.dentry); + end_renaming(&rd); out_drop_write: mnt_drop_write(old_path->mnt); out2: diff --git a/include/linux/namei.h b/include/linux/namei.h index 7fdd9fdcbd2b..c47713e9867c 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -159,6 +159,8 @@ extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); int start_renaming(struct renamedata *rd, int lookup_flags, struct qstr *old_last, struct qstr *new_last); +int start_renaming_dentry(struct renamedata *rd, int lookup_flags, + struct dentry *old_dentry, struct qstr *new_last); void end_renaming(struct renamedata *rd); /** -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown A few callers want to lock for a rename and already have both dentries. Also debugfs does want to perform a lookup but doesn't want permission checking, so start_renaming_dentry() cannot be used. This patch introduces start_renaming_two_dentries() which is given both dentries. debugfs performs one lookup itself. As it will only continue with a negative dentry and as those cannot be renamed or unlinked, it is safe to do the lookup before getting the rename locks. overlayfs uses start_renaming_two_dentries() in three places and selinux uses it twice in sel_make_policy_nodes(). In sel_make_policy_nodes() we now lock for rename twice instead of just once so the combined operation is no longer atomic w.r.t the parent directory locks. As selinux_state.policy_mutex is held across the whole operation this does not open up any interesting races. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- changes since v5: - sel_make_policy_nodes now uses "goto out" on error from start_renaming_two_dentries() changes since v3: added missing assignment to rd.mnt_idmap in ovl_cleanup_and_whiteout --- fs/debugfs/inode.c | 48 ++++++++++++-------------- fs/namei.c | 65 ++++++++++++++++++++++++++++++++++++ fs/overlayfs/dir.c | 43 ++++++++++++++++-------- include/linux/namei.h | 2 ++ security/selinux/selinuxfs.c | 15 +++++++-- 5 files changed, 131 insertions(+), 42 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f241b9df642a..532bd7c46baf 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -842,7 +842,8 @@ int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, . int error = 0; const char *new_name; struct name_snapshot old_name; - struct dentry *parent, *target; + struct dentry *target; + struct renamedata rd = {}; struct inode *dir; va_list ap; @@ -855,36 +856,31 @@ int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, . if (!new_name) return -ENOMEM; - parent = dget_parent(dentry); - dir = d_inode(parent); - inode_lock(dir); + rd.old_parent = dget_parent(dentry); + rd.new_parent = rd.old_parent; + rd.flags = RENAME_NOREPLACE; + target = lookup_noperm_unlocked(&QSTR(new_name), rd.new_parent); + if (IS_ERR(target)) + return PTR_ERR(target); - take_dentry_name_snapshot(&old_name, dentry); - - if (WARN_ON_ONCE(dentry->d_parent != parent)) { - error = -EINVAL; - goto out; - } - if (strcmp(old_name.name.name, new_name) == 0) - goto out; - target = lookup_noperm(&QSTR(new_name), parent); - if (IS_ERR(target)) { - error = PTR_ERR(target); - goto out; - } - if (d_really_is_positive(target)) { - dput(target); - error = -EINVAL; + error = start_renaming_two_dentries(&rd, dentry, target); + if (error) { + if (error == -EEXIST && target == dentry) + /* it isn't an error to rename a thing to itself */ + error = 0; goto out; } - simple_rename_timestamp(dir, dentry, dir, target); - d_move(dentry, target); - dput(target); + + dir = d_inode(rd.old_parent); + take_dentry_name_snapshot(&old_name, dentry); + simple_rename_timestamp(dir, dentry, dir, rd.new_dentry); + d_move(dentry, rd.new_dentry); fsnotify_move(dir, dir, &old_name.name, d_is_dir(dentry), NULL, dentry); -out: release_dentry_name_snapshot(&old_name); - inode_unlock(dir); - dput(parent); + end_renaming(&rd); +out: + dput(rd.old_parent); + dput(target); kfree_const(new_name); return error; } diff --git a/fs/namei.c b/fs/namei.c index 4b740048df97..7f0384ceb976 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3877,6 +3877,71 @@ int start_renaming_dentry(struct renamedata *rd, int lookup_flags, } EXPORT_SYMBOL(start_renaming_dentry); +/** + * start_renaming_two_dentries - Lock to dentries in given parents for rename + * @rd: rename data containing parent + * @old_dentry: dentry of name to move + * @new_dentry: dentry to move to + * + * Ensure locks are in place for rename and check parentage is still correct. + * + * On success the two dentries are stored in @rd.old_dentry and + * @rd.new_dentry and @rd.old_parent and @rd.new_parent are confirmed to + * be the parents of the dentries. + * + * References and the lock can be dropped with end_renaming() + * + * Returns: zero or an error. + */ +int +start_renaming_two_dentries(struct renamedata *rd, + struct dentry *old_dentry, struct dentry *new_dentry) +{ + struct dentry *trap; + int err; + + /* Already have the dentry - need to be sure to lock the correct parent */ + trap = lock_rename_child(old_dentry, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + err = -EINVAL; + if (d_unhashed(old_dentry) || + (rd->old_parent && rd->old_parent != old_dentry->d_parent)) + /* old_dentry was removed, or moved and explicit parent requested */ + goto out_unlock; + if (d_unhashed(new_dentry) || + rd->new_parent != new_dentry->d_parent) + /* new_dentry was removed or moved */ + goto out_unlock; + + if (old_dentry == trap) + /* source is an ancestor of target */ + goto out_unlock; + + if (new_dentry == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_unlock; + } + + err = -EEXIST; + if (d_is_positive(new_dentry) && (rd->flags & RENAME_NOREPLACE)) + goto out_unlock; + + rd->old_dentry = dget(old_dentry); + rd->new_dentry = dget(new_dentry); + rd->old_parent = dget(old_dentry->d_parent); + return 0; + +out_unlock: + unlock_rename(old_dentry->d_parent, rd->new_parent); + return err; +} +EXPORT_SYMBOL(start_renaming_two_dentries); + void end_renaming(struct renamedata *rd) { unlock_rename(rd->old_parent, rd->new_parent); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6b2f88edb497..61e9484e4ab8 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -123,6 +123,7 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, struct dentry *dentry) { struct dentry *whiteout; + struct renamedata rd = {}; int err; int flags = 0; @@ -134,10 +135,14 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, if (d_is_dir(dentry)) flags = RENAME_EXCHANGE; - err = ovl_lock_rename_workdir(ofs->workdir, whiteout, dir, dentry); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = ofs->workdir; + rd.new_parent = dir; + rd.flags = flags; + err = start_renaming_two_dentries(&rd, whiteout, dentry); if (!err) { - err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags); - unlock_rename(ofs->workdir, dir); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); } if (err) goto kill_whiteout; @@ -388,6 +393,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct renamedata rd = {}; struct path upperpath; struct dentry *upper; struct dentry *opaquedir; @@ -413,7 +419,11 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (IS_ERR(opaquedir)) goto out; - err = ovl_lock_rename_workdir(workdir, opaquedir, upperdir, upper); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = upperdir; + rd.flags = RENAME_EXCHANGE; + err = start_renaming_two_dentries(&rd, opaquedir, upper); if (err) goto out_cleanup_unlocked; @@ -431,8 +441,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (err) goto out_cleanup; - err = ovl_do_rename(ofs, workdir, opaquedir, upperdir, upper, RENAME_EXCHANGE); - unlock_rename(workdir, upperdir); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) goto out_cleanup_unlocked; @@ -445,7 +455,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, return opaquedir; out_cleanup: - unlock_rename(workdir, upperdir); + end_renaming(&rd); out_cleanup_unlocked: ovl_cleanup(ofs, workdir, opaquedir); dput(opaquedir); @@ -468,6 +478,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct renamedata rd = {}; struct dentry *upper; struct dentry *newdentry; int err; @@ -499,7 +510,11 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) goto out_dput; - err = ovl_lock_rename_workdir(workdir, newdentry, upperdir, upper); + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); + rd.old_parent = workdir; + rd.new_parent = upperdir; + rd.flags = 0; + err = start_renaming_two_dentries(&rd, newdentry, upper); if (err) goto out_cleanup_unlocked; @@ -536,16 +551,16 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out_cleanup; - err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, - RENAME_EXCHANGE); - unlock_rename(workdir, upperdir); + rd.flags = RENAME_EXCHANGE; + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) goto out_cleanup_unlocked; ovl_cleanup(ofs, workdir, upper); } else { - err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, 0); - unlock_rename(workdir, upperdir); + err = ovl_do_rename_rd(&rd); + end_renaming(&rd); if (err) goto out_cleanup_unlocked; } @@ -565,7 +580,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, return err; out_cleanup: - unlock_rename(workdir, upperdir); + end_renaming(&rd); out_cleanup_unlocked: ovl_cleanup(ofs, workdir, newdentry); dput(newdentry); diff --git a/include/linux/namei.h b/include/linux/namei.h index c47713e9867c..9104c7104191 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -161,6 +161,8 @@ int start_renaming(struct renamedata *rd, int lookup_flags, struct qstr *old_last, struct qstr *new_last); int start_renaming_dentry(struct renamedata *rd, int lookup_flags, struct dentry *old_dentry, struct qstr *new_last); +int start_renaming_two_dentries(struct renamedata *rd, + struct dentry *old_dentry, struct dentry *new_dentry); void end_renaming(struct renamedata *rd); /** diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 232e087bce3e..404e08bf60ba 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -506,6 +506,7 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi, { int ret = 0; struct dentry *tmp_parent, *tmp_bool_dir, *tmp_class_dir; + struct renamedata rd = {}; unsigned int bool_num = 0; char **bool_names = NULL; int *bool_values = NULL; @@ -539,9 +540,14 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi, if (ret) goto out; - lock_rename(tmp_parent, fsi->sb->s_root); + rd.old_parent = tmp_parent; + rd.new_parent = fsi->sb->s_root; /* booleans */ + ret = start_renaming_two_dentries(&rd, tmp_bool_dir, fsi->bool_dir); + if (ret) + goto out; + d_exchange(tmp_bool_dir, fsi->bool_dir); swap(fsi->bool_num, bool_num); @@ -549,12 +555,17 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi, swap(fsi->bool_pending_values, bool_values); fsi->bool_dir = tmp_bool_dir; + end_renaming(&rd); /* classes */ + ret = start_renaming_two_dentries(&rd, tmp_class_dir, fsi->class_dir); + if (ret) + goto out; + d_exchange(tmp_class_dir, fsi->class_dir); fsi->class_dir = tmp_class_dir; - unlock_rename(tmp_parent, fsi->sb->s_root); + end_renaming(&rd); out: sel_remove_old_bool_data(bool_num, bool_names, bool_values); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown This requires the addition of start_creating_dentry() which is given the dentry which has already been found, and asks for it to be locked and its parent validated. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- changes since v4 - two places in ecryptfs uses dget_parent(dentry->d_parent), thus incorrectly. getting grandparent. Changed to dget_parent(dentry). --- fs/ecryptfs/inode.c | 153 ++++++++++++++++++++---------------------- fs/namei.c | 33 +++++++++ include/linux/namei.h | 2 + 3 files changed, 107 insertions(+), 81 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index fc6d37419753..37d6293600c7 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -24,18 +24,26 @@ #include #include "ecryptfs_kernel.h" -static int lock_parent(struct dentry *dentry, - struct dentry **lower_dentry, - struct inode **lower_dir) +static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry) { - struct dentry *lower_dir_dentry; + struct dentry *parent = dget_parent(dentry); + struct dentry *ret; - lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); - *lower_dir = d_inode(lower_dir_dentry); - *lower_dentry = ecryptfs_dentry_to_lower(dentry); + ret = start_creating_dentry(ecryptfs_dentry_to_lower(parent), + ecryptfs_dentry_to_lower(dentry)); + dput(parent); + return ret; +} - inode_lock_nested(*lower_dir, I_MUTEX_PARENT); - return (*lower_dentry)->d_parent == lower_dir_dentry ? 0 : -EINVAL; +static struct dentry *ecryptfs_start_removing_dentry(struct dentry *dentry) +{ + struct dentry *parent = dget_parent(dentry); + struct dentry *ret; + + ret = start_removing_dentry(ecryptfs_dentry_to_lower(parent), + ecryptfs_dentry_to_lower(dentry)); + dput(parent); + return ret; } static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) @@ -141,15 +149,12 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, struct inode *lower_dir; int rc; - rc = lock_parent(dentry, &lower_dentry, &lower_dir); - dget(lower_dentry); // don't even try to make the lower negative - if (!rc) { - if (d_unhashed(lower_dentry)) - rc = -EINVAL; - else - rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, - NULL); - } + lower_dentry = ecryptfs_start_removing_dentry(dentry); + if (IS_ERR(lower_dentry)) + return PTR_ERR(lower_dentry); + + lower_dir = lower_dentry->d_parent->d_inode; + rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, NULL); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -158,8 +163,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); out_unlock: - dput(lower_dentry); - inode_unlock(lower_dir); + end_removing(lower_dentry); if (!rc) d_drop(dentry); return rc; @@ -186,10 +190,12 @@ ecryptfs_do_create(struct inode *directory_inode, struct inode *lower_dir; struct inode *inode; - rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir); - if (!rc) - rc = vfs_create(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, true); + lower_dentry = ecryptfs_start_creating_dentry(ecryptfs_dentry); + if (IS_ERR(lower_dentry)) + return ERR_CAST(lower_dentry); + lower_dir = lower_dentry->d_parent->d_inode; + rc = vfs_create(&nop_mnt_idmap, lower_dir, + lower_dentry, mode, true); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); @@ -205,7 +211,7 @@ ecryptfs_do_create(struct inode *directory_inode, fsstack_copy_attr_times(directory_inode, lower_dir); fsstack_copy_inode_size(directory_inode, lower_dir); out_lock: - inode_unlock(lower_dir); + end_creating(lower_dentry, NULL); return inode; } @@ -433,10 +439,12 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, file_size_save = i_size_read(d_inode(old_dentry)); lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); - rc = lock_parent(new_dentry, &lower_new_dentry, &lower_dir); - if (!rc) - rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir, - lower_new_dentry, NULL); + lower_new_dentry = ecryptfs_start_creating_dentry(new_dentry); + if (IS_ERR(lower_new_dentry)) + return PTR_ERR(lower_new_dentry); + lower_dir = lower_new_dentry->d_parent->d_inode; + rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir, + lower_new_dentry, NULL); if (rc || d_really_is_negative(lower_new_dentry)) goto out_lock; rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); @@ -448,7 +456,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink); i_size_write(d_inode(new_dentry), file_size_save); out_lock: - inode_unlock(lower_dir); + end_creating(lower_new_dentry, NULL); return rc; } @@ -468,9 +476,11 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap, size_t encoded_symlen; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; - rc = lock_parent(dentry, &lower_dentry, &lower_dir); - if (rc) - goto out_lock; + lower_dentry = ecryptfs_start_creating_dentry(dentry); + if (IS_ERR(lower_dentry)) + return PTR_ERR(lower_dentry); + lower_dir = lower_dentry->d_parent->d_inode; + mount_crypt_stat = &ecryptfs_superblock_to_private( dir->i_sb)->mount_crypt_stat; rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname, @@ -490,7 +500,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap, fsstack_copy_attr_times(dir, lower_dir); fsstack_copy_inode_size(dir, lower_dir); out_lock: - inode_unlock(lower_dir); + end_creating(lower_dentry, NULL); if (d_really_is_negative(dentry)) d_drop(dentry); return rc; @@ -501,12 +511,14 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, { int rc; struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; struct inode *lower_dir; - rc = lock_parent(dentry, &lower_dentry, &lower_dir); - if (rc) - goto out; - + lower_dentry = ecryptfs_start_creating_dentry(dentry); + if (IS_ERR(lower_dentry)) + return lower_dentry; + lower_dir_dentry = dget(lower_dentry->d_parent); + lower_dir = lower_dir_dentry->d_inode; lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, lower_dentry, mode); rc = PTR_ERR(lower_dentry); @@ -522,7 +534,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, fsstack_copy_inode_size(dir, lower_dir); set_nlink(dir, lower_dir->i_nlink); out: - inode_unlock(lower_dir); + end_creating(lower_dentry, lower_dir_dentry); if (d_really_is_negative(dentry)) d_drop(dentry); return ERR_PTR(rc); @@ -534,21 +546,18 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct inode *lower_dir; int rc; - rc = lock_parent(dentry, &lower_dentry, &lower_dir); - dget(lower_dentry); // don't even try to make the lower negative - if (!rc) { - if (d_unhashed(lower_dentry)) - rc = -EINVAL; - else - rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry); - } + lower_dentry = ecryptfs_start_removing_dentry(dentry); + if (IS_ERR(lower_dentry)) + return PTR_ERR(lower_dentry); + lower_dir = lower_dentry->d_parent->d_inode; + + rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry); if (!rc) { clear_nlink(d_inode(dentry)); fsstack_copy_attr_times(dir, lower_dir); set_nlink(dir, lower_dir->i_nlink); } - dput(lower_dentry); - inode_unlock(lower_dir); + end_removing(lower_dentry); if (!rc) d_drop(dentry); return rc; @@ -562,10 +571,12 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *lower_dentry; struct inode *lower_dir; - rc = lock_parent(dentry, &lower_dentry, &lower_dir); - if (!rc) - rc = vfs_mknod(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, dev); + lower_dentry = ecryptfs_start_creating_dentry(dentry); + if (IS_ERR(lower_dentry)) + return PTR_ERR(lower_dentry); + lower_dir = lower_dentry->d_parent->d_inode; + + rc = vfs_mknod(&nop_mnt_idmap, lower_dir, lower_dentry, mode, dev); if (rc || d_really_is_negative(lower_dentry)) goto out; rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); @@ -574,7 +585,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir, fsstack_copy_attr_times(dir, lower_dir); fsstack_copy_inode_size(dir, lower_dir); out: - inode_unlock(lower_dir); + end_removing(lower_dentry); if (d_really_is_negative(dentry)) d_drop(dentry); return rc; @@ -590,7 +601,6 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *lower_new_dentry; struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; - struct dentry *trap; struct inode *target_inode; struct renamedata rd = {}; @@ -605,31 +615,13 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, target_inode = d_inode(new_dentry); - trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); - if (IS_ERR(trap)) - return PTR_ERR(trap); - dget(lower_new_dentry); - rc = -EINVAL; - if (lower_old_dentry->d_parent != lower_old_dir_dentry) - goto out_lock; - if (lower_new_dentry->d_parent != lower_new_dir_dentry) - goto out_lock; - if (d_unhashed(lower_old_dentry) || d_unhashed(lower_new_dentry)) - goto out_lock; - /* source should not be ancestor of target */ - if (trap == lower_old_dentry) - goto out_lock; - /* target should not be ancestor of source */ - if (trap == lower_new_dentry) { - rc = -ENOTEMPTY; - goto out_lock; - } + rd.mnt_idmap = &nop_mnt_idmap; + rd.old_parent = lower_old_dir_dentry; + rd.new_parent = lower_new_dir_dentry; + rc = start_renaming_two_dentries(&rd, lower_old_dentry, lower_new_dentry); + if (rc) + return rc; - rd.mnt_idmap = &nop_mnt_idmap; - rd.old_parent = lower_old_dir_dentry; - rd.old_dentry = lower_old_dentry; - rd.new_parent = lower_new_dir_dentry; - rd.new_dentry = lower_new_dentry; rc = vfs_rename(&rd); if (rc) goto out_lock; @@ -640,8 +632,7 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); out_lock: - dput(lower_new_dentry); - unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + end_renaming(&rd); return rc; } diff --git a/fs/namei.c b/fs/namei.c index 7f0384ceb976..2444c7ddb926 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3397,6 +3397,39 @@ struct dentry *start_removing_noperm(struct dentry *parent, } EXPORT_SYMBOL(start_removing_noperm); +/** + * start_creating_dentry - prepare to create a given dentry + * @parent: directory from which dentry should be removed + * @child: the dentry to be removed + * + * A lock is taken to protect the dentry again other dirops and + * the validity of the dentry is checked: correct parent and still hashed. + * + * If the dentry is valid and negative a reference is taken and + * returned. If not an error is returned. + * + * end_creating() should be called when creation is complete, or aborted. + * + * Returns: the valid dentry, or an error. + */ +struct dentry *start_creating_dentry(struct dentry *parent, + struct dentry *child) +{ + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + if (unlikely(IS_DEADDIR(parent->d_inode) || + child->d_parent != parent || + d_unhashed(child))) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EINVAL); + } + if (d_is_positive(child)) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EEXIST); + } + return dget(child); +} +EXPORT_SYMBOL(start_creating_dentry); + /** * start_removing_dentry - prepare to remove a given dentry * @parent: directory from which dentry should be removed diff --git a/include/linux/namei.h b/include/linux/namei.h index 9104c7104191..0e6b1b9afc26 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -101,6 +101,8 @@ struct dentry *start_removing_killable(struct mnt_idmap *idmap, struct qstr *name); struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name); struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name); +struct dentry *start_creating_dentry(struct dentry *parent, + struct dentry *child); struct dentry *start_removing_dentry(struct dentry *parent, struct dentry *child); -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown vfs_mkdir() already drops the reference to the dentry on failure but it leaves the parent locked. This complicates end_creating() which needs to unlock the parent even though the dentry is no longer available. If we change vfs_mkdir() to unlock on failure as well as releasing the dentry, we can remove the "parent" arg from end_creating() and simplify the rules for calling it. Note that cachefiles_get_directory() can choose to substitute an error instead of actually calling vfs_mkdir(), for fault injection. In that case it needs to call end_creating(), just as vfs_mkdir() now does on error. ovl_create_real() will now unlock on error. So the conditional end_creating() after the call is removed, and end_creating() is called internally on error. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: NeilBrown --- changes since v5 - ovl_create_real() now calls end_creating() on error. changes since v2: - extra {} in if() branch in cachefiles_get_directory() to match the new extra {} in the else branch. - filesystems/porting.rst updated. --- Documentation/filesystems/porting.rst | 13 +++++++++++++ fs/btrfs/ioctl.c | 2 +- fs/cachefiles/namei.c | 16 ++++++++------- fs/ecryptfs/inode.c | 8 ++++---- fs/namei.c | 4 ++-- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/nfsproc.c | 2 +- fs/nfsd/vfs.c | 8 ++++---- fs/overlayfs/copy_up.c | 4 ++-- fs/overlayfs/dir.c | 18 ++++++++--------- fs/overlayfs/super.c | 6 +++--- fs/xfs/scrub/orphanage.c | 2 +- include/linux/namei.h | 28 +++++++++------------------ ipc/mqueue.c | 2 +- 16 files changed, 61 insertions(+), 58 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 35f027981b21..d33429294252 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1309,3 +1309,16 @@ a different length, use vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len)) instead. + +--- + +**mandatory** + +vfs_mkdir() now returns a dentry - the one returned by ->mkdir(). If +that dentry is different from the dentry passed in, including if it is +an IS_ERR() dentry pointer, the original dentry is dput(). + +When vfs_mkdir() returns an error, and so both dputs() the original +dentry and doesn't provide a replacement, it also unlocks the parent. +Consequently the return value from vfs_mkdir() can be passed to +end_creating() and the parent will be unlocked precisely when necessary. diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d0c3bb0423bb..b138120feba3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -935,7 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent, out_up_read: up_read(&fs_info->subvol_sem); out_dput: - end_creating(dentry, parent); + end_creating(dentry); return ret; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0104ac00485d..59327618ac42 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -128,10 +128,12 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, if (ret < 0) goto mkdir_error; ret = cachefiles_inject_write_error(); - if (ret == 0) + if (ret == 0) { subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); - else + } else { + end_creating(subdir); subdir = ERR_PTR(ret); + } if (IS_ERR(subdir)) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); @@ -140,7 +142,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, trace_cachefiles_mkdir(dir, subdir); if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) { - end_creating(subdir, dir); + end_creating(subdir); goto retry; } ASSERT(d_backing_inode(subdir)); @@ -154,7 +156,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* Tell rmdir() it's not allowed to delete the subdir */ inode_lock(d_inode(subdir)); dget(subdir); - end_creating(subdir, dir); + end_creating(subdir); if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) { pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", @@ -196,7 +198,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, return ERR_PTR(-EBUSY); mkdir_error: - end_creating(subdir, dir); + end_creating(subdir); pr_err("mkdir %s failed with error %d\n", dirname, ret); return ERR_PTR(ret); @@ -699,7 +701,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, if (ret < 0) goto out_end; - end_creating(dentry, fan); + end_creating(dentry); ret = cachefiles_inject_read_error(); if (ret == 0) @@ -733,7 +735,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, } out_end: - end_creating(dentry, fan); + end_creating(dentry); out: _leave(" = %u", success); return success; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 37d6293600c7..c951e723f24d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -211,7 +211,7 @@ ecryptfs_do_create(struct inode *directory_inode, fsstack_copy_attr_times(directory_inode, lower_dir); fsstack_copy_inode_size(directory_inode, lower_dir); out_lock: - end_creating(lower_dentry, NULL); + end_creating(lower_dentry); return inode; } @@ -456,7 +456,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink); i_size_write(d_inode(new_dentry), file_size_save); out_lock: - end_creating(lower_new_dentry, NULL); + end_creating(lower_new_dentry); return rc; } @@ -500,7 +500,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap, fsstack_copy_attr_times(dir, lower_dir); fsstack_copy_inode_size(dir, lower_dir); out_lock: - end_creating(lower_dentry, NULL); + end_creating(lower_dentry); if (d_really_is_negative(dentry)) d_drop(dentry); return rc; @@ -534,7 +534,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, fsstack_copy_inode_size(dir, lower_dir); set_nlink(dir, lower_dir->i_nlink); out: - end_creating(lower_dentry, lower_dir_dentry); + end_creating(lower_dentry); if (d_really_is_negative(dentry)) d_drop(dentry); return ERR_PTR(rc); diff --git a/fs/namei.c b/fs/namei.c index 2444c7ddb926..e834486acff1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4832,7 +4832,7 @@ EXPORT_SYMBOL(start_creating_path); */ void end_creating_path(const struct path *path, struct dentry *dentry) { - end_creating(dentry, path->dentry); + end_creating(dentry); mnt_drop_write(path->mnt); path_put(path); } @@ -5034,7 +5034,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, return dentry; err: - dput(dentry); + end_creating(dentry); return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index e2aac0def2cb..6b39e4aff959 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -364,7 +364,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: - end_creating(child, parent); + end_creating(child); out_write: fh_drop_write(fhp); return status; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b2c95e8e7c68..524cb07a477c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -376,7 +376,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (attrs.na_aclerr) open->op_bmval[0] &= ~FATTR4_WORD0_ACL; out: - end_creating(child, parent); + end_creating(child); nfsd_attrs_free(&attrs); out_write: fh_drop_write(fhp); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3eefaa2202e3..18c08395b273 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -215,7 +215,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (IS_ERR(dentry)) status = PTR_ERR(dentry); out_end: - end_creating(dentry, dir); + end_creating(dentry); out: if (status == 0) { if (nn->in_grace) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index ee1b16e921fd..28f03a6a3cc3 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -421,7 +421,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) } out_unlock: - end_creating(dchild, dirfhp->fh_dentry); + end_creating(dchild); out_write: fh_drop_write(dirfhp); done: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a993f1e54182..145f1c8d124d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1589,7 +1589,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, out: if (!err) fh_fill_post_attrs(fhp); - end_creating(dchild, dentry); + end_creating(dchild); return err; out_nfserr: @@ -1646,7 +1646,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; out_unlock: - end_creating(dchild, dentry); + end_creating(dchild); return err; } @@ -1747,7 +1747,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_create_setattr(rqstp, fhp, resfhp, attrs); fh_fill_post_attrs(fhp); out_unlock: - end_creating(dnew, dentry); + end_creating(dnew); if (!err) err = nfserrno(commit_metadata(fhp)); if (!err) @@ -1824,7 +1824,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL); fh_fill_post_attrs(ffhp); out_unlock: - end_creating(dnew, ddir); + end_creating(dnew); if (!host_err) { host_err = commit_metadata(ffhp); if (!host_err) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9911a346b477..23216ed01325 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -624,7 +624,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } - end_creating(upper, upperdir); + end_creating(upper); } if (err) goto out; @@ -891,7 +891,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, temp, udir, upper); - end_creating(upper, c->destdir); + end_creating(upper); } if (err) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 61e9484e4ab8..739f974dc258 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -91,7 +91,7 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) err = ovl_do_whiteout(ofs, wdir, whiteout); if (!err) ofs->whiteout = dget(whiteout); - end_creating(whiteout, workdir); + end_creating(whiteout); if (err) return ERR_PTR(err); } @@ -103,7 +103,7 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) err = ovl_do_link(ofs, ofs->whiteout, wdir, link); if (!err) whiteout = dget(link); - end_creating(link, workdir); + end_creating(link); if (!err) return whiteout;; @@ -187,7 +187,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, if (!err && ofs->casefold != ovl_dentry_casefolded(newdentry)) { pr_warn_ratelimited("wrong inherited casefold (%pd2)\n", newdentry); - dput(newdentry); + end_creating(newdentry); err = -EINVAL; } break; @@ -237,8 +237,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, } out: if (err) { - if (!IS_ERR(newdentry)) - dput(newdentry); + end_creating(newdentry); return ERR_PTR(err); } return newdentry; @@ -254,7 +253,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, ret = ovl_create_real(ofs, workdir, ret, attr); if (!IS_ERR(ret)) dget(ret); - end_creating(ret, workdir); + end_creating(ret); return ret; } @@ -362,12 +361,11 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) return PTR_ERR(newdentry); newdentry = ovl_create_real(ofs, upperdir, newdentry, attr); - if (IS_ERR(newdentry)) { - end_creating(newdentry, upperdir); + if (IS_ERR(newdentry)) return PTR_ERR(newdentry); - } + dget(newdentry); - end_creating(newdentry, upperdir); + end_creating(newdentry); if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && !ovl_allow_offline_changes(ofs)) { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index a721ef2b90e8..3acda985c8a3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -320,7 +320,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, if (work->d_inode) { dget(work); - end_creating(work, ofs->workbasedir); + end_creating(work); if (persist) return work; err = -EEXIST; @@ -338,7 +338,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); if (!IS_ERR(work)) dget(work); - end_creating(work, ofs->workbasedir); + end_creating(work); err = PTR_ERR(work); if (IS_ERR(work)) goto out_err; @@ -632,7 +632,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, OVL_CATTR(mode)); if (!IS_ERR(child)) dget(child); - end_creating(child, parent); + end_creating(child); } dput(parent); diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index e732605924a1..b77c2b6b6d44 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -199,7 +199,7 @@ xrep_orphanage_create( sc->orphanage_ilock_flags = 0; out_dput_orphanage: - end_creating(orphanage_dentry, root_dentry); + end_creating(orphanage_dentry); out_dput_root: dput(root_dentry); out: diff --git a/include/linux/namei.h b/include/linux/namei.h index 0e6b1b9afc26..b4d95b79b5a8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -106,34 +106,24 @@ struct dentry *start_creating_dentry(struct dentry *parent, struct dentry *start_removing_dentry(struct dentry *parent, struct dentry *child); -/** - * end_creating - finish action started with start_creating - * @child: dentry returned by start_creating() or vfs_mkdir() - * @parent: dentry given to start_creating(), - * - * Unlock and release the child. +/* end_creating - finish action started with start_creating + * @child: dentry returned by start_creating() or vfs_mkdir() * - * Unlike end_dirop() this can only be called if start_creating() succeeded. - * It handles @child being and error as vfs_mkdir() might have converted the - * dentry to an error - in that case the parent still needs to be unlocked. + * Unlock and release the child. This can be called after + * start_creating() whether that function succeeded or not, + * but it is not needed on failure. * * If vfs_mkdir() was called then the value returned from that function * should be given for @child rather than the original dentry, as vfs_mkdir() - * may have provided a new dentry. Even if vfs_mkdir() returns an error - * it must be given to end_creating(). + * may have provided a new dentry. + * * * If vfs_mkdir() was not called, then @child will be a valid dentry and * @parent will be ignored. */ -static inline void end_creating(struct dentry *child, struct dentry *parent) +static inline void end_creating(struct dentry *child) { - if (IS_ERR(child)) - /* The parent is still locked despite the error from - * vfs_mkdir() - must unlock it. - */ - inode_unlock(parent->d_inode); - else - end_dirop(child); + end_dirop(child); } /** diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 6d7610310003..83d9466710d6 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -932,7 +932,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, put_unused_fd(fd); fd = error; } - end_creating(path.dentry, root); + end_creating(path.dentry); if (!ro) mnt_drop_write(mnt); out_putname: -- 2.50.0.107.gf914562f5916.dirty From: NeilBrown Occasionally the caller of end_creating() wants to keep using the dentry. Rather then requiring them to dget() the dentry (when not an error) before calling end_creating(), provide end_creating_keep() which does this. cachefiles and overlayfs make use of this. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/cachefiles/namei.c | 3 +-- fs/overlayfs/dir.c | 8 ++------ fs/overlayfs/super.c | 11 +++-------- include/linux/namei.h | 22 ++++++++++++++++++++++ 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 59327618ac42..ef22ac19545b 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -155,8 +155,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* Tell rmdir() it's not allowed to delete the subdir */ inode_lock(d_inode(subdir)); - dget(subdir); - end_creating(subdir); + end_creating_keep(subdir); if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) { pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 739f974dc258..d21f81a524f6 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -251,10 +251,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, if (IS_ERR(ret)) return ret; ret = ovl_create_real(ofs, workdir, ret, attr); - if (!IS_ERR(ret)) - dget(ret); - end_creating(ret); - return ret; + return end_creating_keep(ret); } static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, @@ -364,8 +361,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) return PTR_ERR(newdentry); - dget(newdentry); - end_creating(newdentry); + end_creating_keep(newdentry); if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && !ovl_allow_offline_changes(ofs)) { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 3acda985c8a3..7b8fc1cab6eb 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -319,8 +319,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, }; if (work->d_inode) { - dget(work); - end_creating(work); + end_creating_keep(work); if (persist) return work; err = -EEXIST; @@ -336,9 +335,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, } work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); - if (!IS_ERR(work)) - dget(work); - end_creating(work); + end_creating_keep(work); err = PTR_ERR(work); if (IS_ERR(work)) goto out_err; @@ -630,9 +627,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, if (!child->d_inode) child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode)); - if (!IS_ERR(child)) - dget(child); - end_creating(child); + end_creating_keep(child); } dput(parent); diff --git a/include/linux/namei.h b/include/linux/namei.h index b4d95b79b5a8..58600cf234bc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -126,6 +126,28 @@ static inline void end_creating(struct dentry *child) end_dirop(child); } +/* end_creating_keep - finish action started with start_creating() and return result + * @child: dentry returned by start_creating() or vfs_mkdir() + * + * Unlock and return the child. This can be called after + * start_creating() whether that function succeeded or not, + * but it is not needed on failure. + * + * If vfs_mkdir() was called then the value returned from that function + * should be given for @child rather than the original dentry, as vfs_mkdir() + * may have provided a new dentry. + * + * Returns: @child, which may be a dentry or an error. + * + */ +static inline struct dentry *end_creating_keep(struct dentry *child) +{ + if (!IS_ERR(child)) + dget(child); + end_dirop(child); + return child; +} + /** * end_removing - finish action started with start_removing * @child: dentry returned by start_removing() -- 2.50.0.107.gf914562f5916.dirty