From: NeilBrown start_removing() is similar to start_creating() but will only return a positive dentry with the expectation that it will be removed. This is used by nfsd, cachefiles, and overlayfs. They are changed to also use end_removing() to terminate the action begun by start_removing(). This is a simple alias for end_dirop(). Apart from changes to the error paths, as we no longer need to unlock on a lookup error, an effect on callers is that they don't need to test if the found dentry is positive or negative - they can be sure it is positive. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown --- fs/cachefiles/namei.c | 32 ++++++++++++++------------------ fs/namei.c | 27 +++++++++++++++++++++++++++ fs/nfsd/nfs4recover.c | 18 +++++------------- fs/nfsd/vfs.c | 26 ++++++++++---------------- fs/overlayfs/dir.c | 15 +++++++-------- fs/overlayfs/overlayfs.h | 8 ++++++++ include/linux/namei.h | 18 ++++++++++++++++++ 7 files changed, 89 insertions(+), 55 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0a136eb434da..c7f0c6ab9b88 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache, * - File backed objects are unlinked * - Directory backed objects are stuffed into the graveyard for userspace to * delete + * On entry dir must be locked. It will be unlocked on exit. */ int cachefiles_bury_object(struct cachefiles_cache *cache, struct cachefiles_object *object, @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache, _enter(",'%pd','%pd'", dir, rep); + /* end_removing() will dput() @rep but we need to keep + * a ref, so take one now. This also stops the dentry + * being negated when unlinked which we need. + */ + dget(rep); + if (rep->d_parent != dir) { - inode_unlock(d_inode(dir)); + end_removing(rep); _leave(" = -ESTALE"); return -ESTALE; } /* non-directories can just be unlinked */ if (!d_is_dir(rep)) { - dget(rep); /* Stop the dentry being negated if it's only pinned - * by a file struct. - */ ret = cachefiles_unlink(cache, object, dir, rep, why); - dput(rep); + end_removing(rep); - inode_unlock(d_inode(dir)); _leave(" = %d", ret); return ret; } /* directories have to be moved to the graveyard */ _debug("move stale object to graveyard"); - inode_unlock(d_inode(dir)); + end_removing(rep); try_again: /* first step is to make up a grave dentry in the graveyard */ @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache, struct dentry *victim; int ret = -ENOENT; - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename)); - victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir); if (IS_ERR(victim)) goto lookup_error; - if (d_is_negative(victim)) - goto lookup_put; if (d_inode(victim)->i_flags & S_KERNEL_FILE) goto lookup_busy; return victim; lookup_busy: ret = -EBUSY; -lookup_put: - inode_unlock(d_inode(dir)); - dput(victim); + end_removing(victim); return ERR_PTR(ret); lookup_error: - inode_unlock(d_inode(dir)); ret = PTR_ERR(victim); if (ret == -ENOENT) return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */ @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, ret = cachefiles_bury_object(cache, NULL, dir, victim, FSCACHE_OBJECT_WAS_CULLED); + dput(victim); if (ret < 0) goto error; fscache_count_culled(); - dput(victim); _leave(" = 0"); return 0; error_unlock: - inode_unlock(d_inode(dir)); + end_removing(victim); error: - dput(victim); if (ret == -ENOENT) return -ESTALE; /* Probably got retired by the netfs */ diff --git a/fs/namei.c b/fs/namei.c index 8873ad0f05b0..38dda29552f6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, } EXPORT_SYMBOL(start_creating); +/** + * start_removing - prepare to remove a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, 0); +} +EXPORT_SYMBOL(start_removing); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c247a7c3291c..3eefaa2202e3 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name); dir = nn->rec_file->f_path.dentry; - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir); - if (IS_ERR(dentry)) { - status = PTR_ERR(dentry); - goto out_unlock; - } - status = -ENOENT; - if (d_really_is_negative(dentry)) - goto out; + dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name)); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); -out: - dput(dentry); -out_unlock: - inode_unlock(d_inode(dir)); + end_removing(dentry); return status; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 24e501abad0e..6291c371caa7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, { struct dentry *dentry, *rdentry; struct inode *dirp; - struct inode *rinode; + struct inode *rinode = NULL; __be32 err; int host_err; @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dentry = fhp->fh_dentry; dirp = d_inode(dentry); - inode_lock_nested(dirp, I_MUTEX_PARENT); - rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); + rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen)); + host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_unlock; + goto out_drop_write; - if (d_really_is_negative(rdentry)) { - dput(rdentry); - host_err = -ENOENT; - goto out_unlock; - } - rinode = d_inode(rdentry); err = fh_fill_pre_attrs(fhp); if (err != nfs_ok) goto out_unlock; + rinode = d_inode(rdentry); + /* Prevent truncation until after locks dropped */ ihold(rinode); + if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } fh_fill_post_attrs(fhp); - inode_unlock(dirp); - if (!host_err) +out_unlock: + end_removing(rdentry); + if (!err && !host_err) host_err = commit_metadata(fhp); - dput(rdentry); iput(rinode); /* truncate the inode here */ out_drop_write: @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } out: return err != nfs_ok ? err : nfserrno(host_err); -out_unlock: - inode_unlock(dirp); - goto out_drop_write; } /* diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index b9160fefbd00..20682afdbd20 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, goto out; } - inode_lock_nested(dir, I_MUTEX_PARENT); - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_start_removing_upper(ofs, upperdir, + &QSTR_LEN(dentry->d_name.name, + dentry->d_name.len)); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out_unlock; + goto out_dput; err = -ESTALE; if ((opaquedir && upper != opaquedir) || (!opaquedir && !ovl_matches_upper(dentry, upper))) - goto out_dput_upper; + goto out_unlock; if (is_dir) err = ovl_do_rmdir(ofs, dir, upper); @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, */ if (!err) d_drop(dentry); -out_dput_upper: - dput(upper); out_unlock: - inode_unlock(dir); + end_removing(upper); +out_dput: dput(opaquedir); out: return err; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index beeba96cfcb2..49ad65f829dc 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs, parent, name); } +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs, + struct dentry *parent, + struct qstr *name) +{ + return start_removing(ovl_upper_mnt_idmap(ofs), + parent, name); +} + static inline bool ovl_open_flags_need_copy_up(int flags) { if (!flags) diff --git a/include/linux/namei.h b/include/linux/namei.h index 37b72f4a64f0..6d1069f93ebf 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, struct qstr *name); +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name); /** * end_creating - finish action started with start_creating @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent) end_dirop(child); } +/** + * end_removing - finish action started with start_removing + * @child: dentry returned by start_removing() + * @parent: dentry given to start_removing() + * + * Unlock and release the child. + * + * This is identical to end_dirop(). It can be passed the result of + * start_removing() whether that was successful or not, but it not needed + * if start_removing() failed. + */ +static inline void end_removing(struct dentry *child) +{ + end_dirop(child); +} + extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); -- 2.50.0.107.gf914562f5916.dirty