PID 1 may choose to stop sharing fs_struct state with us. Either via unshare(CLONE_FS) or unshare(CLONE_NEWNS). Of course, PID 1 could have chosen to create arbitrary process trees that all share fs_struct state via CLONE_FS. This is a strong statement: We only care about PID 1 aka the thread-group leader so subthread's fs_struct state doesn't matter. PID 1 unsharing fs_struct state is a bug. PID 1 relies on various kthreads to be able to perform work based on its fs_struct state. Breaking that contract sucks for both sides. So just don't bother with extra work for this. No sane init system should ever do this. Signed-off-by: Christian Brauner --- fs/fs_struct.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/fs_struct.h | 2 ++ kernel/fork.c | 14 +++----------- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 394875d06fd6..3ff79fb894c1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -147,6 +147,47 @@ int unshare_fs_struct(void) } EXPORT_SYMBOL_GPL(unshare_fs_struct); +/* + * PID 1 may choose to stop sharing fs_struct state with us. + * Either via unshare(CLONE_FS) or unshare(CLONE_NEWNS). Of + * course, PID 1 could have chosen to create arbitrary process + * trees that all share fs_struct state via CLONE_FS. This is a + * strong statement: We only care about PID 1 aka the thread-group + * leader so subthread's fs_struct state doesn't matter. + * + * PID 1 unsharing fs_struct state is a bug. PID 1 relies on + * various kthreads to be able to perform work based on its + * fs_struct state. Breaking that contract sucks for both sides. + * So just don't bother with extra work for this. No sane init + * system should ever do this. + */ +static inline void nullfs_userspace_init(struct fs_struct *old_fs) +{ + if (likely(current->pid != 1)) + return; + /* @old_fs may be dangling but for comparison it's fine */ + if (old_fs != &init_fs) + return; + pr_warn("VFS: Pid 1 stopped sharing filesystem state\n"); +} + +struct fs_struct *switch_fs_struct(struct fs_struct *new_fs) +{ + struct fs_struct *fs; + + fs = current->fs; + read_seqlock_excl(&fs->seq); + current->fs = new_fs; + if (--fs->users) + new_fs = NULL; + else + new_fs = fs; + read_sequnlock_excl(&fs->seq); + + nullfs_userspace_init(fs); + return new_fs; +} + /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 0070764b790a..ade459383f92 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -40,6 +40,8 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) read_sequnlock_excl(&fs->seq); } +struct fs_struct *switch_fs_struct(struct fs_struct *new_fs); + extern bool current_chrooted(void); static inline int current_umask(void) diff --git a/kernel/fork.c b/kernel/fork.c index 65113a304518..583078c69bbd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3123,7 +3123,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp */ int ksys_unshare(unsigned long unshare_flags) { - struct fs_struct *fs, *new_fs = NULL; + struct fs_struct *new_fs = NULL; struct files_struct *new_fd = NULL; struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; @@ -3200,16 +3200,8 @@ int ksys_unshare(unsigned long unshare_flags) task_lock(current); - if (new_fs) { - fs = current->fs; - read_seqlock_excl(&fs->seq); - current->fs = new_fs; - if (--fs->users) - new_fs = NULL; - else - new_fs = fs; - read_sequnlock_excl(&fs->seq); - } + if (new_fs) + new_fs = switch_fs_struct(new_fs); if (new_fd) swap(current->files, new_fd); -- 2.47.3