Add a umh field to struct kernel_clone_args. When set, copy_fs() copies from pid 1's fs_struct instead of the kthread's fs_struct. This ensures usermodehelper threads always get init's filesystem state regardless of their parent's (kthreadd's) fs. Usermodehelper threads are not allowed to create mount namespaces (CLONE_NEWNS), share filesystem state (CLONE_FS), or be started from a non-initial mount namespace. No usermodehelper currently does this so we don't need to worry about this restriction. Set .umh = 1 in user_mode_thread(). At this stage pid 1's fs points to rootfs which is the same as kthreadd's fs, so this is functionally equivalent. Signed-off-by: Christian Brauner --- include/linux/sched/task.h | 1 + kernel/fork.c | 25 +++++++++++++++++++++---- kernel/umh.c | 6 ++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 41ed884cffc9..e0c1ca8c6a18 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -31,6 +31,7 @@ struct kernel_clone_args { u32 io_thread:1; u32 user_worker:1; u32 no_files:1; + u32 umh:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/kernel/fork.c b/kernel/fork.c index 154703cf7d3d..f62b4c370f74 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1590,11 +1590,27 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk) return 0; } -static int copy_fs(u64 clone_flags, struct task_struct *tsk) +static int copy_fs(u64 clone_flags, struct task_struct *tsk, bool umh) { - struct fs_struct *fs = current->fs; + struct fs_struct *fs; + + /* + * Usermodehelper may use userspace_init_fs filesystem state but + * they don't get to create mount namespaces, share the + * filesystem state, or be started from a non-initial mount + * namespace. 
+ */ + if (umh) { + if (clone_flags & (CLONE_NEWNS | CLONE_FS)) + return -EINVAL; + if (current->nsproxy->mnt_ns != &init_mnt_ns) + return -EINVAL; + fs = userspace_init_fs; + } else { + fs = current->fs; + VFS_WARN_ON_ONCE(current->fs != current->real_fs); + } - VFS_WARN_ON_ONCE(current->fs != current->real_fs); if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ read_seqlock_excl(&fs->seq); @@ -2213,7 +2229,7 @@ __latent_entropy struct task_struct *copy_process( retval = copy_files(clone_flags, p, args->no_files); if (retval) goto bad_fork_cleanup_semundo; - retval = copy_fs(clone_flags, p); + retval = copy_fs(clone_flags, p, args->umh); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); @@ -2727,6 +2743,7 @@ pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) .exit_signal = (flags & CSIGNAL), .fn = fn, .fn_arg = arg, + .umh = 1, }; return kernel_clone(&args); diff --git a/kernel/umh.c b/kernel/umh.c index cffda97d961c..d3f4b308b85d 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -71,10 +71,8 @@ static int call_usermodehelper_exec_async(void *data) spin_unlock_irq(&current->sighand->siglock); /* - * Initial kernel threads share ther FS with init, in order to - * get the init root directory. But we've now created a new - * thread that is going to execve a user process and has its own - * 'struct fs_struct'. Reset umask to the default. + * Usermodehelper threads get a copy of userspace init's + * fs_struct. Reset umask to the default. */ current->fs->umask = 0022; -- 2.47.3