Add a hook to manage attaching tasks to a cgroup. I'm in the process of adding various "universal truth" bpf programs to systemd that will make use of this. This has been a long-standing request (cf. [1] and [2]). It will allow us to enforce cgroup migrations and ensure that services can never escape their cgroups. This is just one of many use cases. Link: https://github.com/systemd/systemd/issues/6356 [1] Link: https://github.com/systemd/systemd/issues/22874 [2] Signed-off-by: Christian Brauner --- include/linux/bpf_lsm.h | 15 +++++++++++++++ kernel/bpf/bpf_lsm.c | 12 ++++++++++++ kernel/cgroup/cgroup.c | 18 +++++++++++------- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 5ae438fdf567..bc1d35b271f5 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -12,8 +12,11 @@ #include #include +struct cgroup; +struct cgroup_namespace; struct ns_common; struct nsset; +struct super_block; #ifdef CONFIG_BPF_LSM @@ -55,6 +58,9 @@ int bpf_lsm_get_retval_range(const struct bpf_prog *prog, int bpf_lsm_namespace_alloc(struct ns_common *ns); void bpf_lsm_namespace_free(struct ns_common *ns); int bpf_lsm_namespace_install(struct nsset *nsset, struct ns_common *ns); +int bpf_lsm_cgroup_attach(struct task_struct *task, struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, struct super_block *sb, + bool threadgroup, struct cgroup_namespace *ns); int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, const struct bpf_dynptr *value_p, int flags); @@ -125,6 +131,15 @@ static inline int bpf_lsm_namespace_install(struct nsset *nsset, { return 0; } +static inline int bpf_lsm_cgroup_attach(struct task_struct *task, + struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, + struct super_block *sb, + bool threadgroup, + struct cgroup_namespace *ns) +{ + return 0; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 
f6378db46220..1da5585082fa 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -47,6 +47,16 @@ __weak noinline int bpf_lsm_namespace_install(struct nsset *nsset, return 0; } +__weak noinline int bpf_lsm_cgroup_attach(struct task_struct *task, + struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, + struct super_block *sb, + bool threadgroup, + struct cgroup_namespace *ns) +{ + return 0; +} + __bpf_hook_end(); #define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME) @@ -56,6 +66,7 @@ BTF_SET_START(bpf_lsm_hooks) BTF_ID(func, bpf_lsm_namespace_alloc) BTF_ID(func, bpf_lsm_namespace_free) BTF_ID(func, bpf_lsm_namespace_install) +BTF_ID(func, bpf_lsm_cgroup_attach) BTF_SET_END(bpf_lsm_hooks) BTF_SET_START(bpf_lsm_disabled_hooks) @@ -407,6 +418,7 @@ BTF_ID(func, bpf_lsm_task_to_inode) BTF_ID(func, bpf_lsm_userns_create) BTF_ID(func, bpf_lsm_namespace_alloc) BTF_ID(func, bpf_lsm_namespace_install) +BTF_ID(func, bpf_lsm_cgroup_attach) BTF_SET_END(sleepable_lsm_hooks) BTF_SET_START(untrusted_lsm_hooks) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8af4351536cf..16535349b22f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -28,6 +28,7 @@ #include "cgroup-internal.h" #include +#include #include #include #include @@ -5334,7 +5335,8 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, return 0; } -static int cgroup_attach_permissions(struct cgroup *src_cgrp, +static int cgroup_attach_permissions(struct task_struct *task, + struct cgroup *src_cgrp, struct cgroup *dst_cgrp, struct super_block *sb, bool threadgroup, struct cgroup_namespace *ns) @@ -5350,9 +5352,9 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, return ret; if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)) - ret = -EOPNOTSUPP; + return -EOPNOTSUPP; - return ret; + return bpf_lsm_cgroup_attach(task, src_cgrp, dst_cgrp, sb, threadgroup, ns); } static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, 
char *buf, @@ -5384,7 +5386,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, * inherited fd attacks. */ scoped_with_creds(of->file->f_cred) - ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, + ret = cgroup_attach_permissions(task, src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); if (ret) @@ -6669,6 +6671,7 @@ static struct cgroup *cgroup_get_from_file(struct file *f) /** * cgroup_css_set_fork - find or create a css_set for a child process + * @task: the task to be attached * @kargs: the arguments passed to create the child process * * This functions finds or creates a new css_set which the child @@ -6683,7 +6686,8 @@ static struct cgroup *cgroup_get_from_file(struct file *f) * before grabbing cgroup_threadgroup_rwsem and will hold a reference * to the target cgroup. */ -static int cgroup_css_set_fork(struct kernel_clone_args *kargs) +static int cgroup_css_set_fork(struct task_struct *task, + struct kernel_clone_args *kargs) __acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem) { int ret; @@ -6752,7 +6756,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us * to always use the caller's credentials. */ - ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, + ret = cgroup_attach_permissions(task, cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); if (ret) @@ -6824,7 +6828,7 @@ int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs) struct cgroup_subsys *ss; int i, j, ret; - ret = cgroup_css_set_fork(kargs); + ret = cgroup_css_set_fork(child, kargs); if (ret) return ret; -- 2.47.3