Add a BPF LSM hook to manage attaching tasks to a cgroup. I'm in the
process of adding various "universal truth" BPF programs to systemd that
will make use of this.

This has been a long-standing request (cf. [1] and [2]). It will allow us
to enforce policy on cgroup migrations and ensure that services can never
escape their cgroups. This is just one of many use cases.

Link: https://github.com/systemd/systemd/issues/6356 [1]
Link: https://github.com/systemd/systemd/issues/22874 [2]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/bpf_lsm.h | 15 +++++++++++++++
 kernel/bpf/bpf_lsm.c    | 12 ++++++++++++
 kernel/cgroup/cgroup.c  | 18 +++++++++++-------
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 5ae438fdf567..bc1d35b271f5 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -12,8 +12,11 @@
 #include <linux/bpf_verifier.h>
 #include <linux/lsm_hooks.h>
 
+struct cgroup;
+struct cgroup_namespace;
 struct ns_common;
 struct nsset;
+struct super_block;
 
 #ifdef CONFIG_BPF_LSM
 
@@ -55,6 +58,9 @@ int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
 int bpf_lsm_namespace_alloc(struct ns_common *ns);
 void bpf_lsm_namespace_free(struct ns_common *ns);
 int bpf_lsm_namespace_install(struct nsset *nsset, struct ns_common *ns);
+int bpf_lsm_cgroup_attach(struct task_struct *task, struct cgroup *src_cgrp,
+			   struct cgroup *dst_cgrp, struct super_block *sb,
+			   bool threadgroup, struct cgroup_namespace *ns);
 
 int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
 				const struct bpf_dynptr *value_p, int flags);
@@ -125,6 +131,15 @@ static inline int bpf_lsm_namespace_install(struct nsset *nsset,
 {
 	return 0;
 }
+static inline int bpf_lsm_cgroup_attach(struct task_struct *task,
+					 struct cgroup *src_cgrp,
+					 struct cgroup *dst_cgrp,
+					 struct super_block *sb,
+					 bool threadgroup,
+					 struct cgroup_namespace *ns)
+{
+	return 0;
+}
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index f6378db46220..1da5585082fa 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -47,6 +47,16 @@ __weak noinline int bpf_lsm_namespace_install(struct nsset *nsset,
 	return 0;
 }
 
+__weak noinline int bpf_lsm_cgroup_attach(struct task_struct *task,
+					   struct cgroup *src_cgrp,
+					   struct cgroup *dst_cgrp,
+					   struct super_block *sb,
+					   bool threadgroup,
+					   struct cgroup_namespace *ns)
+{
+	return 0;
+}
+
 __bpf_hook_end();
 
 #define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
@@ -56,6 +66,7 @@ BTF_SET_START(bpf_lsm_hooks)
 BTF_ID(func, bpf_lsm_namespace_alloc)
 BTF_ID(func, bpf_lsm_namespace_free)
 BTF_ID(func, bpf_lsm_namespace_install)
+BTF_ID(func, bpf_lsm_cgroup_attach)
 BTF_SET_END(bpf_lsm_hooks)
 
 BTF_SET_START(bpf_lsm_disabled_hooks)
@@ -407,6 +418,7 @@ BTF_ID(func, bpf_lsm_task_to_inode)
 BTF_ID(func, bpf_lsm_userns_create)
 BTF_ID(func, bpf_lsm_namespace_alloc)
 BTF_ID(func, bpf_lsm_namespace_install)
+BTF_ID(func, bpf_lsm_cgroup_attach)
 BTF_SET_END(sleepable_lsm_hooks)
 
 BTF_SET_START(untrusted_lsm_hooks)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8af4351536cf..16535349b22f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -28,6 +28,7 @@
 #include "cgroup-internal.h"
 
 #include <linux/bpf-cgroup.h>
+#include <linux/bpf_lsm.h>
 #include <linux/cred.h>
 #include <linux/errno.h>
 #include <linux/init_task.h>
@@ -5334,7 +5335,8 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
 	return 0;
 }
 
-static int cgroup_attach_permissions(struct cgroup *src_cgrp,
+static int cgroup_attach_permissions(struct task_struct *task,
+				     struct cgroup *src_cgrp,
 				     struct cgroup *dst_cgrp,
 				     struct super_block *sb, bool threadgroup,
 				     struct cgroup_namespace *ns)
@@ -5350,9 +5352,9 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
 		return ret;
 
 	if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp))
-		ret = -EOPNOTSUPP;
+		return -EOPNOTSUPP;
 
-	return ret;
+	return bpf_lsm_cgroup_attach(task, src_cgrp, dst_cgrp, sb, threadgroup, ns);
 }
 
 static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
@@ -5384,7 +5386,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	 * inherited fd attacks.
 	 */
 	scoped_with_creds(of->file->f_cred)
-		ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
+		ret = cgroup_attach_permissions(task, src_cgrp, dst_cgrp,
 						of->file->f_path.dentry->d_sb,
 						threadgroup, ctx->ns);
 	if (ret)
@@ -6669,6 +6671,7 @@ static struct cgroup *cgroup_get_from_file(struct file *f)
 
 /**
  * cgroup_css_set_fork - find or create a css_set for a child process
+ * @task: the task to be attached
  * @kargs: the arguments passed to create the child process
  *
  * This functions finds or creates a new css_set which the child
@@ -6683,7 +6686,8 @@ static struct cgroup *cgroup_get_from_file(struct file *f)
  * before grabbing cgroup_threadgroup_rwsem and will hold a reference
  * to the target cgroup.
  */
-static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
+static int cgroup_css_set_fork(struct task_struct *task,
+			       struct kernel_clone_args *kargs)
 	__acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem)
 {
 	int ret;
@@ -6752,7 +6756,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
 	 * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us
 	 * to always use the caller's credentials.
 	 */
-	ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
+	ret = cgroup_attach_permissions(task, cset->dfl_cgrp, dst_cgrp, sb,
 					!(kargs->flags & CLONE_THREAD),
 					current->nsproxy->cgroup_ns);
 	if (ret)
@@ -6824,7 +6828,7 @@ int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs)
 	struct cgroup_subsys *ss;
 	int i, j, ret;
 
-	ret = cgroup_css_set_fork(kargs);
+	ret = cgroup_css_set_fork(child, kargs);
 	if (ret)
 		return ret;
 

-- 
2.47.3