The bpf_cgroup_from_id kfunc relies on cgroup_get_from_id to obtain the cgroup corresponding to a given cgroup ID. This helper can be called in a lot of contexts where the current thread can be random. A recent example was its use in sched_ext's ops.tick(), to obtain the root cgroup pointer. Since the current task can be whatever random user space task preempted by the timer tick, this makes the behavior of the helper unreliable. Refactor out __cgroup_get_from_id as the non-namespace aware version of cgroup_get_from_id, and change bpf_cgroup_from_id to make use of it. There is no compatibility breakage here, since changing the namespace against which the lookup is being done to the root cgroup namespace only permits a wider set of lookups to succeed now. The cgroup IDs across namespaces are globally unique, and thus don't need to be retranslated. Reported-by: Dan Schatzberg Signed-off-by: Kumar Kartikeya Dwivedi --- include/linux/cgroup.h | 1 + kernel/bpf/helpers.c | 2 +- kernel/cgroup/cgroup.c | 24 ++++++++++++++++++++---- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..b08c8e62881c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -650,6 +650,7 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); +struct cgroup *__cgroup_get_from_id(u64 id); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c0c0764a2025..51229aba5318 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2546,7 +2546,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { struct cgroup *cgrp; - cgrp = cgroup_get_from_id(cgid); + cgrp = __cgroup_get_from_id(cgid); if (IS_ERR(cgrp)) return NULL; return cgrp; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 312c6a8b55bb..6c2d20ac697c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6343,15 +6343,15 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) } /* - * cgroup_get_from_id : get the cgroup associated with cgroup id + * __cgroup_get_from_id : get the cgroup associated with cgroup id * @id: cgroup id * On success return the cgrp or ERR_PTR on failure - * Only cgroups within current task's cgroup NS are valid. + * There are no cgroup NS restrictions. */ -struct cgroup *cgroup_get_from_id(u64 id) +struct cgroup *__cgroup_get_from_id(u64 id) { struct kernfs_node *kn; - struct cgroup *cgrp, *root_cgrp; + struct cgroup *cgrp; kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); if (!kn) @@ -6373,6 +6373,22 @@ struct cgroup *cgroup_get_from_id(u64 id) if (!cgrp) return ERR_PTR(-ENOENT); + return cgrp; +} + +/* + * cgroup_get_from_id : get the cgroup associated with cgroup id + * @id: cgroup id + * On success return the cgrp or ERR_PTR on failure + * Only cgroups within current task's cgroup NS are valid. + */ +struct cgroup *cgroup_get_from_id(u64 id) +{ + struct cgroup *cgrp, *root_cgrp; + + cgrp = __cgroup_get_from_id(id); + if (IS_ERR(cgrp)) + return cgrp; root_cgrp = current_cgns_cgroup_dfl(); if (!cgroup_is_descendant(cgrp, root_cgrp)) { -- 2.51.0 Make sure that we only switch the cgroup namespace and enter a new cgroup in a child process separate from test_progs, to not mess up the environment for subsequent tests. To remove this cgroup, we need to wait for the child to exit, and then rmdir its cgroup. If the read call fails, or waitpid succeeds, we know the child exited (read call would fail when the last pipe end is closed, otherwise waitpid waits until exit(2) is called). We then invoke a newly introduced remove_cgroup_pid() helper, that identifies cgroup path using the passed in pid of the now dead child, instead of using the current process pid (getpid()). Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/cgroup_helpers.c | 20 ++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + .../selftests/bpf/prog_tests/cgrp_kfunc.c | 71 +++++++++++++++++++ .../selftests/bpf/progs/cgrp_kfunc_success.c | 12 ++++ 4 files changed, 104 insertions(+) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 15f626014872..20cede4db3ce 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path) log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); } +/* + * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID + * @relative_path: The cgroup path, relative to the workdir, to remove + * @pid: PID to be used to find cgroup_path + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup_pid(const char *relative_path, int pid) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path_pid(cgroup_path, relative_path, pid); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 182e1ac36c95..3857304be874 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); +void remove_cgroup_pid(const char *relative_path, int pid); unsigned long long get_cgroup_id(const char *relative_path); int get_cgroup1_hierarchy_id(const char *subsys_name); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index adda85f97058..4b42fbc96efc 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -4,6 +4,8 @@ #define _GNU_SOURCE #include #include +#include +#include #include "cgrp_kfunc_failure.skel.h" #include "cgrp_kfunc_success.skel.h" @@ -87,6 +89,72 @@ static const char * const success_tests[] = { "test_cgrp_from_id", }; +static void test_cgrp_from_id_ns(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + int pid, pipe_fd[2]; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = skel->progs.test_cgrp_from_id_ns; + + if (!ASSERT_OK(pipe(pipe_fd), "pipe")) + goto cleanup; + + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork result")) { + close(pipe_fd[0]); + close(pipe_fd[1]); + goto cleanup; + } + + if (pid == 0) { + int ret = 0; + + close(pipe_fd[0]); + + if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup")) + exit(1); + + if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns")) + exit(1); + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "test run ret")) + exit(1); + + if (!ASSERT_OK(opts.retval, "test run retval")) + exit(1); + + if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe")) + exit(1); + + exit(0); + } else { + int res; + + close(pipe_fd[1]); + + ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res"); + ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child"); + + remove_cgroup_pid("cgrp_from_id_ns", pid); + + ASSERT_OK(res, "result from run"); + } + + close(pipe_fd[0]); +cleanup: + cgrp_kfunc_success__destroy(skel); +} + void test_cgrp_kfunc(void) { int i, err; @@ -102,6 +170,9 @@ void test_cgrp_kfunc(void) run_success_test(success_tests[i]); } + if (test__start_subtest("test_cgrp_from_id_ns")) + test_cgrp_from_id_ns(); + RUN_TESTS(cgrp_kfunc_failure); cleanup: diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 5354455a01be..02d8f160ca0e 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) return 0; } + +SEC("syscall") +int test_cgrp_from_id_ns(void *ctx) +{ + struct cgroup *cg; + + cg = bpf_cgroup_from_id(1); + if (!cg) + return 42; + bpf_cgroup_release(cg); + return 0; +} -- 2.51.0