The bpf_cgroup_from_id kfunc relies on cgroup_get_from_id to obtain the cgroup corresponding to a given cgroup ID. This helper can be called in a lot of contexts where the current thread can be random. A recent example was its use in sched_ext's ops.tick(), to obtain the root cgroup pointer. Since the current task can be whatever random user space task preempted by the timer tick, this makes the behavior of the helper unreliable. Resolve this by refactoring cgroup_get_from_id to take a parameter to elide the cgroup_is_descendant check when root_cgns parameter is set to true. There is no compatibility breakage here, since changing the namespace against which the lookup is being done to the root cgroup namespace only permits a wider set of lookups to succeed now. The cgroup IDs across namespaces are globally unique, and thus don't need to be retranslated. Reported-by: Dan Schatzberg Acked-by: Tejun Heo Signed-off-by: Kumar Kartikeya Dwivedi --- include/linux/cgroup.h | 2 +- kernel/bpf/cgroup_iter.c | 2 +- kernel/bpf/helpers.c | 2 +- kernel/cgroup/cgroup.c | 7 ++++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..da757a496fbe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -650,7 +650,7 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); -struct cgroup *cgroup_get_from_id(u64 id); +struct cgroup *cgroup_get_from_id(u64 id, bool root_cgns); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index f04a468cf6a7..49234d035583 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -212,7 +212,7 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog, if (fd) cgrp = cgroup_v1v2_get_from_fd(fd); else if (id) - cgrp = cgroup_get_from_id(id); + cgrp = cgroup_get_from_id(id, false); else /* walk the entire hierarchy by default. */ cgrp = cgroup_get_from_path("/"); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6b4877e85a68..12466103917f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2537,7 +2537,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { struct cgroup *cgrp; - cgrp = cgroup_get_from_id(cgid); + cgrp = cgroup_get_from_id(cgid, true); if (IS_ERR(cgrp)) return NULL; return cgrp; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 312c6a8b55bb..b490e1e0d2c4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6345,10 +6345,11 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup_get_from_id : get the cgroup associated with cgroup id * @id: cgroup id + * @root_cgns: Select root cgroup namespace instead of current's. * On success return the cgrp or ERR_PTR on failure * Only cgroups within current task's cgroup NS are valid. */ -struct cgroup *cgroup_get_from_id(u64 id) +struct cgroup *cgroup_get_from_id(u64 id, bool root_cgns) { struct kernfs_node *kn; struct cgroup *cgrp, *root_cgrp; @@ -6374,6 +6375,10 @@ struct cgroup *cgroup_get_from_id(u64 id) if (!cgrp) return ERR_PTR(-ENOENT); + /* We don't need to namespace this operation against current. */ + if (root_cgns) + return cgrp; + root_cgrp = current_cgns_cgroup_dfl(); if (!cgroup_is_descendant(cgrp, root_cgrp)) { cgroup_put(cgrp); -- 2.47.3