Allowing sleepable programs to use tail calls. Making sure we can't mix sleepable and non-sleepable bpf programs in tail call map (BPF_MAP_TYPE_PROG_ARRAY) and allowing it to be used in sleepable programs. Sleepable programs can be preempted and sleep which might bring new source of race conditions, but both direct and indirect tail calls should not be affected. Direct tail calls work by patching direct jump to callee into bpf caller program, so no problem there. We atomically switch from nop to jump instruction. Indirect tail call reads the callee from the map and then jumps to it. The callee bpf program can't disappear (be released) from the caller, because it is executed under rcu lock (rcu_read_lock_trace). Signed-off-by: Jiri Olsa --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 4 +++- kernel/bpf/verifier.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5524f9429e76..3b0ceb759075 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -287,6 +287,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + bool sleepable; u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; enum bpf_attach_type expected_attach_type; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e0b8a8a5aaa9..5ebece600aeb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2401,6 +2401,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, map->owner->type = prog_type; map->owner->jited = fp->jited; map->owner->xdp_has_frags = aux->xdp_has_frags; + map->owner->sleepable = fp->sleepable; map->owner->expected_attach_type = fp->expected_attach_type; map->owner->attach_func_proto = aux->attach_func_proto; for_each_cgroup_storage_type(i) { @@ -2412,7 +2413,8 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, } else { ret = map->owner->type == prog_type && map->owner->jited == fp->jited && - map->owner->xdp_has_frags == aux->xdp_has_frags; + map->owner->xdp_has_frags == aux->xdp_has_frags && + map->owner->sleepable == fp->sleepable; if (ret && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && map->owner->expected_attach_type != fp->expected_attach_type) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e7ff8394e0da..f185ebc6748d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21386,6 +21386,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_STACK: case BPF_MAP_TYPE_ARENA: case BPF_MAP_TYPE_INSN_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: break; default: verbose(env, -- 2.52.0 Adding test that makes sure we can't mix sleepable and non-sleepable bpf programs in the BPF_MAP_TYPE_PROG_ARRAY map and that we can do tail call in the sleepable program. Signed-off-by: Jiri Olsa --- .../selftests/bpf/prog_tests/tailcalls.c | 74 +++++++++++++++++++ .../selftests/bpf/progs/tailcall_sleepable.c | 43 +++++++++++ 2 files changed, 117 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_sleepable.c diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 0ab36503c3b2..7d534fde0af9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -8,6 +8,7 @@ #include "tailcall_freplace.skel.h" #include "tc_bpf2bpf.skel.h" #include "tailcall_fail.skel.h" +#include "tailcall_sleepable.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1653,6 +1654,77 @@ static void test_tailcall_failure() RUN_TESTS(tailcall_fail); } +noinline void uprobe_sleepable_trigger(void) +{ + asm volatile (""); +} + +static void test_tailcall_sleepable(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct tailcall_sleepable *skel; + int prog_fd, map_fd; + int err, key; + + skel = tailcall_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open")) + return; + + /* + * Test that we can't load uprobe_normal and uprobe_sleepable_1, + * because they share tailcall map. + */ + bpf_program__set_autoload(skel->progs.uprobe_normal, true); + bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true); + + err = tailcall_sleepable__load(skel); + if (!ASSERT_ERR(err, "tailcall_sleepable__load")) + goto out; + + tailcall_sleepable__destroy(skel); + + /* + * Test that we can tail call from sleepable to sleepable program. + */ + skel = tailcall_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open")) + return; + + bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true); + bpf_program__set_autoload(skel->progs.uprobe_sleepable_2, true); + + err = tailcall_sleepable__load(skel); + if (!ASSERT_OK(err, "tailcall_sleepable__load")) + goto out; + + /* Add sleepable uprobe_sleepable_2 to jmp_table[0]. */ + key = 0; + prog_fd = bpf_program__fd(skel->progs.uprobe_sleepable_2); + map_fd = bpf_map__fd(skel->maps.jmp_table); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + skel->bss->my_pid = getpid(); + + /* Attach uprobe_sleepable_1 to uprobe_sleepable_trigger and hit it. */ + opts.func_name = "uprobe_sleepable_trigger"; + skel->links.uprobe_sleepable_1 = bpf_program__attach_uprobe_opts( + skel->progs.uprobe_sleepable_1, + -1, + "/proc/self/exe", + 0 /* offset */, + &opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_sleepable_1, "bpf_program__attach_uprobe_opts")) + goto out; + + uprobe_sleepable_trigger(); + ASSERT_EQ(skel->bss->executed, 1, "executed"); + +out: + tailcall_sleepable__destroy(skel); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1707,4 +1779,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_freplace(); if (test__start_subtest("tailcall_failure")) test_tailcall_failure(); + if (test__start_subtest("tailcall_sleepable")) + test_tailcall_sleepable(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_sleepable.c b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c new file mode 100644 index 000000000000..d959a9eaaa9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_sleepable.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_misc.h" +#include "bpf_test_utils.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __array(values, void (void)); +} jmp_table SEC(".maps"); + +SEC("?uprobe") +int uprobe_normal(void *ctx) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("?uprobe.s") +int uprobe_sleepable_1(void *ctx) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +int executed = 0; +int my_pid = 0; + +SEC("?uprobe.s") +int uprobe_sleepable_2(void *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + executed++; + return 0; +} + +char __license[] SEC("license") = "GPL"; -- 2.52.0