The fsession is something that similar to kprobe session. It allow to attach a single BPF program to both the entry and the exit of the target functions. Introduce the struct bpf_fsession_link, which allows to add the link to both the fentry and fexit progs_hlist of the trampoline. Signed-off-by: Menglong Dong Co-developed-by: Leon Hwang Signed-off-by: Leon Hwang --- v5: - unify the name to "fsession" - use more explicit way in __bpf_trampoline_link_prog() v4: - instead of adding a new hlist to progs_hlist in trampoline, add the bpf program to both the fentry hlist and the fexit hlist. --- include/linux/bpf.h | 19 +++++++++ include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 2 + kernel/bpf/syscall.c | 18 ++++++++- kernel/bpf/trampoline.c | 40 ++++++++++++++++--- kernel/bpf/verifier.c | 12 ++++-- net/bpf/test_run.c | 1 + net/core/bpf_sk_storage.c | 1 + tools/include/uapi/linux/bpf.h | 1 + .../bpf/prog_tests/tracing_failure.c | 2 +- 10 files changed, 87 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5936f8e2996f..41228b0add52 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1309,6 +1309,7 @@ enum bpf_tramp_prog_type { BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ + BPF_TRAMP_FSESSION, }; struct bpf_tramp_image { @@ -1875,6 +1876,11 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_fsession_link { + struct bpf_tracing_link link; + struct bpf_tramp_link fexit; +}; + struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; @@ -2169,6 +2175,19 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION) + cnt++; + } + + return cnt; +} + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a2ade4be60f..44e7dbc278e3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1145,6 +1145,7 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 539c9fdea41d..8b1dcd440356 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6107,6 +6107,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_TRACE_FSESSION: /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; @@ -6704,6 +6705,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, fallthrough; case BPF_LSM_CGROUP: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to * int LSM hooks, they use MODIFY_RETURN trampolines. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ecc0929ce462..c65e7a70cb78 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3577,6 +3577,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, case BPF_PROG_TYPE_TRACING: if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->expected_attach_type != BPF_TRACE_FSESSION && prog->expected_attach_type != BPF_MODIFY_RETURN) { err = -EINVAL; goto out_put_prog; @@ -3626,7 +3627,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); } - link = kzalloc(sizeof(*link), GFP_USER); + if (prog->expected_attach_type == BPF_TRACE_FSESSION) { + struct bpf_fsession_link *fslink; + + fslink = kzalloc(sizeof(*fslink), GFP_USER); + if (fslink) { + bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING, + &bpf_tracing_link_lops, prog, attach_type); + fslink->fexit.cookie = bpf_cookie; + link = &fslink->link; + } else { + link = NULL; + } + } else { + link = kzalloc(sizeof(*link), GFP_USER); + } if (!link) { err = -ENOMEM; goto out_put_prog; @@ -4350,6 +4365,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: case BPF_MODIFY_RETURN: return BPF_PROG_TYPE_TRACING; case BPF_LSM_MAC: diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 2a125d063e62..11e043049d68 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -111,7 +111,7 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) return (ptype == BPF_PROG_TYPE_TRACING && (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN)) || + eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION)) || (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } @@ -559,6 +559,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: return BPF_TRAMP_FEXIT; + case BPF_TRACE_FSESSION: + return BPF_TRAMP_FSESSION; case BPF_LSM_MAC: if (!prog->aux->attach_func_proto->type) /* The function returns void, we cannot modify its @@ -596,6 +598,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, { enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; + struct bpf_fsession_link *fslink; + struct hlist_head *prog_list; int err = 0; int cnt = 0, i; @@ -621,24 +625,44 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, BPF_MOD_JUMP, NULL, link->link.prog->bpf_func); } + if (kind == BPF_TRAMP_FSESSION) { + prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY]; + cnt++; + } else { + prog_list = &tr->progs_hlist[kind]; + } if (cnt >= BPF_MAX_TRAMP_LINKS) return -E2BIG; if (!hlist_unhashed(&link->tramp_hlist)) /* prog already linked */ return -EBUSY; - hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { + hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) { if (link_exiting->link.prog != link->link.prog) continue; /* prog already linked */ return -EBUSY; } - hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); - tr->progs_cnt[kind]++; + hlist_add_head(&link->tramp_hlist, prog_list); + if (kind == BPF_TRAMP_FSESSION) { + tr->progs_cnt[BPF_TRAMP_FENTRY]++; + fslink = container_of(link, struct bpf_fsession_link, link.link); + hlist_add_head(&fslink->fexit.tramp_hlist, + &tr->progs_hlist[BPF_TRAMP_FEXIT]); + tr->progs_cnt[BPF_TRAMP_FEXIT]++; + } else { + tr->progs_cnt[kind]++; + } err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); if (err) { hlist_del_init(&link->tramp_hlist); - tr->progs_cnt[kind]--; + if (kind == BPF_TRAMP_FSESSION) { + tr->progs_cnt[BPF_TRAMP_FENTRY]--; + hlist_del_init(&fslink->fexit.tramp_hlist); + tr->progs_cnt[BPF_TRAMP_FEXIT]--; + } else { + tr->progs_cnt[kind]--; + } } return err; } @@ -659,6 +683,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr, struct bpf_prog *tgt_prog) { + struct bpf_fsession_link *fslink; enum bpf_tramp_prog_type kind; int err; @@ -672,6 +697,11 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, guard(mutex)(&tgt_prog->aux->ext_mutex); tgt_prog->aux->is_extended = false; return err; + } else if (kind == BPF_TRAMP_FSESSION) { + fslink = container_of(link, struct bpf_fsession_link, link.link); + hlist_del_init(&fslink->fexit.tramp_hlist); + tr->progs_cnt[BPF_TRAMP_FEXIT]--; + kind = BPF_TRAMP_FENTRY; } hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 53635ea2e41b..774c9b0aafa3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17403,6 +17403,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char switch (env->prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: @@ -23300,6 +23301,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ret) { if (eatype == BPF_TRACE_FEXIT || + eatype == BPF_TRACE_FSESSION || eatype == BPF_MODIFY_RETURN) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); @@ -24244,7 +24246,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (tgt_prog->type == BPF_PROG_TYPE_TRACING && prog_extension && (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || - tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { + tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || + tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) { /* Program extensions can extend all program types * except fentry/fexit. The reason is the following. * The fentry/fexit programs are used for performance @@ -24259,7 +24262,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, * beyond reasonable stack size. Hence extending fentry * is not allowed. */ - bpf_log(log, "Cannot extend fentry/fexit\n"); + bpf_log(log, "Cannot extend fentry/fexit/fsession\n"); return -EINVAL; } } else { @@ -24343,6 +24346,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: if (!btf_type_is_func(t)) { bpf_log(log, "attach_btf_id %u is not a function\n", btf_id); @@ -24509,6 +24513,7 @@ static bool can_be_sleepable(struct bpf_prog *prog) case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: case BPF_TRACE_ITER: + case BPF_TRACE_FSESSION: return true; default: return false; @@ -24590,9 +24595,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) tgt_info.tgt_name); return -EINVAL; } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || + prog->expected_attach_type == BPF_TRACE_FSESSION || prog->expected_attach_type == BPF_MODIFY_RETURN) && btf_id_set_contains(&noreturn_deny, btf_id)) { - verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n", + verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n", tgt_info.tgt_name); return -EINVAL; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 655efac6f133..3b0d9bd039de 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -685,6 +685,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 850dd736ccd1..de111818f3a0 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -365,6 +365,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b816bc53d2e1..3ca7d76e05f0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1145,6 +1145,7 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index 10e231965589..f9f9e1cb87bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -73,7 +73,7 @@ static void test_tracing_deny(void) static void test_fexit_noreturns(void) { test_tracing_fail_prog("fexit_noreturns", - "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected."); + "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected."); } void test_tracing_failure(void) -- 2.52.0 For now, ctx[-1] is used to store the nr_args in the trampoline. However, 1-byte is enough to store such information. Therefore, we use only the last byte of ctx[-1] to store the nr_args, and reserve the rest for other usages. Signed-off-by: Menglong Dong --- v8: - fix the missed get_func_arg_cnt --- kernel/bpf/verifier.c | 35 +++++++++++++++++++---------------- kernel/trace/bpf_trace.c | 6 +++--- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 774c9b0aafa3..bfff3f84fd91 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23277,15 +23277,16 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_get_func_arg) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); - insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); - insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); - insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); - insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); - insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); - insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); - insn_buf[7] = BPF_JMP_A(1); - insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); - cnt = 9; + insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); + insn_buf[2] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); + insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); + insn_buf[4] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); + insn_buf[5] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); + insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[7] = BPF_MOV64_IMM(BPF_REG_0, 0); + insn_buf[8] = BPF_JMP_A(1); + insn_buf[9] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); + cnt = 10; new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) @@ -23305,12 +23306,13 @@ static int do_misc_fixups(struct bpf_verifier_env *env) eatype == BPF_MODIFY_RETURN) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); - insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); - insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); - insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); - insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); - insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); - cnt = 6; + insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); + insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); + insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); + cnt = 7; } else { insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); cnt = 1; @@ -23331,8 +23333,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_get_func_arg_cnt) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); - new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 2); if (!new_prog) return -ENOMEM; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6e076485bf70..5f621f0403f8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1194,7 +1194,7 @@ const struct bpf_func_proto bpf_get_branch_snapshot_proto = { BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) { /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; + u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; if ((u64) n >= nr_args) return -EINVAL; @@ -1214,7 +1214,7 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) { /* This helper call is inlined by verifier. */ - u64 nr_args = ((u64 *)ctx)[-1]; + u64 nr_args = ((u64 *)ctx)[-1] & 0xFF; *value = ((u64 *)ctx)[nr_args]; return 0; @@ -1231,7 +1231,7 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { BPF_CALL_1(get_func_arg_cnt, void *, ctx) { /* This helper call is inlined by verifier. */ - return ((u64 *)ctx)[-1]; + return ((u64 *)ctx)[-1] & 0xFF; } static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { -- 2.52.0 Add the function argument of "void *ctx" to bpf_session_cookie() and bpf_session_is_return(), which is a preparation of the next patch. The two kfunc is seldom used now, so it will not introduce much effect to change their function prototype. Signed-off-by: Menglong Dong --- kernel/trace/bpf_trace.c | 4 ++-- tools/testing/selftests/bpf/bpf_kfuncs.h | 4 ++-- .../bpf/progs/kprobe_multi_session_cookie.c | 12 ++++++------ .../selftests/bpf/progs/uprobe_multi_session.c | 4 ++-- .../bpf/progs/uprobe_multi_session_cookie.c | 12 ++++++------ .../bpf/progs/uprobe_multi_session_recursive.c | 8 ++++---- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5f621f0403f8..297dcafb2c55 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3316,7 +3316,7 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) __bpf_kfunc_start_defs(); -__bpf_kfunc bool bpf_session_is_return(void) +__bpf_kfunc bool bpf_session_is_return(void *ctx) { struct bpf_session_run_ctx *session_ctx; @@ -3324,7 +3324,7 @@ __bpf_kfunc bool bpf_session_is_return(void) return session_ctx->is_return; } -__bpf_kfunc __u64 *bpf_session_cookie(void) +__bpf_kfunc __u64 *bpf_session_cookie(void *ctx) { struct bpf_session_run_ctx *session_ctx; diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index e0189254bb6e..dc495cb4c22e 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -79,8 +79,8 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, struct bpf_dynptr *sig_ptr, struct bpf_key *trusted_keyring) __ksym; -extern bool bpf_session_is_return(void) __ksym __weak; -extern __u64 *bpf_session_cookie(void) __ksym __weak; +extern bool bpf_session_is_return(void *ctx) __ksym __weak; +extern __u64 *bpf_session_cookie(void *ctx) __ksym __weak; struct dentry; /* Description diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c index 0835b5edf685..4981d29e3907 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c @@ -23,16 +23,16 @@ int BPF_PROG(trigger) return 0; } -static int check_cookie(__u64 val, __u64 *result) +static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result) { __u64 *cookie; if (bpf_get_current_pid_tgid() >> 32 != pid) return 1; - cookie = bpf_session_cookie(); + cookie = bpf_session_cookie(ctx); - if (bpf_session_is_return()) + if (bpf_session_is_return(ctx)) *result = *cookie == val ? val : 0; else *cookie = val; @@ -42,17 +42,17 @@ static int check_cookie(__u64 val, __u64 *result) SEC("kprobe.session/bpf_fentry_test1") int test_kprobe_1(struct pt_regs *ctx) { - return check_cookie(1, &test_kprobe_1_result); + return check_cookie(ctx, 1, &test_kprobe_1_result); } SEC("kprobe.session/bpf_fentry_test1") int test_kprobe_2(struct pt_regs *ctx) { - return check_cookie(2, &test_kprobe_2_result); + return check_cookie(ctx, 2, &test_kprobe_2_result); } SEC("kprobe.session/bpf_fentry_test1") int test_kprobe_3(struct pt_regs *ctx) { - return check_cookie(3, &test_kprobe_3_result); + return check_cookie(ctx, 3, &test_kprobe_3_result); } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c index 30bff90b68dc..a06c2d7ec022 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c @@ -51,7 +51,7 @@ static int uprobe_multi_check(void *ctx, bool is_return) SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*") int uprobe(struct pt_regs *ctx) { - return uprobe_multi_check(ctx, bpf_session_is_return()); + return uprobe_multi_check(ctx, bpf_session_is_return(ctx)); } static __always_inline bool verify_sleepable_user_copy(void) @@ -67,5 +67,5 @@ int uprobe_sleepable(struct pt_regs *ctx) { if (verify_sleepable_user_copy()) uprobe_multi_sleep_result++; - return uprobe_multi_check(ctx, bpf_session_is_return()); + return uprobe_multi_check(ctx, bpf_session_is_return(ctx)); } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c index 5befdf944dc6..d916d5017233 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c @@ -13,16 +13,16 @@ __u64 test_uprobe_1_result = 0; __u64 test_uprobe_2_result = 0; __u64 test_uprobe_3_result = 0; -static int check_cookie(__u64 val, __u64 *result) +static int check_cookie(struct pt_regs *ctx, __u64 val, __u64 *result) { __u64 *cookie; if (bpf_get_current_pid_tgid() >> 32 != pid) return 1; - cookie = bpf_session_cookie(); + cookie = bpf_session_cookie(ctx); - if (bpf_session_is_return()) + if (bpf_session_is_return(ctx)) *result = *cookie == val ? val : 0; else *cookie = val; @@ -32,17 +32,17 @@ static int check_cookie(__u64 val, __u64 *result) SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") int uprobe_1(struct pt_regs *ctx) { - return check_cookie(1, &test_uprobe_1_result); + return check_cookie(ctx, 1, &test_uprobe_1_result); } SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2") int uprobe_2(struct pt_regs *ctx) { - return check_cookie(2, &test_uprobe_2_result); + return check_cookie(ctx, 2, &test_uprobe_2_result); } SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3") int uprobe_3(struct pt_regs *ctx) { - return check_cookie(3, &test_uprobe_3_result); + return check_cookie(ctx, 3, &test_uprobe_3_result); } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c index 8fbcd69fae22..d3d682512b69 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c @@ -16,11 +16,11 @@ int idx_return = 0; __u64 test_uprobe_cookie_entry[6]; __u64 test_uprobe_cookie_return[3]; -static int check_cookie(void) +static int check_cookie(struct pt_regs *ctx) { - __u64 *cookie = bpf_session_cookie(); + __u64 *cookie = bpf_session_cookie(ctx); - if (bpf_session_is_return()) { + if (bpf_session_is_return(ctx)) { if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return)) return 1; test_uprobe_cookie_return[idx_return++] = *cookie; @@ -40,5 +40,5 @@ int uprobe_recursive(struct pt_regs *ctx) if (bpf_get_current_pid_tgid() >> 32 != pid) return 1; - return check_cookie(); + return check_cookie(ctx); } -- 2.52.0 If fsession exists, we will use the bit (1 << BPF_TRAMP_M_IS_RETURN) in ctx[-1] to store the "is_return" flag. The logic of bpf_session_is_return() for fsession is implemented in the verifier by inline following code: bool bpf_session_is_return(void *ctx) { return !!(((u64 *)ctx)[-1] & (1 << BPF_TRAMP_M_IS_RETURN)); } Signed-off-by: Menglong Dong Co-developed-by: Leon Hwang Signed-off-by: Leon Hwang --- v9: - remove the definition of bpf_fsession_is_return() v7: - reuse the kfunc bpf_session_is_return() instead of introduce new kfunc v4: - split out the bpf_fsession_cookie() to another patch v3: - merge the bpf_tracing_is_exit and bpf_fsession_cookie into a single patch v2: - store the session flags after return value, instead of before nr_args - inline the bpf_tracing_is_exit, as Jiri suggested --- include/linux/bpf.h | 3 +++ kernel/bpf/verifier.c | 15 ++++++++++++++- kernel/trace/bpf_trace.c | 28 +++++++++++++++++----------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 41228b0add52..2640ec2157e1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1229,6 +1229,9 @@ enum { #endif }; +#define BPF_TRAMP_M_NR_ARGS 0 +#define BPF_TRAMP_M_IS_RETURN 8 + struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; int nr_links; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bfff3f84fd91..1b0292a03186 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12374,6 +12374,7 @@ enum special_kfunc_type { KF_bpf_arena_alloc_pages, KF_bpf_arena_free_pages, KF_bpf_arena_reserve_pages, + KF_bpf_session_is_return, }; BTF_ID_LIST(special_kfunc_list) @@ -12451,6 +12452,7 @@ BTF_ID(func, bpf_task_work_schedule_resume_impl) BTF_ID(func, bpf_arena_alloc_pages) BTF_ID(func, bpf_arena_free_pages) BTF_ID(func, bpf_arena_reserve_pages) +BTF_ID(func, bpf_session_is_return) static bool is_task_work_add_kfunc(u32 func_id) { @@ -12505,7 +12507,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg = ®s[regno]; bool arg_mem_size = false; - if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) + if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || + meta->func_id == special_kfunc_list[KF_bpf_session_is_return]) return KF_ARG_PTR_TO_CTX; if (argno + 1 < nargs && @@ -22558,6 +22561,16 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; + } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] && + env->prog->expected_attach_type == BPF_TRACE_FSESSION) { + /* implement and inline the bpf_session_is_return() for + * fsession, and the logic is: + * return !!(((u64 *)ctx)[-1] & (1 << BPF_TRAMP_M_IS_RETURN)) + */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_IS_RETURN); + insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1); + *cnt = 3; } if (env->insn_aux_data[insn_idx].arg_prog) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 297dcafb2c55..1fe508d451b7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3334,34 +3334,40 @@ __bpf_kfunc __u64 *bpf_session_cookie(void *ctx) __bpf_kfunc_end_defs(); -BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids) +BTF_KFUNCS_START(session_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_session_is_return) BTF_ID_FLAGS(func, bpf_session_cookie) -BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids) +BTF_KFUNCS_END(session_kfunc_set_ids) -static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id) +static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id) { - if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id)) + if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id)) return 0; - if (!is_kprobe_session(prog) && !is_uprobe_session(prog)) + if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && + prog->expected_attach_type != BPF_TRACE_FSESSION) return -EACCES; return 0; } -static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = { +static const struct btf_kfunc_id_set bpf_session_kfunc_set = { .owner = THIS_MODULE, - .set = &kprobe_multi_kfunc_set_ids, - .filter = bpf_kprobe_multi_filter, + .set = &session_kfunc_set_ids, + .filter = bpf_session_filter, }; -static int __init bpf_kprobe_multi_kfuncs_init(void) +static int __init bpf_trace_kfuncs_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set); + int err = 0; + + err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set); + err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set); + + return err; } -late_initcall(bpf_kprobe_multi_kfuncs_init); +late_initcall(bpf_trace_kfuncs_init); typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk); -- 2.52.0 Implement session cookie for fsession. In order to limit the stack usage, we make 4 as the maximum of the cookie count. The offset of the current cookie is stored in the "(ctx[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF". Therefore, we can get the session cookie with ctx[-offset]. The stack will look like this: return value -> 8 bytes argN -> 8 bytes ... arg1 -> 8 bytes nr_args -> 8 bytes ip (optional) -> 8 bytes cookie2 -> 8 bytes cookie1 -> 8 bytes Implement and inline the bpf_session_cookie() for the fsession in the verifier. Signed-off-by: Menglong Dong --- v9: - remove the definition of bpf_fsession_cookie() v7: - reuse bpf_session_cookie() instead of introduce new kfunc v5: - remove "cookie_cnt" in struct bpf_trampoline v4: - limit the maximum of the cookie count to 4 - store the session cookies before nr_regs in stack --- include/linux/bpf.h | 15 +++++++++++++++ kernel/bpf/trampoline.c | 13 +++++++++++-- kernel/bpf/verifier.c | 22 +++++++++++++++++++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2640ec2157e1..a416050e0dd2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1231,6 +1231,7 @@ enum { #define BPF_TRAMP_M_NR_ARGS 0 #define BPF_TRAMP_M_IS_RETURN 8 +#define BPF_TRAMP_M_COOKIE 9 struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; @@ -1783,6 +1784,7 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ + call_session_cookie:1, /* Do we call bpf_session_cookie() */ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ @@ -2191,6 +2193,19 @@ static inline int bpf_fsession_cnt(struct bpf_tramp_links *links) return cnt; } +static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (fentries.links[i]->link.prog->call_session_cookie) + cnt++; + } + + return cnt; +} + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 11e043049d68..29b4e00d860c 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -592,6 +592,8 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) return 0; } +#define BPF_TRAMP_MAX_COOKIES 4 + static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr, struct bpf_prog *tgt_prog) @@ -600,7 +602,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tramp_link *link_exiting; struct bpf_fsession_link *fslink; struct hlist_head *prog_list; - int err = 0; + int err = 0, cookie_cnt = 0; int cnt = 0, i; kind = bpf_attach_type_to_tramp(link->link.prog); @@ -637,11 +639,18 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, /* prog already linked */ return -EBUSY; hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) { - if (link_exiting->link.prog != link->link.prog) + if (link_exiting->link.prog != link->link.prog) { + if (kind == BPF_TRAMP_FSESSION && + link_exiting->link.prog->call_session_cookie) + cookie_cnt++; continue; + } /* prog already linked */ return -EBUSY; } + if (link->link.prog->call_session_cookie && + cookie_cnt >= BPF_TRAMP_MAX_COOKIES) + return -E2BIG; hlist_add_head(&link->tramp_hlist, prog_list); if (kind == BPF_TRAMP_FSESSION) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1b0292a03186..b91fd8af2393 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12508,7 +12508,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, bool arg_mem_size = false; if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || - meta->func_id == special_kfunc_list[KF_bpf_session_is_return]) + meta->func_id == special_kfunc_list[KF_bpf_session_is_return] || + meta->func_id == special_kfunc_list[KF_bpf_session_cookie]) return KF_ARG_PTR_TO_CTX; if (argno + 1 < nargs && @@ -14294,6 +14295,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return err; } + if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) + env->prog->call_session_cookie = true; + return 0; } @@ -22571,6 +22575,22 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_IS_RETURN); insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1); *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] && + env->prog->expected_attach_type == BPF_TRACE_FSESSION) { + /* inline bpf_session_cookie() for fsession: + * __u64 *bpf_session_cookie(void *ctx) + * { + * u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF; + * return &((u64 *)ctx)[-off]; + * } + */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_COOKIE); + insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); + insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1); + insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0); + *cnt = 6; } if (env->insn_aux_data[insn_idx].arg_prog) { -- 2.52.0 Introduce the helper emit_store_stack_imm64(), which is used to store a imm64 to the stack with the help of r0. Signed-off-by: Menglong Dong --- v9: - rename emit_st_r0_imm64() to emit_store_stack_imm64() --- arch/x86/net/bpf_jit_comp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e3b1c4b1d550..d94f7038c441 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1300,6 +1300,15 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm) emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm); } +static void emit_store_stack_imm64(u8 **pprog, int stack_off, u64 imm64) +{ + /* mov rax, imm64 + * mov QWORD PTR [rbp - stack_off], rax + */ + emit_mov_imm64(pprog, BPF_REG_0, imm64 >> 32, (u32) imm64); + emit_stx(pprog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_off); +} + static int emit_atomic_rmw(u8 **pprog, u32 atomic_op, u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) { @@ -3352,16 +3361,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * mov rax, nr_regs * mov QWORD PTR [rbp - nregs_off], rax */ - emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs); - emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off); + emit_store_stack_imm64(&prog, nregs_off, nr_regs); if (flags & BPF_TRAMP_F_IP_ARG) { /* Store IP address of the traced function: * movabsq rax, func_addr * mov QWORD PTR [rbp - ip_off], rax */ - emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr); - emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); + emit_store_stack_imm64(&prog, ip_off, (long)func_addr); } save_args(m, &prog, regs_off, false, flags); -- 2.52.0 Add BPF_TRACE_FSESSION supporting to x86_64, including: 1. clear the return value in the stack before fentry to make the fentry of the fsession can only get 0 with bpf_get_func_ret(). 2. clear all the session cookies' value in the stack. 2. store the index of the cookie to ctx[-1] before the calling to fsession 3. store the "is_return" flag to ctx[-1] before the calling to fexit of the fsession. Signed-off-by: Menglong Dong Co-developed-by: Leon Hwang Signed-off-by: Leon Hwang --- v5: - add the variable "func_meta" - define cookie_off in a new line v4: - some adjustment to the 1st patch, such as we get the fsession prog from fentry and fexit hlist - remove the supporting of skipping fexit with fentry return non-zero v2: - add session cookie support - add the session stuff after return value, instead of before nr_args --- arch/x86/net/bpf_jit_comp.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d94f7038c441..0671a434c00d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3094,12 +3094,17 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_links *tl, int stack_size, int run_ctx_off, bool save_ret, - void *image, void *rw_image) + void *image, void *rw_image, u64 func_meta) { int i; u8 *prog = *pprog; for (i = 0; i < tl->nr_links; i++) { + if (tl->links[i]->link.prog->call_session_cookie) { + /* 'stack_size + 8' is the offset of func_md in stack */ + emit_store_stack_imm64(&prog, stack_size + 8, func_meta); + func_meta -= (1 << BPF_TRAMP_M_COOKIE); + } if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, save_ret, image, rw_image)) return -EINVAL; @@ -3222,7 +3227,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; void *orig_call = func_addr; + int cookie_off, cookie_cnt; u8 **branches = NULL; + u64 func_meta; u8 *prog; bool save_ret; @@ -3290,6 +3297,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im ip_off = stack_size; + cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + /* room for session cookies */ + stack_size += cookie_cnt * 8; + cookie_off = stack_size; + stack_size += 8; rbx_off = stack_size; @@ -3383,9 +3395,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } } + if (bpf_fsession_cnt(tlinks)) { + /* clear all the session cookies' value */ + for (int i = 0; i < cookie_cnt; i++) + emit_store_stack_imm64(&prog, cookie_off - 8 * i, 0); + /* clear the return value to make sure fentry always get 0 */ + emit_store_stack_imm64(&prog, 8, 0); + } + func_meta = nr_regs + (((cookie_off - regs_off) / 8) << BPF_TRAMP_M_COOKIE); + if (fentry->nr_links) { if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, - flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image)) + flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image, + func_meta)) return -EINVAL; } @@ -3445,9 +3467,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } } + /* set the "is_return" flag for fsession */ + func_meta += (1 << BPF_TRAMP_M_IS_RETURN); + if (bpf_fsession_cnt(tlinks)) + emit_store_stack_imm64(&prog, nregs_off, func_meta); + if (fexit->nr_links) { if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, - false, image, rw_image)) { + false, image, rw_image, func_meta)) { ret = -EINVAL; goto cleanup; } -- 2.52.0 Add BPF_TRACE_FSESSION to libbpf and bpftool. Signed-off-by: Menglong Dong --- v5: - remove the handling of BPF_TRACE_SESSION in legacy fallback path for BPF_RAW_TRACEPOINT_OPEN - use fsession terminology consistently --- tools/bpf/bpftool/common.c | 1 + tools/lib/bpf/bpf.c | 1 + tools/lib/bpf/libbpf.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index e8daf963ecef..8bfcff9e2f63 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1191,6 +1191,7 @@ const char *bpf_attach_type_input_str(enum bpf_attach_type t) case BPF_TRACE_FENTRY: return "fentry"; case BPF_TRACE_FEXIT: return "fexit"; case BPF_MODIFY_RETURN: return "mod_ret"; + case BPF_TRACE_FSESSION: return "fsession"; case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select"; case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate"; default: return libbpf_bpf_attach_type_str(t); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 21b57a629916..5846de364209 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -794,6 +794,7 @@ int bpf_link_create(int prog_fd, int target_fd, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_TRACE_FSESSION: case BPF_LSM_MAC: attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); if (!OPTS_ZEROED(opts, tracing)) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ea81701e274..6564b0e02909 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -115,6 +115,7 @@ static const char * const attach_type_name[] = { [BPF_TRACE_FENTRY] = "trace_fentry", [BPF_TRACE_FEXIT] = "trace_fexit", [BPF_MODIFY_RETURN] = "modify_return", + [BPF_TRACE_FSESSION] = "trace_fsession", [BPF_LSM_MAC] = "lsm_mac", [BPF_LSM_CGROUP] = "lsm_cgroup", [BPF_SK_LOOKUP] = "sk_lookup", @@ -9859,6 +9860,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fsession+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fsession.s+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), -- 2.52.0 Add testcases for BPF_TRACE_FSESSION. The function arguments and return value are tested both in the entry and exit. And the kfunc bpf_session_is_ret() is also tested. As the layout of the stack changed for fsession, so we also test bpf_get_func_ip() for it. Signed-off-by: Menglong Dong --- v3: - restructure the testcase by combine the testcases for session cookie and get_func_ip into one patch --- .../selftests/bpf/prog_tests/fsession_test.c | 90 ++++++++++++++ .../selftests/bpf/progs/fsession_test.c | 110 ++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fsession_test.c create mode 100644 tools/testing/selftests/bpf/progs/fsession_test.c diff --git a/tools/testing/selftests/bpf/prog_tests/fsession_test.c b/tools/testing/selftests/bpf/prog_tests/fsession_test.c new file mode 100644 index 000000000000..83f3953a1ff6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fsession_test.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ +#include +#include "fsession_test.skel.h" + +static int check_result(struct fsession_test *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* Trigger test function calls */ + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return err; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return topts.retval; + + for (int i = 0; i < sizeof(*skel->bss) / sizeof(__u64); i++) { + if (!ASSERT_EQ(((__u64 *)skel->bss)[i], 1, "test_result")) + return -EINVAL; + } + + return 0; +} + +static void test_fsession_basic(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open_and_load")) + goto cleanup; + + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_attach")) + goto cleanup; + + check_result(skel); +cleanup: + fsession_test__destroy(skel); +} + +static void test_fsession_reattach(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open_and_load")) + goto cleanup; + + /* First attach */ + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_first_attach")) + goto cleanup; + + if (check_result(skel)) + goto cleanup; + + /* Detach */ + fsession_test__detach(skel); + + /* Reset counters */ + memset(skel->bss, 0, sizeof(*skel->bss)); + + /* Second attach */ + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_second_attach")) + goto cleanup; + + if (check_result(skel)) + goto cleanup; + +cleanup: + fsession_test__destroy(skel); +} + +void test_fsession_test(void) +{ +#if !defined(__x86_64__) + test__skip(); + return; +#endif + if (test__start_subtest("fsession_basic")) + test_fsession_basic(); + if (test__start_subtest("fsession_reattach")) + test_fsession_reattach(); +} diff --git a/tools/testing/selftests/bpf/progs/fsession_test.c b/tools/testing/selftests/bpf/progs/fsession_test.c new file mode 100644 index 000000000000..f504984d42f2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fsession_test.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +__u64 test1_entry_result = 0; +__u64 test1_exit_result = 0; + +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test1, int a, int ret) +{ + bool is_exit = bpf_session_is_return(ctx); + + if (!is_exit) { + test1_entry_result = a == 1 && ret == 0; + return 0; + } + + test1_exit_result = a == 1 && ret == 2; + return 0; +} + +__u64 test2_entry_result = 0; +__u64 test2_exit_result = 0; + +SEC("fsession/bpf_fentry_test3") +int BPF_PROG(test2, char a, int b, __u64 c, int ret) +{ + bool is_exit = bpf_session_is_return(ctx); + + if (!is_exit) { + test2_entry_result = a == 4 && b == 5 && c == 6 && ret == 0; + return 0; + } + + test2_exit_result = a == 4 && b == 5 && c == 6 && ret == 15; + return 0; +} + +__u64 test3_entry_result = 0; +__u64 test3_exit_result = 0; + +SEC("fsession/bpf_fentry_test4") +int BPF_PROG(test3, void *a, char b, int c, __u64 d, int ret) +{ + bool is_exit = bpf_session_is_return(ctx); + + if (!is_exit) { + test3_entry_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 0; + return 0; + } + + test3_exit_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 34; + return 0; +} + +__u64 test4_entry_result = 0; +__u64 test4_exit_result = 0; + +SEC("fsession/bpf_fentry_test5") +int BPF_PROG(test4, __u64 a, void *b, short c, int d, __u64 e, int ret) +{ + bool is_exit = bpf_session_is_return(ctx); + + if (!is_exit) { + test4_entry_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && + e == 15 && ret == 0; + return 0; + } + + test4_exit_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && + e == 15 && ret == 65; + return 0; +} + +__u64 test5_entry_result = 0; +__u64 test5_exit_result = 0; + +SEC("fsession/bpf_fentry_test7") +int BPF_PROG(test5, struct bpf_fentry_test_t *arg, int ret) +{ + bool is_exit = bpf_session_is_return(ctx); + + if (!is_exit) { + if (!arg) + test5_entry_result = ret == 0; + return 0; + } + + if (!arg) + test5_exit_result = 1; + return 0; +} + +__u64 test6_entry_result = 0; +__u64 test6_exit_result = 0; +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test6, int a) +{ + __u64 addr = bpf_get_func_ip(ctx); + + if (bpf_session_is_return(ctx)) + test6_exit_result = (const void *) addr == &bpf_fentry_test1; + else + test6_entry_result = (const void *) addr == &bpf_fentry_test1; + return 0; +} -- 2.52.0 Test session cookie for fsession. Multiple fsession BPF progs is attached to bpf_fentry_test1() and session cookie is read and write in the testcase. Signed-off-by: Menglong Dong --- v3: - restructure the testcase by combine the testcases for session cookie and get_func_ip into one patch --- .../selftests/bpf/prog_tests/fsession_test.c | 25 +++++++ .../selftests/bpf/progs/fsession_test.c | 72 +++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/fsession_test.c b/tools/testing/selftests/bpf/prog_tests/fsession_test.c index 83f3953a1ff6..2459f9db1c92 100644 --- a/tools/testing/selftests/bpf/prog_tests/fsession_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fsession_test.c @@ -77,6 +77,29 @@ static void test_fsession_reattach(void) fsession_test__destroy(skel); } +static void test_fsession_cookie(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open")) + goto cleanup; + + err = bpf_program__set_autoload(skel->progs.test11, true); + if (!ASSERT_OK(err, "bpf_program__set_autoload")) + goto cleanup; + + err = fsession_test__load(skel); + if (!ASSERT_OK(err, "fsession_test__load")) + goto cleanup; + + err = fsession_test__attach(skel); + ASSERT_EQ(err, -E2BIG, "fsession_cookie"); +cleanup: + fsession_test__destroy(skel); +} + void test_fsession_test(void) { #if !defined(__x86_64__) @@ -87,4 +110,6 @@ void test_fsession_test(void) test_fsession_basic(); if (test__start_subtest("fsession_reattach")) test_fsession_reattach(); + if (test__start_subtest("fsession_cookie")) + test_fsession_cookie(); } diff --git a/tools/testing/selftests/bpf/progs/fsession_test.c b/tools/testing/selftests/bpf/progs/fsession_test.c index f504984d42f2..85e89f7219a7 100644 --- a/tools/testing/selftests/bpf/progs/fsession_test.c +++ b/tools/testing/selftests/bpf/progs/fsession_test.c @@ -108,3 +108,75 @@ int BPF_PROG(test6, int a) test6_entry_result = (const void *) addr == &bpf_fentry_test1; return 0; } + +__u64 test7_entry_ok = 0; +__u64 test7_exit_ok = 0; +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test7, int a) +{ + __u64 *cookie = bpf_session_cookie(ctx); + + if (!bpf_session_is_return(ctx)) { + *cookie = 0xAAAABBBBCCCCDDDDull; + test7_entry_ok = *cookie == 0xAAAABBBBCCCCDDDDull; + return 0; + } + + test7_exit_ok = *cookie == 0xAAAABBBBCCCCDDDDull; + return 0; +} + +__u64 test8_entry_ok = 0; +__u64 test8_exit_ok = 0; + +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test8, int a) +{ + __u64 *cookie = bpf_session_cookie(ctx); + + if (!bpf_session_is_return(ctx)) { + *cookie = 0x1111222233334444ull; + test8_entry_ok = *cookie == 0x1111222233334444ull; + return 0; + } + + test8_exit_ok = *cookie == 0x1111222233334444ull; + return 0; +} + +__u64 test9_entry_result = 0; +__u64 test9_exit_result = 0; + +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test9, int a, int ret) +{ + __u64 *cookie = bpf_session_cookie(ctx); + + if (!bpf_session_is_return(ctx)) { + test9_entry_result = a == 1 && ret == 0; + *cookie = 0x123456ULL; + return 0; + } + + test9_exit_result = a == 1 && ret == 2 && *cookie == 0x123456ULL; + return 0; +} + +SEC("fsession/bpf_fentry_test1") +int BPF_PROG(test10, int a, int ret) +{ + __u64 *cookie = bpf_session_cookie(ctx); + + *cookie = 0; + return 0; +} + +/* This is the 5th cookie, so it should fail */ +SEC("?fsession/bpf_fentry_test1") +int BPF_PROG(test11, int a, int ret) +{ + __u64 *cookie = bpf_session_cookie(ctx); + + *cookie = 0; + return 0; +} -- 2.52.0 Test the fsession when it is used together with fentry, fexit. Signed-off-by: Menglong Dong --- .../testing/selftests/bpf/progs/fsession_test.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/fsession_test.c b/tools/testing/selftests/bpf/progs/fsession_test.c index 85e89f7219a7..c14dc0ed28e9 100644 --- a/tools/testing/selftests/bpf/progs/fsession_test.c +++ b/tools/testing/selftests/bpf/progs/fsession_test.c @@ -180,3 +180,19 @@ int BPF_PROG(test11, int a, int ret) *cookie = 0; return 0; } + +__u64 test12_result = 0; +SEC("fexit/bpf_fentry_test1") +int BPF_PROG(test12, int a, int ret) +{ + test12_result = a == 1 && ret == 2; + return 0; +} + +__u64 test13_result = 0; +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test13, int a) +{ + test13_result = a == 1; + return 0; +} -- 2.52.0