Add BPF_TRACE_SESSION support to x86_64. invoke_bpf_session_entry() and
invoke_bpf_session_exit() are introduced for this purpose. In
invoke_bpf_session_entry(), we check the return value of each fentry
program and set the corresponding flag in the session flags if it is
non-zero. In invoke_bpf_session_exit(), we check whether the
corresponding flag is set; if it is, the fexit program is skipped.

Signed-off-by: Menglong Dong
Co-developed-by: Leon Hwang
Signed-off-by: Leon Hwang
---
 arch/x86/net/bpf_jit_comp.c | 115 +++++++++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d4c93d9e73e4..0586b96ed529 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3108,6 +3108,97 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static int invoke_bpf_session_entry(const struct btf_func_model *m, u8 **pprog,
+				    struct bpf_tramp_links *tl, int stack_size,
+				    int run_ctx_off, int session_off,
+				    void *image, void *rw_image)
+{
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+	int i;
+
+	/* clear the session flags:
+	 *
+	 * xor rax, rax
+	 * mov QWORD PTR [rbp - session_off], rax
+	 */
+	EMIT3(0x48, 0x31, 0xC0);
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -session_off);
+
+	for (i = 0; i < tl->nr_links; i++) {
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+				    image, rw_image))
+			return -EINVAL;
+
+		/* fentry prog stored return value into [rbp - 8]. Emit:
+		 * if (*(u64 *)(rbp - 8) != 0)
+		 *	*(u64 *)(rbp - session_off) |= (1 << (i + 1));
+		 */
+		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
+		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
+		/* emit 2 nops that will be replaced with JE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		session_flags = (1ULL << (i + 1));
+		/* mov rax, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
+		/* or QWORD PTR [rbp - session_off], rax */
+		EMIT2(0x48, 0x09);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
+
+		jmp_insn[0] = X86_JE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
+static int invoke_bpf_session_exit(const struct btf_func_model *m, u8 **pprog,
+				   struct bpf_tramp_links *tl, int stack_size,
+				   int run_ctx_off, int session_off,
+				   void *image, void *rw_image)
+{
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+	int i;
+
+	/* set the bpf_trace_is_exit flag to the session flags */
+	/* mov rax, 1 */
+	emit_mov_imm32(&prog, false, BPF_REG_0, 1);
+	/* or QWORD PTR [rbp - session_off], rax */
+	EMIT2(0x48, 0x09);
+	emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
+
+	for (i = 0; i < tl->nr_links; i++) {
+		/* check if (1 << (i+1)) is set in the session flags, and
+		 * skip the execution of the fexit program if it is.
+		 */
+		session_flags = 1ULL << (i + 1);
+		/* mov rdi, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_1, session_flags >> 32, (u32) session_flags);
+		/* test QWORD PTR [rbp - session_off], rdi */
+		EMIT2(0x48, 0x85);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_1, -session_off);
+		/* emit 2 nops that will be replaced with JNE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, false,
+				    image, rw_image))
+			return -EINVAL;
+
+		jmp_insn[0] = X86_JNE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
 /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
 #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) \
 	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
@@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			void *func_addr)
 {
 	int i, ret, nr_regs = m->nr_args, stack_size = 0;
-	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+	int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
+	    arg_stack_off, rbx_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
 	void *orig_call = func_addr;
@@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 *
 	 * RBP - nregs_off     [ regs count       ]  always
 	 *
+	 * RBP - session_off   [ session flags    ]  tracing session
+	 *
 	 * RBP - ip_off        [ traced function  ]  BPF_TRAMP_F_IP_ARG flag
 	 *
 	 * RBP - rbx_off       [ rbx value        ]  always
@@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	/* regs count */
 	stack_size += 8;
 	nregs_off = stack_size;
+	stack_size += 8;
+	session_off = stack_size;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			return -EINVAL;
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
+					     run_ctx_off, session_off,
+					     image, rw_image))
+			return -EINVAL;
+	}
+
 	if (fmod_ret->nr_links) {
 		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
 				   GFP_KERNEL);
@@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
+					    run_ctx_off, session_off,
+					    image, rw_image)) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+	}
+
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
 		restore_regs(m, &prog, regs_off);
 
-- 
2.51.0