BPF programs detect recursion by doing an atomic inc/dec on a per-CPU
'active' counter from the trampoline. Create two helpers for operations
on this active counter; this makes it easier to change the recursion
detection logic in the future.

This commit makes no functional changes.

Acked-by: Yonghong Song
Signed-off-by: Puranjay Mohan
---
 include/linux/bpf.h      | 10 ++++++++++
 kernel/bpf/trampoline.c  |  8 ++++----
 kernel/trace/bpf_trace.c |  4 ++--
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bb3847caeae1..2da986136d26 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2004,6 +2004,16 @@ struct bpf_struct_ops_common_value {
 	enum bpf_struct_ops_state state;
 };
 
+static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog)
+{
+	return this_cpu_inc_return(*(prog->active)) == 1;
+}
+
+static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
+{
+	this_cpu_dec(*(prog->active));
+}
+
 #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
 /* This macro helps developer to register a struct_ops type and generate
  * type information correctly. Developers should use this macro to register
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 976d89011b15..2a125d063e62 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -949,7 +949,7 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
 
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
 		bpf_prog_inc_misses_counter(prog);
 		if (prog->aux->recursion_detected)
 			prog->aux->recursion_detected(prog);
@@ -993,7 +993,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	this_cpu_dec(*(prog->active));
+	bpf_prog_put_recursion_context(prog);
 	rcu_read_unlock_migrate();
 }
 
@@ -1029,7 +1029,7 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
 		bpf_prog_inc_misses_counter(prog);
 		if (prog->aux->recursion_detected)
 			prog->aux->recursion_detected(prog);
@@ -1044,7 +1044,7 @@ void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	this_cpu_dec(*(prog->active));
+	bpf_prog_put_recursion_context(prog);
 	migrate_enable();
 	rcu_read_unlock_trace();
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fe28d86f7c35..6e076485bf70 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2063,7 +2063,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
 	struct bpf_trace_run_ctx run_ctx;
 
 	cant_sleep();
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
 		bpf_prog_inc_misses_counter(prog);
 		goto out;
 	}
@@ -2077,7 +2077,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
 
 	bpf_reset_run_ctx(old_run_ctx);
 out:
-	this_cpu_dec(*(prog->active));
+	bpf_prog_put_recursion_context(prog);
 }
 
 #define UNPACK(...) __VA_ARGS__
-- 
2.47.3
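For illustration only (not part of the patch), the semantics wrapped by the
two helpers can be modeled in a stand-alone user-space sketch: "get" succeeds
only when the counter goes from 0 to 1, and "put" undoes the increment. The
plain int below stands in for the real per-CPU prog->active counter.

/*
 * User-space model of the helper pair: "get" succeeds only when the
 * counter goes 0 -> 1, "put" undoes the increment.
 */
#include <stdbool.h>
#include <stdio.h>

static int active;	/* stands in for this CPU's prog->active counter */

static bool get_recursion_context(void)
{
	/* models this_cpu_inc_return(*(prog->active)) == 1 */
	return ++active == 1;
}

static void put_recursion_context(void)
{
	/* models this_cpu_dec(*(prog->active)) */
	active--;
}

int main(void)
{
	if (get_recursion_context())
		printf("first entry: run the program\n");

	/* A nested entry on the same CPU finds the counter already at 1. */
	if (!get_recursion_context())
		printf("nested entry: recursion detected, skip\n");

	put_recursion_context();	/* nested entry exits */
	put_recursion_context();	/* first entry exits */
	return 0;
}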
BPF programs detect recursion using a per-CPU 'active' flag in struct
bpf_prog. The trampoline currently sets/clears this flag with atomic
operations.

On some arm64 platforms (e.g., Neoverse V2 with LSE), per-CPU atomic
operations are relatively slow. Unlike x86_64, where per-CPU updates can
avoid cross-core atomicity, arm64 LSE atomics are always atomic across
all cores, which is unnecessary overhead for strictly per-CPU state.

This patch removes atomics from the recursion detection path on arm64 by
changing 'active' to a per-CPU array of four u8 counters, one per
context: {NMI, hard-irq, soft-irq, normal} (most- to least-significant
byte when the array is read as a u32). The running context uses a
non-atomic increment/decrement on its element. After the increment,
recursion is detected by reading the array as a u32 and verifying that
only the expected element changed; any change in another element
indicates inter-context recursion, and a value > 1 in the same element
indicates same-context recursion.

For example, starting from {0,0,0,0}, a normal-context trigger changes
the array to {0,0,0,1}. If an NMI arrives on the same CPU and triggers
the program, the array becomes {1,0,0,1}. When the NMI context checks
the u32 against its expected mask (BIT(3 * 8) == 0x01000000), it
observes 0x01000001 and correctly reports recursion. Same-context
recursion is detected analogously.

Acked-by: Yonghong Song
Signed-off-by: Puranjay Mohan
---
 include/linux/bpf.h | 32 +++++++++++++++++++++++++++++---
 kernel/bpf/core.c   |  3 ++-
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2da986136d26..da6a00dd313f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1746,6 +1746,8 @@ struct bpf_prog_aux {
 	struct bpf_map __rcu *st_ops_assoc;
 };
 
+#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */
+
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
@@ -1772,7 +1774,7 @@ struct bpf_prog {
 		u8		tag[BPF_TAG_SIZE];
 	};
 	struct bpf_prog_stats __percpu *stats;
-	int __percpu		*active;
+	u8 __percpu		*active; /* u8[BPF_NR_CONTEXTS] for recursion protection */
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
@@ -2006,12 +2008,36 @@ struct bpf_struct_ops_common_value {
 
 static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog)
 {
-	return this_cpu_inc_return(*(prog->active)) == 1;
+#ifdef CONFIG_ARM64
+	u8 rctx = interrupt_context_level();
+	u8 *active = this_cpu_ptr(prog->active);
+	u32 val;
+
+	preempt_disable();
+	active[rctx]++;
+	val = le32_to_cpu(*(__le32 *)active);
+	preempt_enable();
+	if (val != BIT(rctx * 8))
+		return false;
+
+	return true;
+#else
+	return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1;
+#endif
 }
 
 static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
 {
-	this_cpu_dec(*(prog->active));
+#ifdef CONFIG_ARM64
+	u8 rctx = interrupt_context_level();
+	u8 *active = this_cpu_ptr(prog->active);
+
+	preempt_disable();
+	active[rctx]--;
+	preempt_enable();
+#else
+	this_cpu_dec(*(int __percpu *)(prog->active));
+#endif
 }
 
 #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
 /* This macro helps developer to register a struct_ops type and generate
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c66316e32563..e0b8a8a5aaa9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -112,7 +112,8 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 		vfree(fp);
 		return NULL;
 	}
-	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
+	fp->active = __alloc_percpu_gfp(sizeof(u8[BPF_NR_CONTEXTS]), 4,
+					bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
 	if (!fp->active) {
 		vfree(fp);
 		kfree(aux);
-- 
2.47.3
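For illustration only (not part of the patch), the per-context check can be
modeled in a stand-alone user-space sketch. The CTX_NORMAL/CTX_NMI indices
mirror interrupt_context_level() (0 = normal ... 3 = NMI), and a plain
little-endian memcpy stands in for le32_to_cpu(); these names and the array
below are stand-ins, not kernel APIs.

/*
 * User-space model of the arm64 variant: four u8 counters, one per
 * context; after incrementing its own byte, a context reads the whole
 * array as a u32 and expects exactly BIT(rctx * 8).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BIT(n)		(1u << (n))
#define CTX_NORMAL	0
#define CTX_NMI		3

static uint8_t active[4];	/* models one CPU's prog->active array */

static bool get_recursion_context(int rctx)
{
	uint32_t val;

	active[rctx]++;
	memcpy(&val, active, sizeof(val));	/* little-endian host assumed */
	return val == BIT(rctx * 8);		/* only our own byte may be set */
}

static void put_recursion_context(int rctx)
{
	active[rctx]--;
}

int main(void)
{
	/* Normal context enters: val == 0x00000001 == BIT(0), allowed. */
	printf("normal: %s\n",
	       get_recursion_context(CTX_NORMAL) ? "ok" : "recursion");

	/* NMI on the same CPU: val == 0x01000001 != 0x01000000, rejected. */
	printf("nmi:    %s\n",
	       get_recursion_context(CTX_NMI) ? "ok" : "recursion");

	put_recursion_context(CTX_NMI);
	put_recursion_context(CTX_NORMAL);
	return 0;
}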