From: Mykyta Yatsenko Current code frees the context immediately when the refcount drops to zero, without waiting for any RCU grace period. This is unsafe: a concurrent BPF program calling bpf_task_work_schedule() may still hold the ctx without a reference for a very short period of time in bpf_task_work_acquire_ctx(). Fix this by deferring the free through two RCU grace periods: call_rcu_tasks_trace -> call_rcu -> free This is similar to bpf_async_cb freeing. Since bpf_task_work_ctx_put() can be called from IRQ-disabled context (e.g. NMI from tracing BPF programs), and call_rcu_tasks_trace is not safe there, use irq_work to defer the initial call_rcu_tasks_trace to a safe context. Fixes: 38aa7003e369 ("bpf: task work scheduling kfuncs") Signed-off-by: Mykyta Yatsenko --- kernel/bpf/helpers.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7ac32798eb04..414e5b55cd37 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4160,6 +4160,31 @@ static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx) } } +static void bpf_task_work_ctx_free_rcu(struct rcu_head *rcu) +{ + struct bpf_task_work_ctx *ctx = container_of(rcu, struct bpf_task_work_ctx, rcu); + + bpf_task_work_ctx_reset(ctx); + /* bpf_mem_free expects migration to be disabled */ + migrate_disable(); + bpf_mem_free(&bpf_global_ma, ctx); + migrate_enable(); +} + +static void bpf_task_work_ctx_free_rcu_tasks_trace(struct rcu_head *rcu) +{ + struct bpf_task_work_ctx *ctx = container_of(rcu, struct bpf_task_work_ctx, rcu); + + call_rcu(&ctx->rcu, bpf_task_work_ctx_free_rcu); +} + +static void bpf_task_work_ctx_delete(struct irq_work *irq_work) +{ + struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work); + + call_rcu_tasks_trace(&ctx->rcu, bpf_task_work_ctx_free_rcu_tasks_trace); +} + static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx) { return refcount_inc_not_zero(&ctx->refcnt); @@ -4170,12 +4195,12 @@ static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx) if (!refcount_dec_and_test(&ctx->refcnt)) return; - bpf_task_work_ctx_reset(ctx); - - /* bpf_mem_free expects migration to be disabled */ - migrate_disable(); - bpf_mem_free(&bpf_global_ma, ctx); - migrate_enable(); + if (irqs_disabled()) { + init_irq_work(&ctx->irq_work, bpf_task_work_ctx_delete); + irq_work_queue(&ctx->irq_work); + } else { + bpf_task_work_ctx_delete(&ctx->irq_work); + } } static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx) -- 2.47.3 From: Mykyta Yatsenko Replace bpf_mem_alloc/bpf_mem_free with kmalloc_nolock/kfree_nolock for allocating and freeing bpf_task_work_ctx. Now that kmalloc can be used from NMI context, it is the right generic mechanism for memory allocation in BPF kfuncs, even in restricted contexts like tracing programs. Signed-off-by: Mykyta Yatsenko --- kernel/bpf/helpers.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 414e5b55cd37..d25aa51c80d3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4165,10 +4165,7 @@ static void bpf_task_work_ctx_free_rcu(struct rcu_head *rcu) struct bpf_task_work_ctx *ctx = container_of(rcu, struct bpf_task_work_ctx, rcu); bpf_task_work_ctx_reset(ctx); - /* bpf_mem_free expects migration to be disabled */ - migrate_disable(); - bpf_mem_free(&bpf_global_ma, ctx); - migrate_enable(); + kfree_nolock(ctx); } static void bpf_task_work_ctx_free_rcu_tasks_trace(struct rcu_head *rcu) @@ -4295,7 +4292,7 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t if (ctx) return ctx; - ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx)); + ctx = kmalloc_nolock(sizeof(struct bpf_task_work_ctx), 0, map->numa_node); if (!ctx) return ERR_PTR(-ENOMEM); @@ -4309,7 +4306,7 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t * tw->ctx is set by concurrent BPF program, release allocated * memory and try to reuse already set context. */ - bpf_mem_free(&bpf_global_ma, ctx); + kfree_nolock(ctx); return old_ctx; } -- 2.47.3