From: Mykyta Yatsenko

Refactor __bpf_async_set_callback(), getting rid of locks.

The idea of the algorithm is to store both callback_fn and prog in
struct bpf_async_cb and verify that both pointers are stored; if either
pointer does not match (because of a concurrent update), retry until
both match. On each iteration, increment the refcnt of the prog that is
going to be set and decrement the refcnt of the one that is evicted, so
that get/put stay balanced: each iteration does both an inc and a dec.

Signed-off-by: Mykyta Yatsenko
---
 kernel/bpf/helpers.c | 61 ++++++++++++++++++----------------------------------
 1 file changed, 21 insertions(+), 40 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9eaa4185e0a79b903c6fc2ccb310f521a4b14a1d..954bd61310a6ad3a0d540c1b1ebe8c35a9c0119c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1355,55 +1355,36 @@ static const struct bpf_func_proto bpf_timer_init_proto = {
 };
 
 static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn,
-				    struct bpf_prog_aux *aux, unsigned int flags,
-				    enum bpf_async_type type)
+				    struct bpf_prog *prog)
 {
-	struct bpf_prog *prev, *prog = aux->prog;
-	struct bpf_async_cb *cb;
-	int ret = 0;
+	struct bpf_prog *prev;
+	struct bpf_async_cb *cb = async->cb;
 
-	if (in_nmi())
-		return -EOPNOTSUPP;
-	__bpf_spin_lock_irqsave(&async->lock);
-	cb = async->cb;
-	if (!cb) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (!atomic64_read(&cb->map->usercnt)) {
-		/* maps with timers must be either held by user space
-		 * or pinned in bpffs. Otherwise timer might still be
-		 * running even when bpf prog is detached and user space
-		 * is gone, since map_release_uref won't ever be called.
-		 */
-		ret = -EPERM;
-		goto out;
-	}
-	prev = cb->prog;
-	if (prev != prog) {
-		/* Bump prog refcnt once. Every bpf_timer_set_callback()
-		 * can pick different callback_fn-s within the same prog.
-		 */
-		prog = bpf_prog_inc_not_zero(prog);
-		if (IS_ERR(prog)) {
-			ret = PTR_ERR(prog);
-			goto out;
+	if (!cb)
+		return -EPERM;
+
+	do {
+		if (prog) {
+			prog = bpf_prog_inc_not_zero(prog);
+			if (IS_ERR(prog))
+				return PTR_ERR(prog);
 		}
+
+		prev = xchg(&cb->prog, prog);
+		rcu_assign_pointer(cb->callback_fn, callback_fn);
+
 		if (prev)
-			/* Drop prev prog refcnt when swapping with new prog */
 			bpf_prog_put(prev);
-		cb->prog = prog;
-	}
-	rcu_assign_pointer(cb->callback_fn, callback_fn);
-out:
-	__bpf_spin_unlock_irqrestore(&async->lock);
-	return ret;
+
+	} while (READ_ONCE(cb->prog) != prog || READ_ONCE(cb->callback_fn) != callback_fn);
+
+	return 0;
 }
 
 BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
 	   struct bpf_prog_aux *, aux)
 {
-	return __bpf_async_set_callback(timer, callback_fn, aux, 0, BPF_ASYNC_TYPE_TIMER);
+	return __bpf_async_set_callback(timer, callback_fn, aux->prog);
 }
 
 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
@@ -3131,7 +3112,7 @@ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
 	if (flags)
 		return -EINVAL;
 
-	return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
+	return __bpf_async_set_callback(async, callback_fn, aux->prog);
 }
 
 __bpf_kfunc void bpf_preempt_disable(void)
-- 
2.52.0
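
For readers who want to experiment with the retry pattern outside the
kernel, below is a minimal, standalone userspace sketch of the same
idea. It is not the kernel code from the patch: the struct names
(fake_prog, async_cb) and helpers (prog_get, prog_put, set_callback)
are made up for illustration, and C11 atomics stand in for the kernel's
xchg()/READ_ONCE()/rcu_assign_pointer() and
bpf_prog_inc_not_zero()/bpf_prog_put().

#include <stdatomic.h>
#include <stdio.h>

struct fake_prog {
	atomic_int refcnt;
};

struct async_cb {
	_Atomic(struct fake_prog *) prog;
	_Atomic(void *) callback_fn;
};

static void prog_get(struct fake_prog *p)
{
	if (p)
		atomic_fetch_add(&p->refcnt, 1);
}

static void prog_put(struct fake_prog *p)
{
	if (p)
		atomic_fetch_sub(&p->refcnt, 1);
}

/* Lock-free publish of a (prog, callback_fn) pair with a retry loop:
 * each iteration takes a reference on the prog being installed and
 * drops the reference of the prog it evicted, so references stay
 * balanced even if the loop has to retry.
 */
static int set_callback(struct async_cb *cb, void *callback_fn,
			struct fake_prog *prog)
{
	struct fake_prog *prev;

	do {
		prog_get(prog);
		prev = atomic_exchange(&cb->prog, prog);
		atomic_store(&cb->callback_fn, callback_fn);
		prog_put(prev);
		/* A concurrent setter may have overwritten one of the
		 * two fields between our stores; retry until both
		 * still hold the pair we published.
		 */
	} while (atomic_load(&cb->prog) != prog ||
		 atomic_load(&cb->callback_fn) != callback_fn);

	return 0;
}

int main(void)
{
	struct fake_prog p = { .refcnt = 1 };
	struct async_cb cb = { 0 };

	set_callback(&cb, (void *)0x1, &p);
	/* Prints 2: the initial reference plus the one held by cb. */
	printf("refcnt after install: %d\n", atomic_load(&p.refcnt));
	return 0;
}

As in the patch, a setter that loses a race simply retries; whichever
prog gets evicted has its reference dropped by the iteration that
evicted it, so no reference is leaked regardless of how many retries a
concurrent update forces.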