Switch out the conditional load interfaces used by rqspinlock to smp_cond_load_acquire_timeout() and its wrapper, atomic_cond_read_acquire_timeout(). Both of these handle the timeout and amortize as needed, so use the non-amortized RES_CHECK_TIMEOUT. RES_CHECK_TIMEOUT does double duty here -- presenting the current clock value (or the timeout/deadlock error from clock_deadlock()) to the cond-load, and returning the error value via ret. For correctness, we need to ensure that the error case of the cond-load interface always agrees with that in clock_deadlock(). For the most part, this is fine because there's no independent clock, or double reads from the clock in cond-load -- either of which could lead to its internal state going out of sync with that of clock_deadlock(). There is, however, an edge case where clock_deadlock() checks for: if (time > ts->timeout_end) return -ETIMEDOUT; while smp_cond_load_acquire_timeout() checks for: __time_now = (time_expr_ns); if (__time_now <= 0 || __time_now >= __time_end) { VAL = READ_ONCE(*__PTR); break; } This runs into a problem when (__time_now == __time_end), since clock_deadlock() does not treat it as a timeout condition but the second clause in the conditional above does. So, add an equality check in clock_deadlock(). Finally, redefine SMP_TIMEOUT_POLL_COUNT to be 16k to be similar to the spin-count used in the amortized version. We only do this on non-arm64 architectures, since arm64 uses a waiting implementation.
Cc: bpf@vger.kernel.org Cc: Kumar Kartikeya Dwivedi Cc: Alexei Starovoitov Signed-off-by: Ankur Arora --- kernel/bpf/rqspinlock.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index 0ec17ebb67c1..e5e27266b813 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -215,7 +215,7 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, } time = ktime_get_mono_fast_ns(); - if (time > ts->timeout_end) + if (time >= ts->timeout_end) return -ETIMEDOUT; /* @@ -235,11 +235,10 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, } /* - * Do not amortize with spins when res_smp_cond_load_acquire is defined, - * as the macro does internal amortization for us. + * Spin amortized version of RES_CHECK_TIMEOUT. Used when busy-waiting in + * atomic_try_cmpxchg(). */ -#ifndef res_smp_cond_load_acquire -#define RES_CHECK_TIMEOUT(ts, ret, mask) \ +#define RES_CHECK_TIMEOUT_AMORTIZED(ts, ret, mask) \ ({ \ s64 __timeval_err = 0; \ if (!(ts).spin++) \ @@ -247,7 +246,7 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, (ret) = __timeval_err < 0 ? __timeval_err : 0; \ __timeval_err; \ }) -#else + #define RES_CHECK_TIMEOUT(ts, ret, mask) \ ({ \ s64 __timeval_err; \ @@ -255,7 +254,6 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, (ret) = __timeval_err < 0 ? __timeval_err : 0; \ __timeval_err; \ }) -#endif /* * Initialize the 'spin' member. @@ -269,6 +267,17 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask, */ #define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; }) +/* + * Limit how often we invoke clock_deadlock() while spin-waiting in + * smp_cond_load_acquire_timeout() or atomic_cond_read_acquire_timeout(). + * + * We only override the default value, taking care not to supersede ARM64's override.
+ */ +#ifndef CONFIG_ARM64 +#undef SMP_TIMEOUT_POLL_COUNT +#define SMP_TIMEOUT_POLL_COUNT (16*1024) +#endif + /* * Provide a test-and-set fallback for cases when queued spin lock support is * absent from the architecture. @@ -296,7 +305,7 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock) val = atomic_read(&lock->val); if (val || !atomic_try_cmpxchg(&lock->val, &val, 1)) { - if (RES_CHECK_TIMEOUT(ts, ret, ~0u) < 0) + if (RES_CHECK_TIMEOUT_AMORTIZED(ts, ret, ~0u) < 0) goto out; cpu_relax(); goto retry; @@ -319,12 +328,6 @@ EXPORT_SYMBOL_GPL(resilient_tas_spin_lock); */ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]); -#ifndef res_smp_cond_load_acquire -#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c) -#endif - -#define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c)) - /** * resilient_queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure @@ -421,7 +424,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) */ if (val & _Q_LOCKED_MASK) { RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); - res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK) < 0); + smp_cond_load_acquire_timeout(&lock->locked, !VAL, + RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK), + ts.duration); } if (ret) { @@ -582,8 +587,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) * us. */ RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2); - val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) || - RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK) < 0); + val = atomic_cond_read_acquire_timeout(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), + RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK), + ts.duration); /* Disable queue destruction when we detect deadlocks. */ if (ret == -EDEADLK) { -- 2.31.1