To handle WFET use __cmpwait_timeout() similarly to __cmpwait(). These call out to the respective __cmpwait_case_timeout_##sz(), __cmpwait_case_##sz() functions. Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Ankur Arora --- Notes: - split out the WFET handling into __cmpwait_case_timeout_##sz() instead of overloading __cmpwait_case_##sz(). - in one of the review comments there was a disussion of an added warning for long timeout_ns values. I did not add a warning (or __bad_ndelay() style failure). However, a comment smp_cond_load_relaxed_timeout() does mention that it might not be a good idea to poll for long periods if you don't have HAVE_ARCH_CPU_RELAX. The reason for the lack of proper warning/error is that for cases where this interface is used in the spinlock path (as rqspinlock does) there's no way to avoid this kind of polling. arch/arm64/include/asm/barrier.h | 8 ++-- arch/arm64/include/asm/cmpxchg.h | 65 ++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 6190e178db51..fbd71cd4ef4e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -224,8 +224,8 @@ do { \ extern bool arch_timer_evtstrm_available(void); /* - * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed() - * for the ptr value to change. + * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()/ + * __cmpwait_relaxed_timeout() for the ptr value to change. * * Since this period is reasonably long, choose SMP_TIMEOUT_POLL_COUNT * to be 1, so smp_cond_load_{relaxed,acquire}_timeout() does a @@ -234,7 +234,9 @@ extern bool arch_timer_evtstrm_available(void); #define SMP_TIMEOUT_POLL_COUNT 1 #define cpu_poll_relax(ptr, val, timeout_ns) do { \ - if (arch_timer_evtstrm_available()) \ + if (alternative_has_cap_unlikely(ARM64_HAS_WFXT)) \ + __cmpwait_relaxed_timeout(ptr, val, timeout_ns); \ + else if (arch_timer_evtstrm_available()) \ __cmpwait_relaxed(ptr, val); \ else \ cpu_relax(); \ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d7a540736741..dfb7d10a18be 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -12,6 +12,7 @@ #include #include +#include /* * We need separate acquire parameters for ll/sc and lse, since the full @@ -208,9 +209,13 @@ __CMPXCHG_GEN(_mb) __cmpxchg128((ptr), (o), (n)); \ }) +/* Re-declared here to avoid include dependency. */ +extern u64 (*arch_timer_read_counter)(void); + #define __CMPWAIT_CASE(w, sfx, sz) \ static inline void __cmpwait_case_##sz(volatile void *ptr, \ - unsigned long val) \ + unsigned long val, \ + u64 __maybe_unused timeout_ns) \ { \ unsigned long tmp; \ \ @@ -233,20 +238,52 @@ __CMPWAIT_CASE( , , 64); #undef __CMPWAIT_CASE -#define __CMPWAIT_GEN(sfx) \ -static __always_inline void __cmpwait##sfx(volatile void *ptr, \ - unsigned long val, \ - int size) \ +#define __CMPWAIT_TIMEOUT_CASE(w, sfx, sz) \ +static inline void __cmpwait_case_timeout_##sz(volatile void *ptr, \ + unsigned long val, \ + u64 timeout_ns) \ +{ \ + unsigned long tmp; \ + u64 ecycles = arch_timer_read_counter() + \ + NSECS_TO_CYCLES(timeout_ns); \ + asm volatile( \ + " sevl\n" \ + " wfe\n" \ + " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 2f\n" \ + " msr s0_3_c1_c0_0, %[ecycles]\n" \ + "2:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \ + : [val] "r" (val), [ecycles] "r" (ecycles)); \ +} + +__CMPWAIT_TIMEOUT_CASE(w, b, 8); +__CMPWAIT_TIMEOUT_CASE(w, h, 16); +__CMPWAIT_TIMEOUT_CASE(w, , 32); +__CMPWAIT_TIMEOUT_CASE( , , 64); + +#undef __CMPWAIT_TIMEOUT_CASE + +#define __CMPWAIT_GEN(timeout, sfx) \ +static __always_inline void __cmpwait##timeout##sfx(volatile void *ptr, \ + unsigned long val, \ + u64 timeout_ns, \ + int size) \ { \ switch (size) { \ case 1: \ - return __cmpwait_case##sfx##_8(ptr, (u8)val); \ + return __cmpwait_case##timeout##sfx##_8(ptr, (u8)val, \ + timeout_ns); \ case 2: \ - return __cmpwait_case##sfx##_16(ptr, (u16)val); \ + return __cmpwait_case##timeout##sfx##_16(ptr, (u16)val, \ + timeout_ns); \ case 4: \ - return __cmpwait_case##sfx##_32(ptr, val); \ + return __cmpwait_case##timeout##sfx##_32(ptr, val, \ + timeout_ns); \ case 8: \ - return __cmpwait_case##sfx##_64(ptr, val); \ + return __cmpwait_case##timeout##sfx##_64(ptr, val, \ + timeout_ns); \ default: \ BUILD_BUG(); \ } \ @@ -254,11 +291,15 @@ static __always_inline void __cmpwait##sfx(volatile void *ptr, \ unreachable(); \ } -__CMPWAIT_GEN() +__CMPWAIT_GEN( , ) +__CMPWAIT_GEN(_timeout, ) #undef __CMPWAIT_GEN -#define __cmpwait_relaxed(ptr, val) \ - __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) +#define __cmpwait_relaxed_timeout(ptr, val, timeout_ns) \ + __cmpwait_timeout((ptr), (unsigned long)(val), timeout_ns, sizeof(*(ptr))) + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), 0, sizeof(*(ptr))) #endif /* __ASM_CMPXCHG_H */ -- 2.31.1