It turns out the generic this_cpu_cmpxchg implementation, which simply disables/enables interrupts around a plain compare-and-store, is faster than the LL/SC or LSE based implementation. Remove HAVE_CMPXCHG_LOCAL for better performance on arm64.

Tested on a quad core 1.9GHz Cortex-A55 platform:
the average mod_node_page_state() cost decreases from 167ns to 103ns;
the spawn (30 duration) benchmark in unixbench improves from 147494 lps
to 150561 lps, a 2.1% gain.

Tested on a quad core 2.1GHz Cortex-A73 platform:
the average mod_node_page_state() cost decreases from 113ns to 85ns;
the spawn (30 duration) benchmark in unixbench improves from 209844 lps
to 212581 lps, a 1.3% gain.

Signed-off-by: Jisheng Zhang
---
 arch/arm64/Kconfig              |  1 -
 arch/arm64/include/asm/percpu.h | 24 ------------------------
 2 files changed, 25 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..5e7e2e65d5a5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -205,7 +205,6 @@ config ARM64
 	select HAVE_EBPF_JIT
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_CMPXCHG_DOUBLE
-	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING_USER
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b57b2bb00967..70ffe566cb4b 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -232,30 +232,6 @@ PERCPU_RET_OP(add, add, ldadd)
 #define	this_cpu_xchg_8(pcp, val)	\
 	_pcp_protect_return(xchg_relaxed, pcp, val)
 
-#define	this_cpu_cmpxchg_1(pcp, o, n)	\
-	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
-#define	this_cpu_cmpxchg_2(pcp, o, n)	\
-	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
-#define	this_cpu_cmpxchg_4(pcp, o, n)	\
-	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
-#define	this_cpu_cmpxchg_8(pcp, o, n)	\
-	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
-
-#define	this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
-
-#define	this_cpu_cmpxchg128(pcp, o, n)					\
-({									\
-	typedef typeof(pcp) pcp_op_T__;					\
-	u128 old__, new__, ret__;					\
-	pcp_op_T__ *ptr__;						\
-	old__ = o;							\
-	new__ = n;							\
-	preempt_disable_notrace();					\
-	ptr__ = raw_cpu_ptr(&(pcp));					\
-	ret__ = cmpxchg128_local((void *)ptr__, old__, new__);		\
-	preempt_enable_notrace();					\
-	ret__;								\
-})
 
 #ifdef __KVM_NVHE_HYPERVISOR__
 extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
-- 
2.51.0
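
For background on why the irq-disable fallback can win: once interrupts
(and preemption) are masked, per-cpu data cannot be touched by anything
else on that CPU, so the "cmpxchg" can be an ordinary load, compare and
store with no exclusive monitor or atomic instruction at all. The sketch
below is only a standalone userspace illustration of that difference,
assuming GCC/Clang __atomic builtins; it is not the kernel's actual
this_cpu_cmpxchg or _pcp_protect_return code, and the helper names
plain_cmpxchg/atomic_cmpxchg are made up for the example.

/* Illustrative sketch only -- not kernel code. */
#include <stdint.h>
#include <stdio.h>

/*
 * Plain (non-atomic) cmpxchg: correct only when the caller has
 * exclusive access to the data, e.g. per-cpu data with interrupts
 * and preemption disabled.  Compiles to ordinary load/store.
 */
static inline uint64_t plain_cmpxchg(uint64_t *ptr, uint64_t old, uint64_t new)
{
	uint64_t cur = *ptr;		/* ordinary load  */

	if (cur == old)
		*ptr = new;		/* ordinary store */
	return cur;
}

/*
 * Real atomic cmpxchg: on arm64 this becomes an LL/SC loop or an
 * LSE CAS instruction, which is what the removed arm64-specific
 * this_cpu_cmpxchg_*() path used.
 */
static inline uint64_t atomic_cmpxchg(uint64_t *ptr, uint64_t old, uint64_t new)
{
	__atomic_compare_exchange_n(ptr, &old, new, 0,
				    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	return old;			/* observed value, success or not */
}

int main(void)
{
	uint64_t counter = 0;

	/* Both perform "set to n if the value is still o". */
	plain_cmpxchg(&counter, 0, 1);
	atomic_cmpxchg(&counter, 1, 2);

	printf("counter = %llu\n", (unsigned long long)counter);
	return 0;
}

With HAVE_CMPXCHG_LOCAL deselected, arm64 falls back to the generic
per-cpu cmpxchg, which takes the first路 -- sorry, the first (plain
load/compare/store) route under local irq disable; the benchmark numbers
above are from that configuration.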