RISC-V percpu addressing currently derives the base offset from the CPU number and __per_cpu_offset[]. Cache the current CPU's percpu offset in thread_info so that percpu accesses can load it directly. Keep the cached value up to date for the boot CPU, across context switches (where it is copied from the outgoing task's thread_info to the incoming task's) and during secondary CPU bringup. Initialize the offset in each secondary CPU's idle task before that task starts running, so that early percpu accesses use the secondary CPU's offset rather than inheriting the boot CPU's value. Link: https://lists.riscv.org/g/tech-privileged/topic/risc_v_tech_arch_review/113437553?page=2 Signed-off-by: Yunhui Cui --- arch/riscv/include/asm/asm.h | 6 +----- arch/riscv/include/asm/percpu.h | 4 ++++ arch/riscv/include/asm/switch_to.h | 8 ++++++++ arch/riscv/include/asm/thread_info.h | 3 ++- arch/riscv/kernel/asm-offsets.c | 1 + arch/riscv/kernel/smpboot.c | 8 ++++++++ arch/riscv/net/bpf_jit_comp64.c | 9 +-------- 7 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index e9e8ba83e632f..137a49488325e 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -91,11 +91,7 @@ #ifdef CONFIG_SMP .macro asm_per_cpu dst sym tmp - lw \tmp, TASK_TI_CPU_NUM(tp) - slli \tmp, \tmp, RISCV_LGPTR - la \dst, __per_cpu_offset - add \dst, \dst, \tmp - REG_L \tmp, 0(\dst) + REG_L \tmp, TASK_TI_PCPU_OFFSET(tp) la \dst, \sym add \dst, \dst, \tmp .endm diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h index 76b1b8c1fb953..9bf9f4cf4ca5a 100644 --- a/arch/riscv/include/asm/percpu.h +++ b/arch/riscv/include/asm/percpu.h @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include #define PERCPU_RW_OPS(sz) \ static inline unsigned long __percpu_read_##sz(void *ptr) \ @@ -279,6 +281,8 @@ _pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val) }) #endif +#define __my_cpu_offset (((struct thread_info *)current)->pcpu_offset) + #undef PERCPU_RW_OPS #undef __PERCPU_AMO_OP_CASE #undef PERCPU_OP diff --git
a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 0e71eb82f920c..733b6cd306e40 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -88,6 +88,13 @@ static inline void __switch_to_envcfg(struct task_struct *next) :: "r" (next->thread.envcfg) : "memory"); } +static inline void __switch_to_pcpu_offset(struct task_struct *next) +{ +#ifdef CONFIG_SMP + next->thread_info.pcpu_offset = __my_cpu_offset; +#endif +} + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -122,6 +129,7 @@ do { \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ __switch_to_envcfg(__next); \ + __switch_to_pcpu_offset(__next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 55019fdfa9eca..f10ba62b61016 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -53,6 +53,7 @@ struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0=>preemptible, <0=>BUG */ + int cpu; /* * These stack pointers are overwritten on every system call or * exception. 
SP is also saved to the stack it can be recovered when @@ -60,8 +61,8 @@ struct thread_info { */ long kernel_sp; /* Kernel stack pointer */ long user_sp; /* User stack pointer */ - int cpu; unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + unsigned long pcpu_offset; #ifdef CONFIG_SHADOW_CALL_STACK void *scs_base; void *scs_sp; diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index a75f0cfea1e9f..20d46c28fdde9 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -38,6 +38,7 @@ void asm_offsets(void) OFFSET(TASK_THREAD_SUM, task_struct, thread.sum); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); + OFFSET(TASK_TI_PCPU_OFFSET, task_struct, thread_info.pcpu_offset); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index f6ef57930b50a..7876854d16279 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -191,6 +191,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int ret; tidle->thread_info.cpu = cpu; + tidle->thread_info.pcpu_offset = per_cpu_offset(cpu); ret = start_secondary_cpu(cpu, tidle); if (!ret) { @@ -208,6 +209,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) } #endif +void __init smp_prepare_boot_cpu(void) +{ + __my_cpu_offset = per_cpu_offset(smp_processor_id()); +} + void __init smp_cpus_done(unsigned int max_cpus) { } @@ -233,6 +239,8 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; + __my_cpu_offset = per_cpu_offset(smp_processor_id()); + #ifdef CONFIG_HOTPLUG_PARALLEL cpuhp_ap_sync_alive(); #endif diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c03c1de16b79a..eab93d5258e9e 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ 
b/arch/riscv/net/bpf_jit_comp64.c @@ -1395,15 +1395,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, if (rd != rs) emit_mv(rd, rs, ctx); #ifdef CONFIG_SMP - /* Load current CPU number in T1 */ - emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu), + emit_ld(RV_REG_T1, offsetof(struct thread_info, pcpu_offset), RV_REG_TP, ctx); - /* Load address of __per_cpu_offset array in T2 */ - emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); - /* Get address of __per_cpu_offset[cpu] in T1 */ - emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx); - /* Load __per_cpu_offset[cpu] in T1 */ - emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); /* Add the offset to Rd */ emit_add(rd, rd, RV_REG_T1, ctx); #endif -- 2.39.5