Currently, migrate_enable and migrate_disable are global functions, which
makes them hotspots in some cases. Take BPF for example: the calls to
migrate_enable and migrate_disable in the BPF trampoline can introduce
significant overhead. The following is the 'perf top' output of the FENTRY
benchmark (./tools/testing/selftests/bpf/bench trig-fentry):

  54.63%  bpf_prog_2dcccf652aac1793_bench_trigger_fentry  [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry
  10.43%  [kernel]                   [k] migrate_enable
  10.07%  bpf_trampoline_6442517037  [k] bpf_trampoline_6442517037
   8.06%  [kernel]                   [k] __bpf_prog_exit_recur
   4.11%  libc.so.6                  [.] syscall
   2.15%  [kernel]                   [k] entry_SYSCALL_64
   1.48%  [kernel]                   [k] memchr_inv
   1.32%  [kernel]                   [k] fput
   1.16%  [kernel]                   [k] _copy_to_user
   0.73%  [kernel]                   [k] bpf_prog_test_run_raw_tp

So in this commit, we make migrate_enable/migrate_disable inline to obtain
better performance.

The struct rq is defined internally in kernel/sched/sched.h, and
migrate_enable/migrate_disable access its "nr_pinned" field, which makes
it hard to inline them. Alexei Starovoitov suggested generating the offset
of "nr_pinned" in [1], so that we can define
migrate_enable/migrate_disable in include/linux/sched.h and access
"this_rq()->nr_pinned" as "(void *)this_rq() + RQ_nr_pinned". The offset
of "nr_pinned" is generated into include/generated/rq-offsets.h by
kernel/sched/rq-offsets.c.

Generally speaking, we move the definitions of migrate_enable and
migrate_disable from kernel/sched/core.c to include/linux/sched.h. The
call to __set_cpus_allowed_ptr() is left out of line in
__migrate_enable().

The "struct rq" is not available in include/linux/sched.h, so we can't
access "runqueues" with this_cpu_ptr(), as compilation would fail in
this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr() on

  typeof((ptr) + 0)

because pointer arithmetic is invalid on a pointer to the incomplete type
struct rq. So we introduce this_rq_raw() and access the runqueues with
arch_raw_cpu_ptr() directly.
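For reference, the generated include/generated/rq-offsets.h follows the
usual filechk_offsets layout and is expected to look roughly like the
sketch below (the offset value 2344 is purely illustrative; the real value
depends on the architecture and config):

	#ifndef __RQ_OFFSETS_H__
	#define __RQ_OFFSETS_H__
	/*
	 * DO NOT MODIFY.
	 *
	 * This file was generated by Kbuild
	 */

	#define RQ_nr_pinned 2344 /* offsetof(struct rq, nr_pinned) */

	#endif

With that, "(void *)this_rq_raw() + RQ_nr_pinned" points at the nr_pinned
field of the current CPU's runqueue even though struct rq stays opaque in
include/linux/sched.h.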
Before this patch, the performance of BPF FENTRY is:

  fentry         :  113.030 ± 0.149M/s
  fentry         :  112.501 ± 0.187M/s
  fentry         :  112.828 ± 0.267M/s
  fentry         :  115.287 ± 0.241M/s

After this patch, the performance of BPF FENTRY increases to:

  fentry         :  143.644 ± 0.670M/s
  fentry         :  149.764 ± 0.362M/s
  fentry         :  149.642 ± 0.156M/s
  fentry         :  145.263 ± 0.221M/s

Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1]
Signed-off-by: Menglong Dong
---
 Kbuild                    | 13 ++++++-
 include/linux/preempt.h   |  3 --
 include/linux/sched.h     | 72 +++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c     |  3 +-
 kernel/sched/core.c       | 56 +++---------------------------
 kernel/sched/rq-offsets.c | 12 +++++++
 6 files changed, 101 insertions(+), 58 deletions(-)
 create mode 100644 kernel/sched/rq-offsets.c

diff --git a/Kbuild b/Kbuild
index f327ca86990c..13324b4bbe23 100644
--- a/Kbuild
+++ b/Kbuild
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
 $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
 	$(call filechk,offsets,__ASM_OFFSETS_H__)
 
+# Generate rq-offsets.h
+
+rq-offsets-file := include/generated/rq-offsets.h
+
+targets += kernel/sched/rq-offsets.s
+
+kernel/sched/rq-offsets.s: $(offsets-file)
+
+$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
+	$(call filechk,offsets,__RQ_OFFSETS_H__)
+
 # Check for missing system calls
 
 quiet_cmd_syscalls = CALL    $<
       cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
 
 PHONY += missing-syscalls
-missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
+missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
 	$(call cmd,syscalls)
 
 # Check the manual modification of atomic headers
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 1fad1c8a4c76..92237c319035 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * work-conserving schedulers.
  *
  */
-extern void migrate_disable(void);
-extern void migrate_enable(void);
 
 /**
  * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
 
 DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
 DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
-DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b272382673d..be489558207f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -49,6 +49,9 @@
 #include
 #include
 #include
+#ifndef COMPILE_OFFSETS
+#include <generated/rq-offsets.h>
+#endif
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -2307,4 +2310,73 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
 #define alloc_tag_restore(_tag, _old) do {} while (0)
 #endif
 
+#if defined(CONFIG_SMP) && !defined(COMPILE_OFFSETS)
+
+extern void __migrate_enable(void);
+
+struct rq;
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
+
+static inline void migrate_enable(void)
+{
+	struct task_struct *p = current;
+
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Check both overflow from migrate_disable() and superfluous
+	 * migrate_enable().
+	 */
+	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+		return;
+#endif
+
+	if (p->migration_disabled > 1) {
+		p->migration_disabled--;
+		return;
+	}
+
+	/*
+	 * Ensure stop_task runs either before or after this, and that
+	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+	 */
+	guard(preempt)();
+	if (unlikely(p->cpus_ptr != &p->cpus_mask))
+		__migrate_enable();
+	/*
+	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
+	 * regular cpus_mask, otherwise things that race (eg.
+	 * select_fallback_rq) get confused.
+	 */
+	barrier();
+	p->migration_disabled = 0;
+	(*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))--;
+}
+
+static inline void migrate_disable(void)
+{
+	struct task_struct *p = current;
+
+	if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+		/*
+		 * Warn about overflow half-way through the range.
+		 */
+		WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
+		p->migration_disabled++;
+		return;
+	}
+
+	guard(preempt)();
+	(*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))++;
+	p->migration_disabled = 1;
+}
+#else
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+#endif
+
+DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
+
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0806295945e4..bfba29a4fb10 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23853,8 +23853,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 BTF_SET_START(btf_id_deny)
 BTF_ID_UNUSED
 #ifdef CONFIG_SMP
-BTF_ID(func, migrate_disable)
-BTF_ID(func, migrate_enable)
+BTF_ID(func, __migrate_enable)
 #endif
 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
 BTF_ID(func, rcu_read_unlock_strict)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index be00629f0ba4..00383fed9f63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+EXPORT_SYMBOL_GPL(runqueues);
 
 #ifdef CONFIG_SCHED_PROXY_EXEC
 DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
@@ -2381,28 +2382,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
 	__do_set_cpus_allowed(p, &ac);
 }
 
-void migrate_disable(void)
-{
-	struct task_struct *p = current;
-
-	if (p->migration_disabled) {
-#ifdef CONFIG_DEBUG_PREEMPT
-		/*
-		 *Warn about overflow half-way through the range.
-		 */
-		WARN_ON_ONCE((s16)p->migration_disabled < 0);
-#endif
-		p->migration_disabled++;
-		return;
-	}
-
-	guard(preempt)();
-	this_rq()->nr_pinned++;
-	p->migration_disabled = 1;
-}
-EXPORT_SYMBOL_GPL(migrate_disable);
-
-void migrate_enable(void)
+void __migrate_enable(void)
 {
 	struct task_struct *p = current;
 	struct affinity_context ac = {
@@ -2410,37 +2390,9 @@ void migrate_enable(void)
 		.flags = SCA_MIGRATE_ENABLE,
 	};
 
-#ifdef CONFIG_DEBUG_PREEMPT
-	/*
-	 * Check both overflow from migrate_disable() and superfluous
-	 * migrate_enable().
-	 */
-	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
-		return;
-#endif
-
-	if (p->migration_disabled > 1) {
-		p->migration_disabled--;
-		return;
-	}
-
-	/*
-	 * Ensure stop_task runs either before or after this, and that
-	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
-	 */
-	guard(preempt)();
-	if (p->cpus_ptr != &p->cpus_mask)
-		__set_cpus_allowed_ptr(p, &ac);
-	/*
-	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
-	 * regular cpus_mask, otherwise things that race (eg.
-	 * select_fallback_rq) get confused.
-	 */
-	barrier();
-	p->migration_disabled = 0;
-	this_rq()->nr_pinned--;
+	__set_cpus_allowed_ptr(p, &ac);
 }
-EXPORT_SYMBOL_GPL(migrate_enable);
+EXPORT_SYMBOL_GPL(__migrate_enable);
 
 static inline bool rq_has_pinned_tasks(struct rq *rq)
 {
diff --git a/kernel/sched/rq-offsets.c b/kernel/sched/rq-offsets.c
new file mode 100644
index 000000000000..a23747bbe25b
--- /dev/null
+++ b/kernel/sched/rq-offsets.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#define COMPILE_OFFSETS
+#include <linux/kbuild.h>
+#include <linux/types.h>
+#include "sched.h"
+
+int main(void)
+{
+	DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
+
+	return 0;
+}
-- 
2.50.1