From: Wanpeng Li Introduce foundational infrastructure for the vCPU debooster mechanism to improve yield_to() effectiveness in virtualization workloads. Add per-rq tracking fields for rate limiting (yield_deboost_last_time_ns) and debouncing (yield_deboost_last_src_pid, yield_deboost_last_dst_pid, yield_deboost_last_pair_time_ns). Introduce a global knob, sysctl_sched_vcpu_debooster_enabled, for runtime control, defaulting to enabled. The knob is exposed through a debugfs entry (vcpu_debooster_enabled) for control and observability, and the per-rq fields are initialized in sched_init(). The infrastructure is inert at this stage as no deboost logic is implemented yet, allowing independent verification that existing behavior remains unchanged. v1 -> v2: - Rename debugfs entry from sched_vcpu_debooster_enabled to vcpu_debooster_enabled for consistency with other sched debugfs entries - Add explicit initialization of yield_deboost_last_time_ns to 0 in sched_init() for clarity - Improve comments to follow kernel documentation style Signed-off-by: Wanpeng Li --- kernel/sched/core.c | 9 +++++++-- kernel/sched/debug.c | 2 ++ kernel/sched/fair.c | 7 +++++++ kernel/sched/sched.h | 12 ++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 41ba0be16911..9f0936b9c1c9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8606,9 +8606,14 @@ void __init sched_init(void) #endif /* CONFIG_CGROUP_SCHED */ for_each_possible_cpu(i) { - struct rq *rq; + struct rq *rq = cpu_rq(i); + + /* Initialize vCPU debooster per-rq state */ + rq->yield_deboost_last_time_ns = 0; + rq->yield_deboost_last_src_pid = -1; + rq->yield_deboost_last_dst_pid = -1; + rq->yield_deboost_last_pair_time_ns = 0; - rq = cpu_rq(i); raw_spin_lock_init(&rq->__lock); rq->nr_running = 0; rq->calc_load_active = 0; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 41caa22e0680..13e67617549d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -508,6 +508,8 @@ static __init int sched_init_debug(void) 
debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); + debugfs_create_u32("vcpu_debooster_enabled", 0644, debugfs_sched, + &sysctl_sched_vcpu_debooster_enabled); sched_domains_mutex_lock(); update_sched_domain_debugfs(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index da46c3164537..87c30db2c853 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -81,6 +81,13 @@ static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; +/* + * vCPU debooster: runtime toggle for yield_to() vruntime penalty mechanism. + * When enabled (default), yield_to() applies bounded vruntime penalties to + * improve lock holder scheduling in virtualized environments. + */ +unsigned int sysctl_sched_vcpu_debooster_enabled __read_mostly = 1; + static int __init setup_sched_thermal_decay_shift(char *str) { pr_warn("Ignoring the deprecated sched_thermal_decay_shift= option\n"); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d30cca6870f5..b7aa0d35c793 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1294,6 +1294,16 @@ struct rq { unsigned int push_busy; struct cpu_stop_work push_work; + /* + * vCPU debooster: per-rq state for yield_to() optimization. + * Used to rate-limit and debounce vruntime penalties applied + * when a vCPU yields to a lock holder. 
+ */ + u64 yield_deboost_last_time_ns; + pid_t yield_deboost_last_src_pid; + pid_t yield_deboost_last_dst_pid; + u64 yield_deboost_last_pair_time_ns; + #ifdef CONFIG_SCHED_CORE /* per rq */ struct rq *core; @@ -2958,6 +2968,8 @@ extern int sysctl_resched_latency_warn_once; extern unsigned int sysctl_sched_tunable_scaling; +extern unsigned int sysctl_sched_vcpu_debooster_enabled; + extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; -- 2.43.0