From: Luka Bai When psi_group_change() runs, we always iterate over all the cgroups from the child all the way up to the root cgroup. They are connected through a doubly linked list, so it is hard for the CPU to prefetch the parent. So we tried adding a prefetch for the parent groupc, and it brings a noticeable benefit to the final result. Signed-off-by: Luka Bai --- kernel/sched/psi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7374c05a5751..9b7a85d1bc28 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -793,6 +793,15 @@ static void record_times(struct psi_group_cpu *groupc, u64 now) #define for_each_group(iter, group) \ for (typeof(group) iter = group; iter; iter = iter->parent) +static inline struct psi_group_cpu *prefetch_and_get_groupc(struct psi_group *group, int cpu) +{ + struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); + + if (group->parent) + prefetchw(per_cpu_ptr(group->parent->pcpu, cpu)); + return groupc; +} + static void psi_group_change(struct psi_group *group, int cpu, unsigned int clear, unsigned int set, u64 now, bool wake_clock, bool curr_in_memstall) @@ -802,7 +811,7 @@ static void psi_group_change(struct psi_group *group, int cpu, u32 state_mask; lockdep_assert_rq_held(cpu_rq(cpu)); - groupc = per_cpu_ptr(group->pcpu, cpu); + groupc = prefetch_and_get_groupc(group, cpu); /* * Start with TSK_ONCPU, which doesn't have a corresponding -- 2.52.0