The current implementation shares the same dept key for multiple synchronization points, which can lead to false positive reports in dependency tracking and potential confusion in debugging. For example, both normal RCU and tasks trace RCU synchronization points use the same dept key. Specifically: 1. synchronize_rcu() uses a dept key embedded in __wait_rcu_gp(): synchronize_rcu() synchronize_rcu_normal() _wait_rcu_gp() __wait_rcu_gp() <- the key as static variable 2. synchronize_rcu_tasks_trace() uses the dept key, too: synchronize_rcu_tasks_trace() synchronize_rcu_tasks_generic() _wait_rcu_gp() __wait_rcu_gp() <- the key as static variable Since the both rely on the same dept key, dept may report false positive circular dependency. To resolve this, separate dept keys and maps should be assigned to each struct rcu_synchronize. =================================================== DEPT: Circular dependency has been detected. 6.15.0-rc6-00042-ged94bafc6405 #2 Not tainted --------------------------------------------------- summary --------------------------------------------------- *** DEADLOCK *** context A [S] lock(cpu_hotplug_lock:0) [W] __wait_rcu_gp(:0) [E] unlock(cpu_hotplug_lock:0) context B [S] (unknown)(:0) [W] lock(cpu_hotplug_lock:0) [E] try_to_wake_up(:0) [S]: start of the event context [W]: the wait blocked [E]: the event not reachable --------------------------------------------------- context A's detail --------------------------------------------------- context A [S] lock(cpu_hotplug_lock:0) [W] __wait_rcu_gp(:0) [E] unlock(cpu_hotplug_lock:0) [S] lock(cpu_hotplug_lock:0): [] cpus_read_lock+0x14/0x20 stacktrace: percpu_down_read.constprop.0+0x88/0x2ec cpus_read_lock+0x14/0x20 cgroup_procs_write_start+0x164/0x634 __cgroup_procs_write+0xdc/0x4d0 cgroup_procs_write+0x34/0x74 cgroup_file_write+0x25c/0x670 kernfs_fop_write_iter+0x2ec/0x498 vfs_write+0x574/0xc30 ksys_write+0x124/0x244 __arm64_sys_write+0x70/0xa4 invoke_syscall+0x88/0x2e0 el0_svc_common.constprop.0+0xe8/0x2e0 do_el0_svc+0x44/0x60 el0_svc+0x50/0x188 el0t_64_sync_handler+0x10c/0x140 el0t_64_sync+0x198/0x19c [W] __wait_rcu_gp(:0): [] __wait_rcu_gp+0x324/0x498 stacktrace: schedule+0xcc/0x348 schedule_timeout+0x1a4/0x268 __wait_for_common+0x1c4/0x3f0 __wait_for_completion_state+0x20/0x38 __wait_rcu_gp+0x35c/0x498 synchronize_rcu_normal+0x200/0x218 synchronize_rcu+0x234/0x2a0 rcu_sync_enter+0x11c/0x300 percpu_down_write+0xb4/0x3e0 cgroup_procs_write_start+0x174/0x634 __cgroup_procs_write+0xdc/0x4d0 cgroup_procs_write+0x34/0x74 cgroup_file_write+0x25c/0x670 kernfs_fop_write_iter+0x2ec/0x498 vfs_write+0x574/0xc30 ksys_write+0x124/0x244 [E] unlock(cpu_hotplug_lock:0): (N/A) --------------------------------------------------- context B's detail --------------------------------------------------- context B [S] (unknown)(:0) [W] lock(cpu_hotplug_lock:0) [E] try_to_wake_up(:0) [S] (unknown)(:0): (N/A) [W] lock(cpu_hotplug_lock:0): [] cpus_read_lock+0x14/0x20 stacktrace: percpu_down_read.constprop.0+0x6c/0x2ec cpus_read_lock+0x14/0x20 check_all_holdout_tasks_trace+0x90/0xa30 rcu_tasks_wait_gp+0x47c/0x938 rcu_tasks_one_gp+0x75c/0xef8 rcu_tasks_kthread+0x180/0x1dc kthread+0x3ac/0x74c ret_from_fork+0x10/0x20 [E] try_to_wake_up(:0): [] complete+0xb8/0x1e8 stacktrace: try_to_wake_up+0x374/0x1164 complete+0xb8/0x1e8 wakeme_after_rcu+0x14/0x20 rcu_tasks_invoke_cbs+0x218/0xaa8 rcu_tasks_one_gp+0x834/0xef8 rcu_tasks_kthread+0x180/0x1dc kthread+0x3ac/0x74c ret_from_fork+0x10/0x20 (wait to wake up) stacktrace: __schedule+0xf64/0x3614 schedule+0xcc/0x348 schedule_timeout+0x1a4/0x268 __wait_for_common+0x1c4/0x3f0 __wait_for_completion_state+0x20/0x38 __wait_rcu_gp+0x35c/0x498 synchronize_rcu_tasks_generic+0x14c/0x220 synchronize_rcu_tasks_trace+0x24/0x8c rcu_init_tasks_generic+0x168/0x194 do_one_initcall+0x174/0xa00 kernel_init_freeable+0x744/0x7dc kernel_init+0x78/0x220 ret_from_fork+0x10/0x20 Separating the dept key and map for each of struct rcu_synchronize, ensuring proper tracking for each execution context. Signed-off-by: Yunseong Kim [ Rewrote the changelog. ] Signed-off-by: Byungchul Park --- include/linux/rcupdate_wait.h | 13 ++++++++----- kernel/rcu/rcu.h | 1 + kernel/rcu/update.c | 5 +++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 4c92d4291cce..ee598e70b4bc 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -19,17 +19,20 @@ struct rcu_synchronize { /* This is for debugging. */ struct rcu_gp_oldstate oldstate; + struct dept_map dmap; + struct dept_key dkey; }; void wakeme_after_rcu(struct rcu_head *head); void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); + struct rcu_synchronize *rs_array, struct dept_key *dkey); #define _wait_rcu_gp(checktiny, state, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, state, ARRAY_SIZE(__crcu_array), __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + static struct dept_key __key; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, state, ARRAY_SIZE(__crcu_array), __crcu_array, __rs_array, &__key); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, TASK_UNINTERRUPTIBLE, __VA_ARGS__) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9cf01832a6c3..c0d8ea139596 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -12,6 +12,7 @@ #include #include +#include /* * Grace-period counter management. diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c912b594ba98..82292337d5b0 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -409,7 +409,7 @@ void wakeme_after_rcu(struct rcu_head *head) EXPORT_SYMBOL_GPL(wakeme_after_rcu); void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array) + struct rcu_synchronize *rs_array, struct dept_key *dkey) { int i; int j; @@ -426,7 +426,8 @@ void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *c break; if (j == i) { init_rcu_head_on_stack(&rs_array[i].head); - init_completion(&rs_array[i].completion); + sdt_map_init_key(&rs_array[i].dmap, dkey); + init_completion_dmap(&rs_array[i].completion, &rs_array[i].dmap); (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu); } } -- 2.17.1