The rtnl lock might already be held elsewhere, preventing
ad_cond_set_peer_notif() from acquiring it and updating send_peer_notif.
This patch addresses the issue by deferring the update to a workqueue.
Updating send_peer_notif is not time-critical, so a slightly delayed
update is entirely acceptable. All of the places that check and use this
value already do so under the rtnl lock, for example:
- reading send_peer_notif
- send_peer_notif--
- bond_should_notify_peers()

Note that the rtnl lock is still required when accessing bond->params.*
to update send_peer_notif. In LACP mode, resetting send_peer_notif from
a workqueue is therefore a safe, simple and effective approach.

Cc: Jay Vosburgh
Cc: "David S. Miller"
Cc: Eric Dumazet
Cc: Jakub Kicinski
Cc: Paolo Abeni
Cc: Simon Horman
Cc: Jonathan Corbet
Cc: Andrew Lunn
Cc: Nikolay Aleksandrov
Cc: Hangbin Liu
Cc: Jason Xing
Suggested-by: Hangbin Liu
Signed-off-by: Tonghao Zhang
---
v1:
- this patch is actually version v3 of https://patchwork.kernel.org/project/netdevbpf/patch/20251118090305.35558-1-tonghao@bamaicloud.com/
- add a comment explaining why trylock is used.
- add this patch to series
---
 drivers/net/bonding/bond_3ad.c  |  7 ++---
 drivers/net/bonding/bond_main.c | 55 ++++++++++++++++++++++++++-------
 include/net/bonding.h           |  2 ++
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1a8de2bf8655..01ae0269a138 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1008,11 +1008,8 @@ static void ad_cond_set_peer_notif(struct port *port)
 {
 	struct bonding *bond = port->slave->bond;
 
-	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
-		bond->send_peer_notif = bond->params.num_peer_notif *
-					max(1, bond->params.peer_notif_delay);
-		rtnl_unlock();
-	}
+	if (bond->params.broadcast_neighbor)
+		bond_peer_notify_work_rearm(bond, 0);
 }
 
 /**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3d56339a8a10..811ced7680c1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1195,6 +1195,35 @@ static bool bond_should_notify_peers(struct bonding *bond)
 	return true;
 }
 
+/* Use this to update send_peer_notif when RTNL may be held in other places. */
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
+{
+	queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
+}
+
+/* Peer notify update handler. Holds only RTNL */
+static void bond_peer_notify_reset(struct bonding *bond)
+{
+	bond->send_peer_notif = bond->params.num_peer_notif *
+				max(1, bond->params.peer_notif_delay);
+}
+
+static void bond_peer_notify_handler(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+					    peer_notify_work.work);
+
+	if (!rtnl_trylock()) {
+		bond_peer_notify_work_rearm(bond, 1);
+		return;
+	}
+
+	bond_peer_notify_reset(bond);
+
+	rtnl_unlock();
+	return;
+}
+
 /**
  * bond_change_active_slave - change the active slave into the specified one
  * @bond: our bonding struct
@@ -1270,8 +1299,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 					    BOND_SLAVE_NOTIFY_NOW);
 
 	if (new_active) {
-		bool should_notify_peers = false;
-
 		bond_set_slave_active_flags(new_active,
 					    BOND_SLAVE_NOTIFY_NOW);
 
@@ -1280,19 +1307,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 							old_active);
 
 		if (netif_running(bond->dev)) {
-			bond->send_peer_notif =
-				bond->params.num_peer_notif *
-				max(1, bond->params.peer_notif_delay);
-			should_notify_peers =
-				bond_should_notify_peers(bond);
+			bond_peer_notify_reset(bond);
+
+			if (bond_should_notify_peers(bond)) {
+				bond->send_peer_notif--;
+				call_netdevice_notifiers(
+					NETDEV_NOTIFY_PEERS,
+					bond->dev);
+			}
 		}
 
 		call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
-		if (should_notify_peers) {
-			bond->send_peer_notif--;
-			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
-						 bond->dev);
-		}
 	}
 }
 
@@ -4213,6 +4238,10 @@ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
 
 void bond_work_init_all(struct bonding *bond)
 {
+	/* .ndo_stop(), bond_close() will try to flush the work under
+	 * the rtnl lock. The workqueue must not block on rtnl lock
+	 * to avoid deadlock.
+	 */
 	INIT_DELAYED_WORK(&bond->mcast_work,
 			  bond_resend_igmp_join_requests_delayed);
 	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
@@ -4220,6 +4249,7 @@ void bond_work_init_all(struct bonding *bond)
 	INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
 	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
 	INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
+	INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
 }
 
 void bond_work_cancel_all(struct bonding *bond)
@@ -4230,6 +4260,7 @@ void bond_work_cancel_all(struct bonding *bond)
 	cancel_delayed_work_sync(&bond->ad_work);
 	cancel_delayed_work_sync(&bond->mcast_work);
 	cancel_delayed_work_sync(&bond->slave_arr_work);
+	cancel_delayed_work_sync(&bond->peer_notify_work);
 }
 
 static int bond_open(struct net_device *bond_dev)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da0586..63d08056a4a4 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -254,6 +254,7 @@ struct bonding {
 	struct delayed_work ad_work;
 	struct delayed_work mcast_work;
 	struct delayed_work slave_arr_work;
+	struct delayed_work peer_notify_work;
 #ifdef CONFIG_DEBUG_FS
 	/* debugging support via debugfs */
 	struct dentry *debug_dir;
@@ -709,6 +710,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
 					      int level);
 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
 void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
 void bond_work_init_all(struct bonding *bond);
 void bond_work_cancel_all(struct bonding *bond);
 
--
2.34.1
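The deferral pattern introduced above can be summarized with a minimal,
self-contained sketch. The foo_* names and the counter field are
hypothetical and only illustrate the queue / rtnl_trylock / re-queue flow
that bond_peer_notify_handler() follows:

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>

/* Hypothetical driver state; bonding keeps the equivalent in struct bonding. */
struct foo_priv {
	struct workqueue_struct *wq;
	struct delayed_work update_work;
	unsigned int counter;		/* only modified while holding RTNL */
};

/* Safe to call from contexts that may already hold RTNL: nothing is
 * updated here, the work is only (re)queued.
 */
static void foo_update_rearm(struct foo_priv *priv, unsigned long delay)
{
	queue_delayed_work(priv->wq, &priv->update_work, delay);
}

static void foo_update_handler(struct work_struct *work)
{
	struct foo_priv *priv = container_of(work, struct foo_priv,
					     update_work.work);

	/* Do not block on RTNL here: the driver's close path flushes this
	 * work while holding RTNL, so sleeping on the lock could deadlock.
	 * On contention, back off and retry shortly instead.
	 */
	if (!rtnl_trylock()) {
		foo_update_rearm(priv, 1);
		return;
	}

	priv->counter = 3;	/* the RTNL-protected update */

	rtnl_unlock();
}

static void foo_update_init(struct foo_priv *priv)
{
	INIT_DELAYED_WORK(&priv->update_work, foo_update_handler);
}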
This patch tries to fix a possible loss of peer notify events.

In bond_mii_monitor()/bond_activebackup_arp_mon(), once the rtnl lock
is held:
- check send_peer_notif again, to avoid unconditionally decrementing it.
- send_peer_notif may have been reset in the meantime, so re-check
  bond_should_notify_peers() under the lock to avoid losing
  notification events.

Cc: Jay Vosburgh
Cc: "David S. Miller"
Cc: Eric Dumazet
Cc: Jakub Kicinski
Cc: Paolo Abeni
Cc: Simon Horman
Cc: Jonathan Corbet
Cc: Andrew Lunn
Cc: Nikolay Aleksandrov
Cc: Hangbin Liu
Cc: Jason Xing
Signed-off-by: Tonghao Zhang
---
v1:
- split from: https://patchwork.kernel.org/project/netdevbpf/patch/20251118090431.35654-1-tonghao@bamaicloud.com/
- this patch only moves the bond_should_notify_peers() check under the rtnl lock.
- add this patch to series
---
 drivers/net/bonding/bond_main.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 811ced7680c1..1b16c4cd90e0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2809,11 +2809,10 @@ static void bond_mii_monitor(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
-	bool should_notify_peers;
-	bool commit;
-	unsigned long delay;
-	struct slave *slave;
 	struct list_head *iter;
+	struct slave *slave;
+	unsigned long delay;
+	bool commit;
 
 	delay = msecs_to_jiffies(bond->params.miimon);
 
@@ -2822,7 +2821,6 @@ static void bond_mii_monitor(struct work_struct *work)
 
 	rcu_read_lock();
 
-	should_notify_peers = bond_should_notify_peers(bond);
 	commit = !!bond_miimon_inspect(bond);
 
 	rcu_read_unlock();
@@ -2843,10 +2841,10 @@ static void bond_mii_monitor(struct work_struct *work)
 		}
 
 		if (bond->send_peer_notif) {
-			bond->send_peer_notif--;
-			if (should_notify_peers)
+			if (bond_should_notify_peers(bond))
 				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
 							 bond->dev);
+			bond->send_peer_notif--;
 		}
 
 		rtnl_unlock();	/* might sleep, hold no other locks */
--
2.34.1
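The ordering above follows the usual lockless-hint / re-check-under-lock
pattern. A minimal sketch with hypothetical bar_* names (not bonding code)
shows why the authoritative check is repeated once rtnl_trylock() succeeds,
and why the decrement stays inside that branch:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

struct bar_state {
	struct net_device *dev;
	bool link_up;		/* stands in for the active-slave checks */
	unsigned int pending;	/* authoritative only under RTNL */
};

/* Hypothetical policy check, stands in for bond_should_notify_peers(). */
static bool bar_should_notify(struct bar_state *st)
{
	return st->link_up;
}

static void bar_maybe_notify(struct bar_state *st)
{
	if (!st->pending)	/* cheap hint, may be stale */
		return;

	if (!rtnl_trylock())
		return;		/* try again on the next monitor run */

	/* Re-check under RTNL: another path may have reset or consumed the
	 * budget since the hint was sampled. Notify first, then decrement,
	 * and only inside this branch.
	 */
	if (st->pending) {
		if (bar_should_notify(st))
			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, st->dev);
		st->pending--;
	}

	rtnl_unlock();
}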
After the first rtnl_trylock() failure, retrying immediately is not
advisable, as there is a high probability of failing to acquire the
lock again. Skip the second attempt and simply re-arm the work instead.

Cc: Jay Vosburgh
Cc: "David S. Miller"
Cc: Eric Dumazet
Cc: Jakub Kicinski
Cc: Paolo Abeni
Cc: Simon Horman
Cc: Jonathan Corbet
Cc: Andrew Lunn
Cc: Nikolay Aleksandrov
Cc: Hangbin Liu
Cc: Jason Xing
Signed-off-by: Tonghao Zhang
---
v1:
- split from: https://patchwork.kernel.org/project/netdevbpf/patch/20251118090431.35654-1-tonghao@bamaicloud.com/
- this patch only skips the second rtnl trylock.
- add this patch to series
---
 drivers/net/bonding/bond_main.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1b16c4cd90e0..025ca0a45615 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3756,7 +3756,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
 
 static void bond_activebackup_arp_mon(struct bonding *bond)
 {
-	bool should_notify_rtnl = false;
+	bool should_notify_rtnl;
 	int delta_in_ticks;
 
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -3784,13 +3784,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
 	should_notify_rtnl = bond_ab_arp_probe(bond);
 	rcu_read_unlock();
 
-re_arm:
-	if (bond->params.arp_interval)
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
-
 	if (bond->send_peer_notif || should_notify_rtnl) {
-		if (!rtnl_trylock())
-			return;
+		if (!rtnl_trylock()) {
+			delta_in_ticks = 1;
+			goto re_arm;
+		}
 
 		if (bond->send_peer_notif) {
 			if (bond_should_notify_peers(bond))
@@ -3805,6 +3803,10 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
 
 		rtnl_unlock();
 	}
+
+re_arm:
+	if (bond->params.arp_interval)
+		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 }
 
 static void bond_arp_monitor(struct work_struct *work)
--
2.34.1
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Signed-off-by: Tonghao Zhang --- v1: - splitted from: https://patchwork.kernel.org/project/netdevbpf/patch/20251118090431.35654-1-tonghao@bamaicloud.com/ - this patch only skip the 2nd rtnl lock. - add this patch to series --- drivers/net/bonding/bond_main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1b16c4cd90e0..025ca0a45615 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3756,7 +3756,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) static void bond_activebackup_arp_mon(struct bonding *bond) { - bool should_notify_rtnl = false; + bool should_notify_rtnl; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -3784,13 +3784,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond) should_notify_rtnl = bond_ab_arp_probe(bond); rcu_read_unlock(); -re_arm: - if (bond->params.arp_interval) - queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (bond->send_peer_notif || should_notify_rtnl) { - if (!rtnl_trylock()) - return; + if (!rtnl_trylock()) { + delta_in_ticks = 1; + goto re_arm; + } if (bond->send_peer_notif) { if (bond_should_notify_peers(bond)) @@ -3805,6 +3803,10 @@ static void bond_activebackup_arp_mon(struct bonding *bond) rtnl_unlock(); } + +re_arm: + if (bond->params.arp_interval) + queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); } static void bond_arp_monitor(struct work_struct *work) -- 2.34.1 Although operations on the variable send_peer_notif are already within a lock-protected critical section, there are cases where it is accessed outside the lock. Therefore, READ_ONCE() and WRITE_ONCE() should be added to it. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Signed-off-by: Tonghao Zhang --- v2: fix compilation errors --- drivers/net/bonding/bond_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 025ca0a45615..14396e39b1f0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,8 +1204,9 @@ void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay) /* Peer notify update handler. 
Place all of the active-backup ARP monitor work into one common rtnl
critical section, to avoid taking the rtnl lock repeatedly. Note that
the mii monitor already uses a single common critical section in the
same way.

It should also be noted that the probe ARP sent by bond_ab_arp_probe()
may be skipped for one cycle while a link state change is being
committed. This is acceptable.

Cc: Jay Vosburgh
Cc: "David S. Miller"
Cc: Eric Dumazet
Cc: Jakub Kicinski
Cc: Paolo Abeni
Cc: Simon Horman
Cc: Jonathan Corbet
Cc: Andrew Lunn
Cc: Nikolay Aleksandrov
Cc: Hangbin Liu
Cc: Jason Xing
Signed-off-by: Tonghao Zhang
---
 drivers/net/bonding/bond_main.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 14396e39b1f0..2cfaac3edde0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3757,7 +3757,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
 
 static void bond_activebackup_arp_mon(struct bonding *bond)
 {
-	bool should_notify_rtnl;
+	bool should_notify_rtnl, commit;
 	int delta_in_ticks;
 
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -3767,36 +3767,28 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
 
 	rcu_read_lock();
 
-	if (bond_ab_arp_inspect(bond)) {
-		rcu_read_unlock();
-
-		/* Race avoidance with bond_close flush of workqueue */
-		if (!rtnl_trylock()) {
-			delta_in_ticks = 1;
-			goto re_arm;
-		}
-
-		bond_ab_arp_commit(bond);
-
-		rtnl_unlock();
-		rcu_read_lock();
-	}
-
+	commit = !!bond_ab_arp_inspect(bond);
 	should_notify_rtnl = bond_ab_arp_probe(bond);
+
 	rcu_read_unlock();
 
-	if (READ_ONCE(bond->send_peer_notif) || should_notify_rtnl) {
+	if (commit || READ_ONCE(bond->send_peer_notif) || should_notify_rtnl) {
+		/* Race avoidance with bond_close flush of workqueue */
 		if (!rtnl_trylock()) {
 			delta_in_ticks = 1;
 			goto re_arm;
 		}
 
+		if (commit)
+			bond_ab_arp_commit(bond);
+
 		if (bond->send_peer_notif) {
 			if (bond_should_notify_peers(bond))
 				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
 							 bond->dev);
 			bond->send_peer_notif--;
 		}
+
 		if (should_notify_rtnl) {
 			bond_slave_state_notify(bond);
 			bond_slave_link_notify(bond);
--
2.34.1
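Taken together, the series leaves bond_activebackup_arp_mon() structured
roughly as below. Context outside the posted hunks (for example the
bond_has_slaves() check) is paraphrased from the existing driver and
should be verified against the tree:

static void bond_activebackup_arp_mon(struct bonding *bond)
{
	bool should_notify_rtnl, commit;
	int delta_in_ticks;

	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);

	if (!bond_has_slaves(bond))
		goto re_arm;

	/* Gather all decisions under RCU only. */
	rcu_read_lock();

	commit = !!bond_ab_arp_inspect(bond);
	should_notify_rtnl = bond_ab_arp_probe(bond);

	rcu_read_unlock();

	/* Apply everything in one rtnl critical section. */
	if (commit || READ_ONCE(bond->send_peer_notif) || should_notify_rtnl) {
		/* Race avoidance with bond_close flush of workqueue */
		if (!rtnl_trylock()) {
			delta_in_ticks = 1;
			goto re_arm;
		}

		if (commit)
			bond_ab_arp_commit(bond);

		if (bond->send_peer_notif) {
			if (bond_should_notify_peers(bond))
				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
							 bond->dev);
			bond->send_peer_notif--;
		}

		if (should_notify_rtnl) {
			bond_slave_state_notify(bond);
			bond_slave_link_notify(bond);
		}

		rtnl_unlock();
	}

re_arm:
	if (bond->params.arp_interval)
		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
}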