The rtnl lock may already be held elsewhere, in which case the rtnl_trylock() in ad_cond_set_peer_notif() fails and send_peer_notif is not updated. This patch addresses the issue by deferring the update to a workqueue. Since updating send_peer_notif is not time-critical, such delayed updates are entirely acceptable. The value is checked and used in multiple places, and all of these operations are protected by the rtnl lock, such as - read send_peer_notif - send_peer_notif-- - bond_should_notify_peers Note that the rtnl lock is still required when accessing bond.params.* to update send_peer_notif. In LACP mode, resetting send_peer_notif from a workqueue is a safe, simple and effective approach. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Suggested-by: Hangbin Liu Signed-off-by: Tonghao Zhang --- v4: - keep the netdevice notifier order. v2/3: - no change v1: - This patch is actually version v3, https://patchwork.kernel.org/project/netdevbpf/patch/20251118090305.35558-1-tonghao@bamaicloud.com/ - add a comment explaining why we use the trylock. 
- add this patch to series --- drivers/net/bonding/bond_3ad.c | 7 ++-- drivers/net/bonding/bond_main.c | 57 +++++++++++++++++++++++++-------- include/net/bonding.h | 2 ++ 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 1a8de2bf8655..01ae0269a138 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1008,11 +1008,8 @@ static void ad_cond_set_peer_notif(struct port *port) { struct bonding *bond = port->slave->bond; - if (bond->params.broadcast_neighbor && rtnl_trylock()) { - bond->send_peer_notif = bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - rtnl_unlock(); - } + if (bond->params.broadcast_neighbor) + bond_peer_notify_work_rearm(bond, 0); } /** diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3d56339a8a10..edf6dac8a98f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1195,6 +1195,35 @@ static bool bond_should_notify_peers(struct bonding *bond) return true; } +/* Use this to update send_peer_notif when RTNL may be held in other places. */ +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay) +{ + queue_delayed_work(bond->wq, &bond->peer_notify_work, delay); +} + +/* Peer notify update handler. 
Holds only RTNL */ +static void bond_peer_notify_reset(struct bonding *bond) +{ + bond->send_peer_notif = bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay); +} + +static void bond_peer_notify_handler(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + peer_notify_work.work); + + if (!rtnl_trylock()) { + bond_peer_notify_work_rearm(bond, 1); + return; + } + + bond_peer_notify_reset(bond); + + rtnl_unlock(); + return; +} + /** * bond_change_active_slave - change the active slave into the specified one * @bond: our bonding struct @@ -1270,8 +1299,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) BOND_SLAVE_NOTIFY_NOW); if (new_active) { - bool should_notify_peers = false; - bond_set_slave_active_flags(new_active, BOND_SLAVE_NOTIFY_NOW); @@ -1279,19 +1306,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) bond_do_fail_over_mac(bond, new_active, old_active); + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); + if (netif_running(bond->dev)) { - bond->send_peer_notif = - bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - should_notify_peers = - bond_should_notify_peers(bond); - } + bond_peer_notify_reset(bond); - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); - if (should_notify_peers) { - bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); + if (bond_should_notify_peers(bond)) { + bond->send_peer_notif--; + call_netdevice_notifiers( + NETDEV_NOTIFY_PEERS, + bond->dev); + } } } } @@ -4213,6 +4238,10 @@ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) void bond_work_init_all(struct bonding *bond) { + /* ndo_stop, bond_close() will try to flush the work under + * the rtnl lock. The workqueue must not block on rtnl lock + * to avoid deadlock. 
+ */ INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed); INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); @@ -4220,6 +4249,7 @@ void bond_work_init_all(struct bonding *bond) INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor); INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler); + INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler); } void bond_work_cancel_all(struct bonding *bond) @@ -4230,6 +4260,7 @@ void bond_work_cancel_all(struct bonding *bond) cancel_delayed_work_sync(&bond->ad_work); cancel_delayed_work_sync(&bond->mcast_work); cancel_delayed_work_sync(&bond->slave_arr_work); + cancel_delayed_work_sync(&bond->peer_notify_work); } static int bond_open(struct net_device *bond_dev) diff --git a/include/net/bonding.h b/include/net/bonding.h index 49edc7da0586..63d08056a4a4 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -254,6 +254,7 @@ struct bonding { struct delayed_work ad_work; struct delayed_work mcast_work; struct delayed_work slave_arr_work; + struct delayed_work peer_notify_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; @@ -709,6 +710,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); void bond_work_cancel_all(struct bonding *bond); -- 2.34.1 This patch tries to avoid the possible peer notify event loss. In bond_mii_monitor()/bond_activebackup_arp_mon(), when we hold the rtnl lock: - check send_peer_notif again to avoid unconditionally reducing this value. - send_peer_notif may have been reset. 
Therefore, it is necessary to re-check, via bond_should_notify_peers(), whether a peer notification should be sent, in order to avoid losing notification events. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Signed-off-by: Tonghao Zhang Reviewed-by: Hangbin Liu --- v2-4: - no change v1: - split from: https://patchwork.kernel.org/project/netdevbpf/patch/20251118090431.35654-1-tonghao@bamaicloud.com/ - this patch only moves the bond_should_notify_peers() call under the rtnl lock. - add this patch to series --- drivers/net/bonding/bond_main.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index edf6dac8a98f..8be7f52e847c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2809,11 +2809,10 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers; - bool commit; - unsigned long delay; - struct slave *slave; struct list_head *iter; + struct slave *slave; + unsigned long delay; + bool commit; delay = msecs_to_jiffies(bond->params.miimon); @@ -2822,7 +2821,6 @@ static void bond_mii_monitor(struct work_struct *work) rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); commit = !!bond_miimon_inspect(bond); rcu_read_unlock(); @@ -2843,10 +2841,10 @@ static void bond_mii_monitor(struct work_struct *work) } if (bond->send_peer_notif) { - bond->send_peer_notif--; - if (should_notify_peers) + if (bond_should_notify_peers(bond)) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + bond->send_peer_notif--; } rtnl_unlock(); /* might sleep, hold no other locks */ @@ -3758,7 +3756,6 @@ static bool bond_ab_arp_probe(struct bonding *bond) static void bond_activebackup_arp_mon(struct bonding *bond) { 
- bool should_notify_peers = false; bool should_notify_rtnl = false; int delta_in_ticks; @@ -3769,15 +3766,12 @@ static void bond_activebackup_arp_mon(struct bonding *bond) rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); - if (bond_ab_arp_inspect(bond)) { rcu_read_unlock(); /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; - should_notify_peers = false; goto re_arm; } @@ -3794,14 +3788,15 @@ static void bond_activebackup_arp_mon(struct bonding *bond) if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (should_notify_peers || should_notify_rtnl) { + if (bond->send_peer_notif || should_notify_rtnl) { if (!rtnl_trylock()) return; - if (should_notify_peers) { + if (bond->send_peer_notif) { + if (bond_should_notify_peers(bond)) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); } if (should_notify_rtnl) { bond_slave_state_notify(bond); -- 2.34.1 After the first rtnl_trylock() fails, retrying immediately is not advisable, as there is a high probability of failing to acquire the lock again. Skipping the second attempt and going straight to re-arming the work is therefore a sensible optimization. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Signed-off-by: Tonghao Zhang --- v2-4: - no change v1: - split from: https://patchwork.kernel.org/project/netdevbpf/patch/20251118090431.35654-1-tonghao@bamaicloud.com/ - this patch only skips the 2nd rtnl trylock. 
- add this patch to series --- drivers/net/bonding/bond_main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8be7f52e847c..b835f63d2871 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3756,7 +3756,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) static void bond_activebackup_arp_mon(struct bonding *bond) { - bool should_notify_rtnl = false; + bool should_notify_rtnl; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -3784,13 +3784,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond) should_notify_rtnl = bond_ab_arp_probe(bond); rcu_read_unlock(); -re_arm: - if (bond->params.arp_interval) - queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (bond->send_peer_notif || should_notify_rtnl) { - if (!rtnl_trylock()) - return; + if (!rtnl_trylock()) { + delta_in_ticks = 1; + goto re_arm; + } if (bond->send_peer_notif) { if (bond_should_notify_peers(bond)) @@ -3805,6 +3803,10 @@ static void bond_activebackup_arp_mon(struct bonding *bond) rtnl_unlock(); } + +re_arm: + if (bond->params.arp_interval) + queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); } static void bond_arp_monitor(struct work_struct *work) -- 2.34.1 Although operations on the variable send_peer_notif are already within a lock-protected critical section, there are cases where it is accessed outside the lock. Therefore, READ_ONCE() and WRITE_ONCE() should be added to it. Cc: Jay Vosburgh Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Cc: Jason Xing Signed-off-by: Tonghao Zhang Reviewed-by: Hangbin Liu --- v3/4: - no change v2: fix compilation errors --- drivers/net/bonding/bond_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b835f63d2871..909c01f55744 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,8 +1204,9 @@ void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay) /* Peer notify update handler. Holds only RTNL */ static void bond_peer_notify_reset(struct bonding *bond) { - bond->send_peer_notif = bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); + WRITE_ONCE(bond->send_peer_notif, + bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay)); } static void bond_peer_notify_handler(struct work_struct *work) @@ -2825,7 +2826,7 @@ static void bond_mii_monitor(struct work_struct *work) rcu_read_unlock(); - if (commit || bond->send_peer_notif) { + if (commit || READ_ONCE(bond->send_peer_notif)) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; @@ -3784,7 +3785,7 @@ static void bond_activebackup_arp_mon(struct bonding *bond) should_notify_rtnl = bond_ab_arp_probe(bond); rcu_read_unlock(); - if (bond->send_peer_notif || should_notify_rtnl) { + if (READ_ONCE(bond->send_peer_notif) || should_notify_rtnl) { if (!rtnl_trylock()) { delta_in_ticks = 1; goto re_arm; -- 2.34.1