Using atomic to protect the send_peer_notif instead of rtnl_mutex. This approach allows safe updates in both interrupt and process contexts, while avoiding code complexity. In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif() from acquiring the lock and updating send_peer_notif. This patch addresses the issue by using a atomic. Since updating send_peer_notif does not require high real-time performance, such atomic updates are acceptable. After coverting the rtnl lock for send_peer_notif to atomic, in bond_mii_monitor(), we should check the should_notify_peers (rtnllock required) instead of send_peer_notif. By the way, to avoid peer notify event loss, we check again whether to send peer notify, such as active-backup mode failover. Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Suggested-by: Jay Vosburgh Signed-off-by: Tonghao Zhang --- v2: - refine the codes - check bond_should_notify_peers again in bond_mii_monitor(), to avoid event loss. - v1 https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/ --- drivers/net/bonding/bond_3ad.c | 7 ++--- drivers/net/bonding/bond_main.c | 46 ++++++++++++++++----------------- include/net/bonding.h | 9 ++++++- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 49717b7b82a2..05c573e45450 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port) { struct bonding *bond = port->slave->bond; - if (bond->params.broadcast_neighbor && rtnl_trylock()) { - bond->send_peer_notif = bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - rtnl_unlock(); - } + if (bond->params.broadcast_neighbor) + bond_peer_notify_reset(bond); } /** diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e592f37c28b..ae90221838d4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond) { struct bond_up_slave *usable; struct slave *slave = NULL; + int send_peer_notif; - if (!bond->send_peer_notif || - bond->send_peer_notif % - max(1, bond->params.peer_notif_delay) != 0 || + send_peer_notif = atomic_read(&bond->send_peer_notif); + if (!send_peer_notif || + send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || !netif_carrier_ok(bond->dev)) return false; @@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) BOND_SLAVE_NOTIFY_NOW); if (new_active) { - bool should_notify_peers = false; - bond_set_slave_active_flags(new_active, BOND_SLAVE_NOTIFY_NOW); @@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) old_active); if (netif_running(bond->dev)) { - bond->send_peer_notif = - bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - should_notify_peers = - bond_should_notify_peers(bond); + bond_peer_notify_reset(bond); + + if (bond_should_notify_peers(bond)) { + atomic_dec(&bond->send_peer_notif); + call_netdevice_notifiers( + NETDEV_NOTIFY_PEERS, + bond->dev); + } } call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); - if (should_notify_peers) { - bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); - } } } @@ -2801,7 +2798,7 @@ static void bond_mii_monitor(struct work_struct *work) rcu_read_unlock(); - if (commit || bond->send_peer_notif) { + if (commit || should_notify_peers) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; @@ -2816,16 +2813,15 @@ static void bond_mii_monitor(struct work_struct *work) bond_miimon_commit(bond); } - if (bond->send_peer_notif) { - bond->send_peer_notif--; - if (should_notify_peers) - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); - } + /* check again to avoid send_peer_notif has been changed. */ + if (bond_should_notify_peers(bond)) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); rtnl_unlock(); /* might sleep, hold no other locks */ } + atomic_dec_if_positive(&bond->send_peer_notif); + re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); @@ -3773,7 +3769,7 @@ static void bond_activebackup_arp_mon(struct bonding *bond) return; if (should_notify_peers) { - bond->send_peer_notif--; + atomic_dec(&bond->send_peer_notif); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); } @@ -4267,6 +4263,8 @@ static int bond_open(struct net_device *bond_dev) queue_delayed_work(bond->wq, &bond->alb_work, 0); } + atomic_set(&bond->send_peer_notif, 0); + if (bond->params.miimon) /* link check interval, in milliseconds. */ queue_delayed_work(bond->wq, &bond->mii_work, 0); @@ -4300,7 +4298,7 @@ static int bond_close(struct net_device *bond_dev) struct slave *slave; bond_work_cancel_all(bond); - bond->send_peer_notif = 0; + atomic_set(&bond->send_peer_notif, 0); if (bond_is_lb(bond)) bond_alb_deinitialize(bond); bond->recv_probe = NULL; diff --git a/include/net/bonding.h b/include/net/bonding.h index 49edc7da0586..afdfcb5bfaf0 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -236,7 +236,7 @@ struct bonding { */ spinlock_t mode_lock; spinlock_t stats_lock; - u32 send_peer_notif; + atomic_t send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; @@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s return NET_XMIT_DROP; } +static inline void bond_peer_notify_reset(struct bonding *bond) +{ + atomic_set(&bond->send_peer_notif, + bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay)); +} + #endif /* _NET_BONDING_H */ -- 2.34.1