Using atomic to protect the send_peer_notif instead of rtnl_mutex. This approach allows safe updates in both interrupt and process contexts, while avoiding code complexity. In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif() from acquiring the lock and updating send_peer_notif. This patch addresses the issue by using a atomic. Since updating send_peer_notif does not require high real-time performance, such atomic updates are acceptable. By the way, send_peer_notif is reset atomically. For avoiding peer notify event loss, we should invoke bond_should_notify_peers() to check whether to send peer notify in bond_mii_monitor() and bond_activebackup_arp_mon(). More importantly, send_peer_notif-- should be placed with send_peer_notif *if* block [2]. Otherwise [1], if atomic_dec is executed immediately after resetting the send_peer_notif, it is very likely that the event will be lost. - [1]: if (send_peer_notif) { ... } // if reset and then atomic_dec, event will be lost. atomic_dec() - [2] should be changed: if (send_peer_notif) { ... // if reset and then atomic_dec, event may be lost. // but we are already in notify context. atomic_dec() } Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Suggested-by: Jay Vosburgh Signed-off-by: Tonghao Zhang --- v4: - fix race in bond_activebackup_arp_mon() - check send_peer_notif instead of should_notify_peers. - move send_peer_notif-- in if block. v3: - add the comment, *_dec_if_positive is safe. v2: - refine the codes - check bond_should_notify_peers again in bond_mii_monitor(), to avoid event loss. v1: - https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/ --- drivers/net/bonding/bond_3ad.c | 7 +--- drivers/net/bonding/bond_main.c | 71 +++++++++++++++++---------------- include/net/bonding.h | 9 ++++- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 49717b7b82a2..05c573e45450 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port) { struct bonding *bond = port->slave->bond; - if (bond->params.broadcast_neighbor && rtnl_trylock()) { - bond->send_peer_notif = bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - rtnl_unlock(); - } + if (bond->params.broadcast_neighbor) + bond_peer_notify_reset(bond); } /** diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e592f37c28b..386efac3bc64 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond) { struct bond_up_slave *usable; struct slave *slave = NULL; + int send_peer_notif; - if (!bond->send_peer_notif || - bond->send_peer_notif % - max(1, bond->params.peer_notif_delay) != 0 || + send_peer_notif = atomic_read(&bond->send_peer_notif); + if (!send_peer_notif || + send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || !netif_carrier_ok(bond->dev)) return false; @@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) BOND_SLAVE_NOTIFY_NOW); if (new_active) { - bool should_notify_peers = false; - bond_set_slave_active_flags(new_active, BOND_SLAVE_NOTIFY_NOW); @@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) old_active); if (netif_running(bond->dev)) { - bond->send_peer_notif = - bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - should_notify_peers = - bond_should_notify_peers(bond); + bond_peer_notify_reset(bond); + + if (bond_should_notify_peers(bond)) { + atomic_dec(&bond->send_peer_notif); + call_netdevice_notifiers( + NETDEV_NOTIFY_PEERS, + bond->dev); + } } call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); - if (should_notify_peers) { - bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); - } } } @@ -2783,11 +2780,11 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers; - bool commit; - unsigned long delay; - struct slave *slave; struct list_head *iter; + struct slave *slave; + unsigned long delay; + int send_peer_notif; + bool commit; delay = msecs_to_jiffies(bond->params.miimon); @@ -2796,12 +2793,12 @@ static void bond_mii_monitor(struct work_struct *work) rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); + send_peer_notif = atomic_read(&bond->send_peer_notif); commit = !!bond_miimon_inspect(bond); rcu_read_unlock(); - if (commit || bond->send_peer_notif) { + if (commit || send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; @@ -2816,11 +2813,13 @@ static void bond_mii_monitor(struct work_struct *work) bond_miimon_commit(bond); } - if (bond->send_peer_notif) { - bond->send_peer_notif--; - if (should_notify_peers) + if (send_peer_notif) { + if (bond_should_notify_peers(bond)) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + + /* It's safe even when send_peer_notif was reset. */ + atomic_dec_if_positive(&bond->send_peer_notif); } rtnl_unlock(); /* might sleep, hold no other locks */ @@ -3732,8 +3731,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) static void bond_activebackup_arp_mon(struct bonding *bond) { - bool should_notify_peers = false; bool should_notify_rtnl = false; + int send_peer_notif = 0; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -3743,15 +3742,12 @@ static void bond_activebackup_arp_mon(struct bonding *bond) rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); - if (bond_ab_arp_inspect(bond)) { rcu_read_unlock(); /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; - should_notify_peers = false; goto re_arm; } @@ -3762,21 +3758,26 @@ static void bond_activebackup_arp_mon(struct bonding *bond) } should_notify_rtnl = bond_ab_arp_probe(bond); + send_peer_notif = atomic_read(&bond->send_peer_notif); + rcu_read_unlock(); re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (should_notify_peers || should_notify_rtnl) { + if (send_peer_notif || should_notify_rtnl) { if (!rtnl_trylock()) return; - if (should_notify_peers) { - bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); + if (send_peer_notif) { + if (bond_should_notify_peers(bond)) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); + /* It's safe even when send_peer_notif was reset. */ + atomic_dec_if_positive(&bond->send_peer_notif); } + if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); @@ -4267,6 +4268,8 @@ static int bond_open(struct net_device *bond_dev) queue_delayed_work(bond->wq, &bond->alb_work, 0); } + atomic_set(&bond->send_peer_notif, 0); + if (bond->params.miimon) /* link check interval, in milliseconds. */ queue_delayed_work(bond->wq, &bond->mii_work, 0); @@ -4300,7 +4303,7 @@ static int bond_close(struct net_device *bond_dev) struct slave *slave; bond_work_cancel_all(bond); - bond->send_peer_notif = 0; + atomic_set(&bond->send_peer_notif, 0); if (bond_is_lb(bond)) bond_alb_deinitialize(bond); bond->recv_probe = NULL; diff --git a/include/net/bonding.h b/include/net/bonding.h index 49edc7da0586..afdfcb5bfaf0 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -236,7 +236,7 @@ struct bonding { */ spinlock_t mode_lock; spinlock_t stats_lock; - u32 send_peer_notif; + atomic_t send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; @@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s return NET_XMIT_DROP; } +static inline void bond_peer_notify_reset(struct bonding *bond) +{ + atomic_set(&bond->send_peer_notif, + bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay)); +} + #endif /* _NET_BONDING_H */ -- 2.34.1