Commit 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") removed the RTNL lock for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP operations. However, this change triggered the following call trace on my BeagleBone Black board: WARNING: net/8021q/vlan_core.c:236 at vlan_for_each+0x120/0x124, CPU#0: rpcbind/496 RTNL: assertion failed at net/8021q/vlan_core.c (236) Modules linked in: CPU: 0 UID: 997 PID: 496 Comm: rpcbind Not tainted 6.19.0-rc6-next-20260122-yocto-standard+ #8 PREEMPT Hardware name: Generic AM33XX (Flattened Device Tree) Call trace: unwind_backtrace from show_stack+0x28/0x2c show_stack from dump_stack_lvl+0x30/0x38 dump_stack_lvl from __warn+0xb8/0x11c __warn from warn_slowpath_fmt+0x130/0x194 warn_slowpath_fmt from vlan_for_each+0x120/0x124 vlan_for_each from cpsw_add_mc_addr+0x54/0xd8 cpsw_add_mc_addr from __hw_addr_ref_sync_dev+0xc4/0xec __hw_addr_ref_sync_dev from __dev_mc_add+0x78/0x88 __dev_mc_add from igmp6_group_added+0x84/0xec igmp6_group_added from __ipv6_dev_mc_inc+0x1fc/0x2f0 __ipv6_dev_mc_inc from __ipv6_sock_mc_join+0x124/0x1b4 __ipv6_sock_mc_join from do_ipv6_setsockopt+0x84c/0x1168 do_ipv6_setsockopt from ipv6_setsockopt+0x88/0xc8 ipv6_setsockopt from do_sock_setsockopt+0xe8/0x19c do_sock_setsockopt from __sys_setsockopt+0x84/0xac __sys_setsockopt from ret_fast_syscall+0x0/0x5 This trace occurs because vlan_for_each() is called within cpsw_ndo_set_rx_mode(), which expects the RTNL lock to be held. Since modifying vlan_for_each() to operate without the RTNL lock is not straightforward, and because ndo_set_rx_mode() is invoked both with and without the RTNL lock across different code paths, simply adding rtnl_lock() in cpsw_ndo_set_rx_mode() is not a viable solution. To resolve this issue, we opt to execute the actual processing within a work queue, following the approach used by the icssg-prueth driver. Fixes: 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") Signed-off-by: Kevin Hao Cc: stable@vger.kernel.org --- Changes in v4: - Using schedule_work() instead of creating a dedicated workqueue. - Link to v3: https://lore.kernel.org/r/20260127-bbb-v3-1-5e71f340c1e9@gmail.com Changes in v3: - Resolve the deadlock issue identified in the AI review [2] by moving the netif_running() check under the RTNL lock and removing the cancel_work_sync() call in cpsw_ndo_stop(). - Link to v2: https://lore.kernel.org/r/20260125-bbb-v2-1-1547ffabc9d3@gmail.com Changes in v2: - Addresses the issue identified in the AI review [1]: - Adds a netif_running() check in cpsw_ndo_set_rx_mode_work() - Cancels the rx_mode_work in cpsw_ndo_stop() - Link to v1: https://lore.kernel.org/r/20260123-bbb-v1-1-176b0b71834d@gmail.com [1] https://netdev-ai.bots.linux.dev/ai-review.html?id=bd885e1e-1aed-4755-ad60-7150737ad0f5 [2] https://netdev-ai.bots.linux.dev/ai-review.html?id=c9fc3cf8-a06c-4cb8-b26b-910e775951a0 --- Please note that the cpsw driver also has the same issue. If this resolution is acceptable, I will create another patch to fix the issue in cpsw. Cc: Siddharth Vadapalli Cc: Roger Quadros Cc: Andrew Lunn Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Vladimir Oltean Cc: Kuniyuki Iwashima Cc: linux-omap@vger.kernel.org --- drivers/net/ethernet/ti/cpsw_new.c | 33 +++++++++++++++++++++++++++++---- drivers/net/ethernet/ti/cpsw_priv.h | 1 + 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index ab88d4c02cbde76207f89cf433e2b383dcde6a83..744657cad0927da21232969d07c15e68f44811e3 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -248,15 +248,25 @@ static int cpsw_purge_all_mc(struct net_device *ndev, const u8 *addr, int num) return 0; } -static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +static void cpsw_ndo_set_rx_mode_work(struct work_struct *work) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_priv *priv = container_of(work, struct cpsw_priv, rx_mode_work); struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; + rtnl_lock(); + if (!netif_running(ndev)) { + rtnl_unlock(); + return; + } + + netif_addr_lock_bh(ndev); if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI, priv->emac_port); + netif_addr_unlock_bh(ndev); + rtnl_unlock(); return; } @@ -270,6 +280,15 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* add/remove mcast address either for real netdev or for vlan */ __hw_addr_ref_sync_dev(&ndev->mc, ndev, cpsw_add_mc_addr, cpsw_del_mc_addr); + netif_addr_unlock_bh(ndev); + rtnl_unlock(); +} + +static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + schedule_work(&priv->rx_mode_work); } static unsigned int cpsw_rxbuf_total_len(unsigned int len) @@ -1398,6 +1417,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->emac_port = i + 1; priv->tx_packet_min = CPSW_MIN_PACKET_SIZE; + INIT_WORK(&priv->rx_mode_work, cpsw_ndo_set_rx_mode_work); if (is_valid_ether_addr(slave_data->mac_addr)) { ether_addr_copy(priv->mac_addr, slave_data->mac_addr); @@ -1447,13 +1467,18 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) static void cpsw_unregister_ports(struct cpsw_common *cpsw) { + struct net_device *ndev; + struct cpsw_priv *priv; int i = 0; for (i = 0; i < cpsw->data.slaves; i++) { - if (!cpsw->slaves[i].ndev) + ndev = cpsw->slaves[i].ndev; + if (!ndev) continue; - unregister_netdev(cpsw->slaves[i].ndev); + priv = netdev_priv(ndev); + disable_work_sync(&priv->rx_mode_work); + unregister_netdev(ndev); } } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 91add8925e235c6cf5542fde11f3383b9234c872..acb6181c5c9e1bf6ed46a7fd14ce422efc0b724e 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -391,6 +391,7 @@ struct cpsw_priv { u32 tx_packet_min; struct cpsw_ale_ratelimit ale_bc_ratelimit; struct cpsw_ale_ratelimit ale_mc_ratelimit; + struct work_struct rx_mode_work; }; #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) --- base-commit: 33a647c659ffa5bdb94abc345c8c86768ff96215 change-id: 20260123-bbb-dc3675f671d0 Best regards, -- Kevin Hao