syzbot triggered a warning[1] about the number of mdb entries in a context.
It turned out that there are multiple ways to trigger that warning today
(some were added over the years). The root cause of the problem is that
the increase is done conditionally, and over the years the number of such
conditions grew, so there were new ways to trigger the warning, that is,
to do a decrease which wasn't paired with a previous increase.
For example, one way to trigger it is with flush:
$ ip l add br0 up type bridge vlan_filtering 1 mcast_snooping 1
$ ip l add dumdum up master br0 type dummy
$ bridge mdb add dev br0 port dumdum grp 239.0.0.1 permanent vid 1
$ ip link set dev br0 down
$ ip link set dev br0 type bridge mcast_vlan_snooping 1
^^^^ this will enable snooping, but will not update mdb_n_entries
because in __br_multicast_enable_port_ctx() we check !netif_running
$ bridge mdb flush dev br0
^^^ this will trigger the warning because it will delete the pg which
we added above, which will try to decrease mdb_n_entries
Fix the problem by removing the conditional increase and always keeping the
count up-to-date while the vlan exists. In order to do that we have to
first initialize it on port-vlan context creation, and then always increase
or decrease the value regardless of mcast options. To keep the current
behaviour we have to enforce the mdb limit only if the context is a port's or
if the port-vlan's mcast snooping is enabled.
[1]
------------[ cut here ]------------
n == 0
WARNING: net/bridge/br_multicast.c:718 at br_multicast_port_ngroups_dec_one net/bridge/br_multicast.c:718 [inline], CPU#0: syz.4.4607/22043
WARNING: net/bridge/br_multicast.c:718 at br_multicast_port_ngroups_dec net/bridge/br_multicast.c:771 [inline], CPU#0: syz.4.4607/22043
WARNING: net/bridge/br_multicast.c:718 at br_multicast_del_pg+0x1bbe/0x1e20 net/bridge/br_multicast.c:825, CPU#0: syz.4.4607/22043
Modules linked in:
CPU: 0 UID: 0 PID: 22043 Comm: syz.4.4607 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/24/2026
RIP: 0010:br_multicast_port_ngroups_dec_one net/bridge/br_multicast.c:718 [inline]
RIP: 0010:br_multicast_port_ngroups_dec net/bridge/br_multicast.c:771 [inline]
RIP: 0010:br_multicast_del_pg+0x1bbe/0x1e20 net/bridge/br_multicast.c:825
Code: 41 5f 5d e9 04 7a 48 f7 e8 3f 73 5c f7 90 0f 0b 90 e9 cf fd ff ff e8 31 73 5c f7 90 0f 0b 90 e9 16 fd ff ff e8 23 73 5c f7 90 <0f> 0b 90 e9 60 fd ff ff e8 15 73 5c f7 eb 05 e8 0e 73 5c f7 48 8b
RSP: 0018:ffffc9000c207220 EFLAGS: 00010293
RAX: ffffffff8a68042d RBX: ffff88807c6f1800 RCX: ffff888066e90000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000000 R08: ffff888066e90000 R09: 000000000000000c
R10: 000000000000000c R11: 0000000000000000 R12: ffff8880303ef800
R13: dffffc0000000000 R14: ffff888050eb11c4 R15: 1ffff1100a1d6238
FS: 00007fa45921b6c0(0000) GS:ffff8881256f5000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa4591f9ff8 CR3: 0000000081df2000 CR4: 00000000003526f0
Call Trace:
br_mdb_flush_pgs net/bridge/br_mdb.c:1525 [inline]
br_mdb_flush net/bridge/br_mdb.c:1544 [inline]
br_mdb_del_bulk+0x5e2/0xb20 net/bridge/br_mdb.c:1561
rtnl_mdb_del+0x48a/0x640 net/core/rtnetlink.c:-1
rtnetlink_rcv_msg+0x77e/0xbe0 net/core/rtnetlink.c:6967
netlink_rcv_skb+0x232/0x4b0 net/netlink/af_netlink.c:2550
netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline]
netlink_unicast+0x80f/0x9b0 net/netlink/af_netlink.c:1344
netlink_sendmsg+0x813/0xb40 net/netlink/af_netlink.c:1894
sock_sendmsg_nosec net/socket.c:727 [inline]
__sock_sendmsg net/socket.c:742 [inline]
____sys_sendmsg+0xa68/0xad0 net/socket.c:2592
___sys_sendmsg+0x2a5/0x360 net/socket.c:2646
__sys_sendmsg net/socket.c:2678 [inline]
__do_sys_sendmsg net/socket.c:2683 [inline]
__se_sys_sendmsg net/socket.c:2681 [inline]
__x64_sys_sendmsg+0x1bd/0x2a0 net/socket.c:2681
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xe2/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fa45839aeb9
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fa45921b028 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fa458615fa0 RCX: 00007fa45839aeb9
RDX: 0000000000000000 RSI: 00002000000000c0 RDI: 0000000000000004
RBP: 00007fa458408c1f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fa458616038 R14: 00007fa458615fa0 R15: 00007fff0b59fae8
Fixes: b57e8d870d52 ("net: bridge: Maintain number of MDB entries in net_bridge_mcast_port")
Reported-by: syzbot+d5d1b7343531d17bd3c5@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/aYrWbRp83MQR1ife@debil/T/#t
Reviewed-by: Ido Schimmel
Signed-off-by: Nikolay Aleksandrov
---
net/bridge/br_multicast.c | 45 ++++++++++++++++-----------------------
1 file changed, 18 insertions(+), 27 deletions(-)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d55a4ab87837..e9a7e6530401 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -244,14 +244,11 @@ br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
lockdep_assert_held_once(&port->br->multicast_lock);
- if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
- return NULL;
-
/* Take RCU to access the vlan. */
rcu_read_lock();
vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
- if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ if (vlan)
pmctx = &vlan->port_mcast_ctx;
rcu_read_unlock();
@@ -701,7 +698,10 @@ br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
u32 max = READ_ONCE(pmctx->mdb_max_entries);
u32 n = READ_ONCE(pmctx->mdb_n_entries);
- if (max && n >= max) {
+ /* enforce the max limit when it's a port pmctx or a port-vlan pmctx
+ * with snooping enabled
+ */
+ if (!br_multicast_port_ctx_vlan_disabled(pmctx) && max && n >= max) {
NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
what, n, max);
return -E2BIG;
@@ -736,9 +736,7 @@ static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
return err;
}
- /* Only count on the VLAN context if VID is given, and if snooping on
- * that VLAN is enabled.
- */
+ /* Only count on the VLAN context if VID is given */
if (!group->vid)
return 0;
@@ -2011,6 +2009,18 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port,
timer_setup(&pmctx->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
+ /* initialize mdb_n_entries if a new port vlan is being created */
+ if (vlan) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ spin_lock_bh(&port->br->multicast_lock);
+ hlist_for_each_entry(pg, &port->mglist, mglist)
+ if (pg->key.addr.vid == vlan->vid)
+ n++;
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ spin_unlock_bh(&port->br->multicast_lock);
+ }
}
void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
@@ -2094,25 +2104,6 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
-
- if (br_multicast_port_ctx_is_vlan(pmctx)) {
- struct net_bridge_port_group *pg;
- u32 n = 0;
-
- /* The mcast_n_groups counter might be wrong. First,
- * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
- * are flushed, thus mcast_n_groups after the toggle does not
- * reflect the true values. And second, permanent entries added
- * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
- * either. Thus we have to refresh the counter.
- */
-
- hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
- if (pg->key.addr.vid == pmctx->vlan->vid)
- n++;
- }
- WRITE_ONCE(pmctx->mdb_n_entries, n);
- }
}
static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
--
2.47.3