From: Haoze Xie

The br_netfilter fake rtable is embedded in struct net_bridge and is attached to bridged packets with skb_dst_set_noref(). If such a packet is queued to NFQUEUE, __nf_queue() upgrades that fake dst with skb_dst_force(). At that point the queued skb can hold a real dst reference after bridge teardown has started.

The problem is not that every bridged packet needs its own dst reference. The problem is that NFQUEUE can keep the bridge-private fake dst alive after unregister begins.

Fix this by keeping the bridge fake dst model unchanged and pinning the bridge master device only while the packet sits in NFQUEUE. Record the bridge device in nf_queue_entry when the queued skb carries a bridge fake dst, take a device reference for the queue lifetime, and drop it when the queue entry is freed. Also make sure queued entries are reaped when that bridge device goes down, and drop the redundant nf_bridge_info_exists() test from the fake dst detection.

This keeps netdev_priv(br->dev) alive until verdict completion, so the embedded fake rtable and its metrics backing storage cannot be freed out from under dst_release(). It also avoids the constant refcount bump and avoids using IPv4-specific dst helpers for IPv6 bridge traffic.
Fixes: 34666d467cbf ("netfilter: bridge: move br_netfilter out of the core") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Haoze Xie Signed-off-by: Ren Wei Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 1 + net/netfilter/nf_queue.c | 14 ++++++++++++++ net/netfilter/nfnetlink_queue.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 3978c3174cdb..fc3e81c07364 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -18,6 +18,7 @@ struct nf_queue_entry { unsigned int id; unsigned int hook_index; /* index in hook_entries->hook[] */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct net_device *bridge_dev; struct net_device *physin; struct net_device *physout; #endif diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 57b450024a99..73363ceedebe 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -68,6 +68,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + dev_put(entry->bridge_dev); dev_put(entry->physin); dev_put(entry->physout); #endif @@ -84,6 +85,8 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct sk_buff *skb = entry->skb; + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = NULL; if (nf_bridge_info_exists(skb)) { entry->physin = nf_bridge_get_physindev(skb, entry->state.net); @@ -92,6 +95,16 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) entry->physin = NULL; entry->physout = NULL; } + + if (entry->state.pf == NFPROTO_BRIDGE && + dst && (dst->flags & DST_FAKE_RTABLE)) + dev = dst_dev_rcu(dst); + + /* Must hold a reference on the bridge device: dst_hold() protects + * the dst itself, but the fake 
rtable is embedded in bridge-private + * storage that netdevice teardown can free independently. + */ + entry->bridge_dev = dev; #endif } @@ -108,6 +121,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(state->out); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + dev_hold(entry->bridge_dev); dev_hold(entry->physin); dev_hold(entry->physout); #endif diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c5e29fec419b..80ca077b81bd 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1262,6 +1262,9 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (physinif == ifindex || physoutif == ifindex) return 1; + + if (entry->bridge_dev && entry->bridge_dev->ifindex == ifindex) + return 1; #endif if (entry->skb_dev && entry->skb_dev->ifindex == ifindex) return 1; -- 2.47.3