ip_mc_output(), __ip_local_out(), xfrm4_output(), raw_send_hdrinc(), xfrm_output_resume() and vrf_output() read the egress network device from rt->dst.dev / skb_dst(skb)->dev / skb_dst_dev(skb) and then pass that pointer as the 'out' device argument of NF_HOOK without holding rcu_read_lock. dst->dev is a field protected by RCU. When a device is unregistered its value is replaced with blackhole_netdev, and the previous device is freed after an RCU grace period. A section that reads dst->dev and uses that pointer must therefore hold rcu_read_lock. The functions above are missing this protection, so when the egress device is unregistered concurrently with transmission, a LOCAL_OUT / POST_ROUTING hook (nft meta oif, selinux_ip_postroute_compat, etc.) can reference a device pointer that is no longer valid. In the leaves above, wrap the section that reads the device and runs NF_HOOK in rcu_read_lock(), and read the device through the RCU accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU grace period cannot complete while a transmission is in flight, so the egress device is not freed and the hook always references a valid device. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Hyunwoo Kim --- Changes in v2: - Changed to a net-wide patch that also fixes the issue in raw_send_hdrinc(), xfrm_output_resume() and vrf_output(). - v1: https://lore.kernel.org/all/ahqIg6vURwYI0LJ5@v4bel/ --- drivers/net/vrf.c | 16 +++++++++++----- net/ipv4/ip_output.c | 27 +++++++++++++++++++-------- net/ipv4/raw.c | 4 +++- net/ipv4/xfrm4_output.c | 13 +++++++++---- net/xfrm/xfrm_output.c | 4 +++- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 46209917ae4d..e9a1dd961805 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev; + int ret; + + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, dev, - vrf_finish_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, + vrf_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret; } /* set dst on skb to send packet to us via dev_xmit path. Allows diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5bcd73cbdb41..92a70b7e74a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + int err; IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS); @@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst_dev(skb), - dst_output); + rcu_read_lock(); + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst_dev_rcu(skb), + dst_output); + rcu_read_unlock(); + return err; } int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; + struct net_device *dev; + int ret; + + rcu_read_lock(); + dev = dst_dev_rcu(&rt->dst); /* * If the indicated interface is up and running, send the packet. @@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ip_hdr(skb)->ttl == 0) { kfree_skb(skb); + rcu_read_unlock(); return 0; } } @@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) ip_mc_finish_output); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - ip_finish_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret; } int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 68e88cb3e55c..555e62eacdc6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_transport_header(skb))->type); } + rcu_read_lock(); err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, rt->dst.dev, + net, sk, skb, NULL, skb_dst_dev_rcu(skb), dst_output); + rcu_read_unlock(); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 0ae67d537499..d5dc084777a3 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst_dev(skb), - __xfrm4_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + int ret; + + rcu_read_lock(); + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, skb->dev, skb_dst_dev_rcu(skb), + __xfrm4_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret; } void xfrm4_local_error(struct sk_buff *skb, u32 mtu) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index cc35c2fcbbe0..2b69ac55c8a5 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -604,9 +604,11 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err) if (!skb_dst(skb)->xfrm) return dst_output(net, sk, skb); + rcu_read_lock(); err = nf_hook(skb_dst(skb)->ops->family, NF_INET_POST_ROUTING, net, sk, skb, - NULL, skb_dst(skb)->dev, xfrm_output2); + NULL, skb_dst_dev_rcu(skb), xfrm_output2); + rcu_read_unlock(); if (unlikely(err != 1)) goto out; } -- 2.43.0