ip_mc_output(), __ip_local_out() and xfrm4_output() read the egress network device from rt->dst.dev / skb_dst_dev(skb) and then pass that pointer as the 'out' device argument of NF_HOOK without holding rcu_read_lock. dst->dev is a field protected by RCU. When a device is unregistered its value is replaced with blackhole_netdev, and the previous device is freed after an RCU grace period. A section that reads dst->dev and uses that pointer must therefore hold rcu_read_lock. The three functions above are missing this protection, so when the egress device is unregistered concurrently with transmission, a LOCAL_OUT / POST_ROUTING hook (nft meta oif, selinux_ip_postroute_compat, etc.) can reference a device pointer that is no longer valid. In all three leaves, wrap the section that reads the device and runs NF_HOOK in rcu_read_lock(), and read the device through the RCU accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU grace period cannot complete while a transmission is in flight, so the egress device is not freed and the hook always references a valid device. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Hyunwoo Kim --- net/ipv4/ip_output.c | 27 +++++++++++++++++++-------- net/ipv4/xfrm4_output.c | 13 +++++++++---- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5bcd73cbdb41..92a70b7e74a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + int err; IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS); @@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst_dev(skb), - dst_output); + rcu_read_lock(); + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst_dev_rcu(skb), + dst_output); + rcu_read_unlock(); + return err; } int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; + struct net_device *dev; + int ret; + + rcu_read_lock(); + dev = dst_dev_rcu(&rt->dst); /* * If the indicated interface is up and running, send the packet. @@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ip_hdr(skb)->ttl == 0) { kfree_skb(skb); + rcu_read_unlock(); return 0; } } @@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) ip_mc_finish_output); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - ip_finish_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret; } int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 0ae67d537499..d5dc084777a3 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst_dev(skb), - __xfrm4_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + int ret; + + rcu_read_lock(); + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, skb->dev, skb_dst_dev_rcu(skb), + __xfrm4_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret; } void xfrm4_local_error(struct sk_buff *skb, u32 mtu) -- 2.43.0