Followup of commit 88fe14253e18 ("net: dst: add four helpers to annotate data-races around dst->dev"). We want to gradually add explicit RCU protection to dst->dev, including lockdep support. Add an union to alias dst->dev_rcu and dst->dev. Add dst_dev_net_rcu() helper. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- include/net/dst.h | 16 +++++++++++----- net/core/dst.c | 2 +- net/ipv4/route.c | 4 ++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index bab01363bb975dc20ea0e1485a515c1872537ab9..f8aa1239b4db639bd6b63f4ddb4ec4d7ee459ac0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -24,7 +24,10 @@ struct sk_buff; struct dst_entry { - struct net_device *dev; + union { + struct net_device *dev; + struct net_device __rcu *dev_rcu; + }; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) { - /* In the future, use rcu_dereference(dst->dev) */ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(dst->dev); + return rcu_dereference(dst->dev_rcu); +} + +static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst) +{ + return dev_net_rcu(dst_dev_rcu(dst)); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) @@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { - return dev_net_rcu(skb_dst_dev(skb)); + return dev_net_rcu(skb_dst_dev_rcu(skb)); } struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); diff --git a/net/core/dst.c b/net/core/dst.c index e2de8b68c41d3fa6f8a94b61b88f531a2d79d3b4..e9d35f49c9e7800d7397b643c70931dd516a6265 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst) dst->ops->ifdown(dst, dev); WRITE_ONCE(dst->input, dst_discard); WRITE_ONCE(dst->output, dst_discard_out); - WRITE_ONCE(dst->dev, blackhole_netdev); + rcu_assign_pointer(dst->dev_rcu, blackhole_netdev); netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 771f6986ed05882ebaeb1117ac33d68edf5db699..c63feef55d5193aadd11f9d5b45c8f5482e06be5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1027,7 +1027,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1327,7 +1327,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); rcu_read_unlock(); -- 2.51.0.318.gd7df087d1a-goog Refactor icmpv6_xrlim_allow() and ip6_dst_hoplimit() so that we acquire rcu_read_lock() a bit longer to be able to use dst_dev_rcu() instead of dst_dev(). __ip6_rt_update_pmtu() and rt6_do_redirect can directly use dst_dev_rcu() in sections already holding rcu_read_lock(). Small changes to use dst_dev_net_rcu() in ip6_default_advmss(), ipv6_sock_ac_join(), ip6_mc_find_dev() and ndisc_send_skb(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- net/ipv6/anycast.c | 2 +- net/ipv6/icmp.c | 6 +++--- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/output_core.c | 8 +++++--- net/ipv6/route.c | 7 +++---- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f8a8e46286b8ee6e39d2d4c2e4149d528d7aef18..52599584422bf4168e37ea48b575c058b1309c7c 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -104,7 +104,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = dst_dev(&rt->dst); + dev = dst_dev_rcu(&rt->dst); netdev_hold(dev, &dev_tracker, GFP_ATOMIC); ip6_rt_put(rt); } else if (ishost) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 44550957fd4e360d78e6cb411c33d6bbf2359e1f..95cdd4cacb004fd4f2e569136a313afef3b25c58 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -209,7 +209,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); - dev = dst_dev(dst); + rcu_read_lock(); + dev = dst_dev_rcu(dst); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -224,11 +225,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); - rcu_read_unlock(); } + rcu_read_unlock(); if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), ICMP6_MIB_RATELIMITHOST); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 55c49dc14b1bd9815128bbd07c80837adc19e7ec..016b572e7d6f0289657bda2a51a70153e98ed4fe 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -180,7 +180,7 @@ static struct net_device *ip6_mc_find_dev(struct net *net, rcu_read_lock(); rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { - dev = dst_dev(&rt->dst); + dev = dst_dev_rcu(&rt->dst); dev_hold(dev); ip6_rt_put(rt); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 57aaa7ae8ac3109d808dd46e8cfe54b57e48b214..f427e41e9c49bf342869bea4444f308a5ac03a26 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -505,7 +505,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - dev = dst_dev(dst); + dev = dst_dev_rcu(dst); idev = __in6_dev_get(dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index d21fe27fe21e344b694e0378214fbad04c4844d2..1c9b283a4132dc4b3a8241b9e9255b407e03fe49 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -104,18 +104,20 @@ EXPORT_SYMBOL(ip6_find_1stfragopt); int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + + rcu_read_lock(); if (hoplimit == 0) { - struct net_device *dev = dst_dev(dst); + struct net_device *dev = dst_dev_rcu(dst); struct inet6_dev *idev; - rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) hoplimit = READ_ONCE(idev->cnf.hop_limit); else hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); - rcu_read_unlock(); } + rcu_read_unlock(); + return hoplimit; } EXPORT_SYMBOL(ip6_dst_hoplimit); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3299cfa12e21c96ecb5c4dea5f305d5f7ce16084..3371f16b7a3e615bbb41ee0d1a7c9187a761fc0c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst_dev(dst), + .dev = dst_dev_rcu(dst), .gw = &rt6->rt6i_gateway, }; @@ -3238,7 +3238,6 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { - struct net_device *dev = dst_dev(dst); unsigned int mtu = dst_mtu(dst); struct net *net; @@ -3246,7 +3245,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) rcu_read_lock(); - net = dev_net_rcu(dev); + net = dst_dev_net_rcu(dst); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; @@ -4301,7 +4300,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst_dev(dst), + .dev = dst_dev_rcu(dst), .gw = &rt->rt6i_gateway, }; -- 2.51.0.318.gd7df087d1a-goog Use RCU in ip6_xmit() in order to use dst_dev_rcu() to prevent possible UAF. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- net/ipv6/ip6_output.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1e1410237b6ef0eca7a0aac13c4d7c56a77e0252..e234640433d6b30d3c13d8367dbe7270ddb2c9d7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -268,35 +268,36 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk) int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) { - struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); + struct net *net = sock_net(sk); unsigned int head_room; + struct net_device *dev; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; - int hlimit = -1; + int ret, hlimit = -1; u32 mtu; + rcu_read_lock(); + + dev = dst_dev_rcu(dst); head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive while we hold rcu_read_lock(). */ skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); - return -ENOBUFS; + ret = -ENOBUFS; + goto unlock; } - rcu_read_unlock(); } if (opt) { @@ -358,17 +359,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * skb to its handler for processing */ skb = l3mdev_ip6_out((struct sock *)sk, skb); - if (unlikely(!skb)) - return 0; + if (unlikely(!skb)) { + ret = 0; + goto unlock; + } /* hooks should never assume socket lock is held. * we promote our socket to non const */ - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dev, - dst_output); + ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dev, + dst_output); + goto unlock; } + ret = -EMSGSIZE; skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const @@ -377,7 +382,9 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); - return -EMSGSIZE; +unlock: + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_xmit); -- 2.51.0.318.gd7df087d1a-goog Use RCU in ip6_output() in order to use dst_dev_rcu() to prevent possible UAF. We can remove rcu_read_lock()/rcu_read_unlock() pairs from ip6_finish_output2(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- net/ipv6/ip6_output.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e234640433d6b30d3c13d8367dbe7270ddb2c9d7..9d64c13bab5eacb4cc05c78cccd86a7aeb36d37e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,7 +60,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst); + struct net_device *dev = dst_dev_rcu(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; @@ -70,15 +70,12 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive because we hold rcu_read_lock(). */ skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); return -ENOMEM; } - rcu_read_unlock(); } hdr = ipv6_hdr(skb); @@ -123,7 +120,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -131,7 +127,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -139,7 +134,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock(); return ret; } @@ -233,22 +227,29 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst_dev(dst), *indev = skb->dev; - struct inet6_dev *idev = ip6_dst_idev(dst); + struct net_device *dev, *indev = skb->dev; + struct inet6_dev *idev; + int ret; skb->protocol = htons(ETH_P_IPV6); + rcu_read_lock(); + dev = dst_dev_rcu(dst); + idev = ip6_dst_idev(dst); skb->dev = dev; if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, indev, dev, - ip6_finish_output, - !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, indev, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_output); -- 2.51.0.318.gd7df087d1a-goog Use RCU to protect accesses to dst->dev from sk_setup_caps() and sk_dst_gso_max_size(). Also use dst_dev_rcu() in ip6_dst_mtu_maybe_forward(), and ip_dst_mtu_maybe_forward(). ip4_dst_hoplimit() can use dst_dev_net_rcu(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- include/net/ip.h | 6 ++++-- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- net/core/sock.c | 16 ++++++++++------ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index befcba575129ac436912d9c19740a0a72fe23954..6dbd2bf8fa9c96dc342acc171471704981123eec 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -467,12 +467,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); + const struct net_device *dev; unsigned int mtu, res; struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + dev = dst_dev_rcu(dst); + net = dev_net_rcu(dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -486,7 +488,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst_dev(dst)->mtu); + mtu = READ_ONCE(dev->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9255f21818ee7b03d2608fd399b082c0098c7028..59f48ca3abdf5a8aef6b4ece13f9a1774fc04f38 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst mtu = IPV6_MIN_MTU; rcu_read_lock(); - idev = __in6_dev_get(dst_dev(dst)); + idev = __in6_dev_get(dst_dev_rcu(dst)); if (idev) mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); diff --git a/include/net/route.h b/include/net/route.h index c71998f464f8e6f2e4e3d03bd0db6d0da875f636..f90106f383c56d5cdaa8d455c840dbcf9a5e9555 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } diff --git a/net/core/sock.c b/net/core/sock.c index e66ad1ec3a2d969b71835a492806563519459749..9a8290fcc35d66b2011157a30c13653784e78e96 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2587,7 +2587,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) } EXPORT_SYMBOL_GPL(sk_clone_lock); -static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) +static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev) { bool is_ipv6 = false; u32 max_size; @@ -2597,8 +2597,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ - max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) : - READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); + max_size = is_ipv6 ? READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; @@ -2607,9 +2607,12 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { + const struct net_device *dev; u32 max_segs = 1; - sk->sk_route_caps = dst_dev(dst)->features; + rcu_read_lock(); + dev = dst_dev_rcu(dst); + sk->sk_route_caps = dev->features; if (sk_is_tcp(sk)) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -2625,13 +2628,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; - sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); + sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dev); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); + max_segs = max_t(u32, READ_ONCE(dev->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(sk_setup_caps); -- 2.51.0.318.gd7df087d1a-goog Replace three dst_dev() with a lockdep enabled helper. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- net/ipv4/tcp_metrics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 03c068ea27b6ad3283b8365f31706c11ecdd07f2..10e86f1008e9d9c340eee0ecdefd7f21bd84bd5b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct net *net; spin_lock_bh(&tcp_metrics_lock); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. @@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); -- 2.51.0.318.gd7df087d1a-goog Use RCU to avoid a pair of atomic operations and a potential UAF on dst_dev()->flags. Signed-off-by: Eric Dumazet --- net/ipv4/tcp_fastopen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f1884f0c9e523d50b2d120175cc94bc40b489dfb..7d945a527daf093f87882c7949e21058ed6df1cc 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -576,11 +576,12 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } } else if (tp->syn_fastopen_ch && atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { - dst = sk_dst_get(sk); - dev = dst ? dst_dev(dst) : NULL; + rcu_read_lock(); + dst = __sk_dst_get(sk); + dev = dst ? dst_dev_rcu(dst) : NULL; if (!(dev && (dev->flags & IFF_LOOPBACK))) atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); - dst_release(dst); + rcu_read_unlock(); } } -- 2.51.0.318.gd7df087d1a-goog Change icmpv4_xrlim_allow(), ip_defrag() to prevent possible UAF. Change ipmr_prepare_xmit(), ipmr_queue_fwd_xmit(), ip_mr_output(), ipv4_neigh_lookup() to use lockdep enabled dst_dev_rcu(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet --- net/ipv4/icmp.c | 6 +++--- net/ipv4/ip_fragment.c | 6 ++++-- net/ipv4/ipmr.c | 6 +++--- net/ipv4/route.c | 4 ++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7248c15cbd7592268dce883482727c994110dae8..823c70e34de835e78f58a7322e502324c795df86 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -319,17 +319,17 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, return true; /* No rate limit on loopback */ - dev = dst_dev(dst); + rcu_read_lock(); + dev = dst_dev_rcu(dst); if (dev && (dev->flags & IFF_LOOPBACK)) goto out; - rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, l3mdev_master_ifindex_rcu(dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); - rcu_read_unlock(); out: + rcu_read_unlock(); if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); else diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b2584cce90ae1c14550396de486131b700d4afd7..f7012479713ba68db7c1c3fcee07a86141de31d3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -476,14 +476,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { - struct net_device *dev = skb->dev ? : skb_dst_dev(skb); - int vif = l3mdev_master_ifindex_rcu(dev); + struct net_device *dev; struct ipq *qp; + int vif; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ rcu_read_lock(); + dev = skb->dev ? : skb_dst_dev_rcu(skb); + vif = l3mdev_master_ifindex_rcu(dev); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { int ret, refs = 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 345e5faac63471249540fa77cb11fb2de50fd323..ca9eaee4c2ef5f5cdc03608291ad1a0dc187d657 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1905,7 +1905,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, return -1; } - encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1958,7 +1958,7 @@ static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt, * result in receiving multiple packets. */ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, - net, NULL, skb, skb->dev, rt->dst.dev, + net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst), ipmr_forward_finish); return; @@ -2302,7 +2302,7 @@ int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) guard(rcu)(); - dev = rt->dst.dev; + dev = dst_dev_rcu(&rt->dst); if (IPCB(skb)->flags & IPSKB_FORWARDED) goto mc_output; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c63feef55d5193aadd11f9d5b45c8f5482e06be5..42f49187d3760330d0e0ca9e2c5fff778899f5fd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -414,11 +414,11 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst_dev(dst); + struct net_device *dev; struct neighbour *n; rcu_read_lock(); - + dev = dst_dev_rcu(dst); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { -- 2.51.0.318.gd7df087d1a-goog