Update udp_tunnel{,6}_dst_lookup to return noref dsts when possible. This is done using a new boolean which indicates whether the returned dst is noref. When the returned dst is noref, the dst is only valid inside the RCU read-side critical section in which it was queried. Update all callers to properly use the new noref argument and convert all tunnels that use udp_tunnel{,6}_dst_lookup to noref. This affects bareudp, geneve and vxlan tunnels. Signed-off-by: Marek Mietus --- drivers/net/bareudp.c | 33 ++++++++++++------ drivers/net/geneve.c | 61 +++++++++++++++++++++++----------- drivers/net/vxlan/vxlan_core.c | 41 +++++++++++++++-------- include/net/udp_tunnel.h | 6 ++-- net/ipv4/udp_tunnel_core.c | 16 ++++++--- net/ipv6/ip6_udp_tunnel.c | 17 +++++++--- 6 files changed, 118 insertions(+), 56 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 92ee4a36f86f..5d2129c19c79 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -315,6 +315,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, int min_headroom; __u8 tos, ttl; __be32 saddr; + bool noref; int err; if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) @@ -329,7 +330,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, sport, bareudp->port, key->tos, use_cache ? - (struct dst_cache *)&info->dst_cache : NULL); + (struct dst_cache *)&info->dst_cache : NULL, + &noref); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -364,11 +366,13 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, !net_eq(bareudp->net, dev_net(bareudp->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return 0; free_dst: - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return err; } @@ -386,6 +390,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, int min_headroom; __u8 prio, ttl; __be16 sport; + bool noref; int err; if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) @@ -400,7 +405,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, key, sport, bareudp->port, key->tos, use_cache ? - (struct dst_cache *) &info->dst_cache : NULL); + (struct dst_cache *)&info->dst_cache : NULL, + &noref); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -434,11 +440,13 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); - dst_release(dst); + if (!noref) + dst_release(dst); return 0; free_dst: - dst_release(dst); + if (!noref) + dst_release(dst); return err; } @@ -507,6 +515,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct bareudp_dev *bareudp = netdev_priv(dev); bool use_cache; __be16 sport; + bool noref; use_cache = ip_tunnel_dst_cache_usable(skb, info); sport = udp_flow_src_port(bareudp->net, skb, @@ -520,11 +529,13 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, - use_cache ? &info->dst_cache : NULL); + use_cache ? &info->dst_cache : NULL, + &noref); if (IS_ERR(rt)) return PTR_ERR(rt); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); info->key.u.ipv4.src = saddr; } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; @@ -534,11 +545,13 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, - use_cache ? &info->dst_cache : NULL); + use_cache ? &info->dst_cache : NULL, + &noref); if (IS_ERR(dst)) return PTR_ERR(dst); - dst_release(dst); + if (!noref) + dst_release(dst); info->key.u.ipv6.src = saddr; } else { return -EINVAL; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cfc8e7b5f8f2..32ce2e197260 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1227,7 +1227,8 @@ static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, memcpy(gro_opt + 1, hint, sizeof(*hint)); } -static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, +static int geneve_build_skb(struct dst_entry *dst, bool noref, + struct sk_buff *skb, const struct ip_tunnel_info *info, const struct geneve_dev *geneve, int ip_hdr_len) { @@ -1268,7 +1269,8 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, return 0; free_dst: - dst_release(dst); + if (!noref) + dst_release(dst); return err; } @@ -1300,6 +1302,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 df = 0; __be32 saddr; __be16 sport; + bool noref; int err; if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) @@ -1318,7 +1321,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? - (struct dst_cache *)&info->dst_cache : NULL); + (struct dst_cache *)&info->dst_cache : NULL, + &noref); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1327,7 +1331,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, geneve_build_gro_hint_opt(geneve, skb), netif_is_any_bridge_port(dev)); if (err < 0) { - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return err; } else if (err) { struct ip_tunnel_info *info; @@ -1338,7 +1343,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return -ENOMEM; } @@ -1347,13 +1353,15 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } if (!pskb_may_pull(skb, ETH_HLEN)) { - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return -EMSGSIZE; } @@ -1386,7 +1394,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } } - err = geneve_build_skb(&rt->dst, skb, info, geneve, + err = geneve_build_skb(&rt->dst, noref, skb, info, geneve, sizeof(struct iphdr)); if (unlikely(err)) return err; @@ -1396,7 +1404,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, !net_eq(geneve->net, dev_net(geneve->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return 0; } @@ -1412,6 +1421,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool use_cache; __u8 prio, ttl; __be16 sport; + bool noref; int err; if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) @@ -1430,7 +1440,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, &saddr, key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? - (struct dst_cache *)&info->dst_cache : NULL); + (struct dst_cache *)&info->dst_cache : NULL, + &noref); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1439,7 +1450,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, geneve_build_gro_hint_opt(geneve, skb), netif_is_any_bridge_port(dev)); if (err < 0) { - dst_release(dst); + if (!noref) + dst_release(dst); return err; } else if (err) { struct ip_tunnel_info *info = skb_tunnel_info(skb); @@ -1449,7 +1461,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { - dst_release(dst); + if (!noref) + dst_release(dst); return -ENOMEM; } @@ -1458,13 +1471,15 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, } if (!pskb_may_pull(skb, ETH_HLEN)) { - dst_release(dst); + if (!noref) + dst_release(dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); - dst_release(dst); + if (!noref) + dst_release(dst); return -EMSGSIZE; } @@ -1478,7 +1493,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = key->ttl; ttl = ttl ? : ip6_dst_hoplimit(dst); } - err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); + err = geneve_build_skb(dst, noref, skb, info, geneve, sizeof(struct ipv6hdr)); if (unlikely(err)) return err; @@ -1488,7 +1503,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); - dst_release(dst); + if (!noref) + dst_release(dst); return 0; } #endif @@ -1551,6 +1567,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); __be16 sport; + bool noref; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; @@ -1572,11 +1589,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) &info->key, sport, geneve->cfg.info.key.tp_dst, tos, - use_cache ? &info->dst_cache : NULL); + use_cache ? &info->dst_cache : NULL, + &noref); if (IS_ERR(rt)) return PTR_ERR(rt); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); info->key.u.ipv4.src = saddr; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { @@ -1598,11 +1617,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, prio, - use_cache ? &info->dst_cache : NULL); + use_cache ? &info->dst_cache : NULL, + &noref); if (IS_ERR(dst)) return PTR_ERR(dst); - dst_release(dst); + if (!noref) + dst_release(dst); info->key.u.ipv6.src = saddr; #endif } else { diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index aacef14cd300..491253467e59 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2300,6 +2300,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, int addr_family, __be16 dst_port, int dst_ifindex, __be32 vni, struct dst_entry *dst, + bool noref, u32 rt_flags) { #if IS_ENABLED(CONFIG_IPV6) @@ -2315,7 +2316,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, vxlan->cfg.flags & VXLAN_F_LOCALBYPASS) { struct vxlan_dev *dst_vxlan; - dst_release(dst); + if (!noref) + dst_release(dst); dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni, addr_family, dst_port, vxlan->cfg.flags); @@ -2348,6 +2350,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; + bool noref = false; int addr_family; __u8 tos, ttl; int ifindex; @@ -2473,7 +2476,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex, &saddr, pkey, src_port, dst_port, - tos, use_cache ? dst_cache : NULL); + tos, use_cache ? dst_cache : NULL, + &noref); if (IS_ERR(rt)) { err = PTR_ERR(rt); reason = SKB_DROP_REASON_IP_OUTNOROUTES; @@ -2487,7 +2491,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* Bypass encapsulation if the destination is local */ err = encap_bypass_if_local(skb, dev, vxlan, AF_INET, dst_port, ifindex, vni, - &rt->dst, rt->rt_flags); + &rt->dst, noref, rt->rt_flags); if (err) goto out_unlock; @@ -2523,7 +2527,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, unclone->key.u.ipv4.dst = saddr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); - dst_release(ndst); + if (!noref) + dst_release(ndst); goto out_unlock; } @@ -2540,7 +2545,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, pkey->u.ipv4.dst, tos, ttl, df, src_port, dst_port, xnet, !udp_sum, ipcb_flags); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6; @@ -2559,7 +2565,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock, ifindex, &saddr, pkey, src_port, dst_port, tos, - use_cache ? dst_cache : NULL); + use_cache ? dst_cache : NULL, + &noref); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); ndst = NULL; @@ -2575,7 +2582,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6, dst_port, ifindex, vni, - ndst, rt6i_flags); + ndst, noref, rt6i_flags); if (err) goto out_unlock; } @@ -2598,7 +2605,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); - dst_release(ndst); + if (!noref) + dst_release(ndst); goto out_unlock; } @@ -2616,7 +2624,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, &saddr, &pkey->u.ipv6.dst, tos, ttl, pkey->label, src_port, dst_port, !udp_sum, ip6cb_flags); - dst_release(ndst); + if (!noref) + dst_release(ndst); #endif } vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len); @@ -2636,7 +2645,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) DEV_STATS_INC(dev, tx_carrier_errors); - dst_release(ndst); + if (!noref) + dst_release(ndst); DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb_reason(skb, reason); @@ -3224,6 +3234,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct vxlan_dev *vxlan = netdev_priv(dev); struct ip_tunnel_info *info = skb_tunnel_info(skb); __be16 sport, dport; + bool noref; sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); @@ -3240,10 +3251,11 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) &info->key.u.ipv4.src, &info->key, sport, dport, info->key.tos, - &info->dst_cache); + &info->dst_cache, &noref); if (IS_ERR(rt)) return PTR_ERR(rt); - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); } else { #if IS_ENABLED(CONFIG_IPV6) struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); @@ -3256,10 +3268,11 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 0, &info->key.u.ipv6.src, &info->key, sport, dport, info->key.tos, - &info->dst_cache); + &info->dst_cache, &noref); if (IS_ERR(ndst)) return PTR_ERR(ndst); - dst_release(ndst); + if (!noref) + dst_release(ndst); #else /* !CONFIG_IPV6 */ return -EPFNOSUPPORT; #endif diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d9c6d04bb3b5..5ccfed54c546 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -185,7 +185,8 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, __be32 *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, - struct dst_cache *dst_cache); + struct dst_cache *dst_cache, + bool *noref); struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, @@ -193,7 +194,8 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct in6_addr *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 dsfield, - struct dst_cache *dst_cache); + struct dst_cache *dst_cache, + bool *noref); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, const unsigned long *flags, diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index b1f667c52cb2..978cd59281f6 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -233,16 +233,19 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, __be32 *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, - struct dst_cache *dst_cache) + struct dst_cache *dst_cache, + bool *noref) { struct rtable *rt = NULL; struct flowi4 fl4; #ifdef CONFIG_DST_CACHE if (dst_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) + rt = dst_cache_get_ip4_rcu(dst_cache, saddr); + if (rt) { + *noref = true; return rt; + } } #endif @@ -267,9 +270,12 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, ip_rt_put(rt); return ERR_PTR(-ELOOP); } + *noref = false; #ifdef CONFIG_DST_CACHE - if (dst_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); + if (dst_cache) { + dst_cache_steal_ip4(dst_cache, &rt->dst, fl4.saddr); + *noref = true; + } #endif *saddr = fl4.saddr; return rt; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index d58815db8182..b166ba225551 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -126,6 +126,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); * @dport: UDP destination port * @dsfield: The traffic class field * @dst_cache: The dst cache to use for lookup + * @noref: Is the returned dst noref? * This function performs a route lookup on a UDP tunnel * * It returns a valid dst pointer and stores src address to be used in @@ -140,16 +141,19 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct in6_addr *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 dsfield, - struct dst_cache *dst_cache) + struct dst_cache *dst_cache, + bool *noref) { struct dst_entry *dst = NULL; struct flowi6 fl6; #ifdef CONFIG_DST_CACHE if (dst_cache) { - dst = dst_cache_get_ip6(dst_cache, saddr); - if (dst) + dst = dst_cache_get_ip6_rcu(dst_cache, saddr); + if (dst) { + *noref = true; return dst; + } } #endif memset(&fl6, 0, sizeof(fl6)); @@ -173,9 +177,12 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, dst_release(dst); return ERR_PTR(-ELOOP); } + *noref = false; #ifdef CONFIG_DST_CACHE - if (dst_cache) - dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); + if (dst_cache) { + dst_cache_steal_ip6(dst_cache, dst, &fl6.saddr); + *noref = true; + } #endif *saddr = fl6.saddr; return dst; -- 2.51.0