Commit 3cf7203ca620 ("net/tunnel: wait until all sk_user_data reader finish before releasing the sock") added synchronize_rcu() in udp_tunnel_sock_release(). This was intended to protect the fast path of a dying vxlan device from dereferencing vxlan_sock->sock->sk after sock_orphan() has set sock->sk to NULL. However, vxlan does not need to access struct socket itself in the fast path; it only reads struct sock, and struct socket is only used for tunnel setup and teardown. Let's store struct sock directly in struct vxlan_sock. In the next patch, we will free vxlan_sock with kfree_rcu(), so that vxlan no longer needs the synchronize_rcu() in udp_tunnel_sock_release(). Signed-off-by: Kuniyuki Iwashima --- drivers/net/vxlan/vxlan_core.c | 48 ++++++++++++++--------------- drivers/net/vxlan/vxlan_multicast.c | 8 ++--- include/net/vxlan.h | 4 +-- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index abf3ae04d75b..ce99da44ea7d 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -88,10 +88,10 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, flags &= VXLAN_F_RCV_FLAGS; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port && + if (inet_sk(vs->sk)->inet_sport == port && vxlan_get_sk_family(vs) == family && vs->flags == flags && - vs->sock->sk->sk_bound_dev_if == ifindex) + vs->sk->sk_bound_dev_if == ifindex) return vs; } return NULL; @@ -1497,7 +1497,7 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) return false; hlist_del_rcu(&vs->hlist); - udp_tunnel_notify_del_rx_port(vs->sock->sk, + udp_tunnel_notify_del_rx_port(vs->sk, (vs->flags & VXLAN_F_GPE) ? 
UDP_TUNNEL_TYPE_VXLAN_GPE : UDP_TUNNEL_TYPE_VXLAN); @@ -1523,13 +1523,13 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan) vxlan_vs_del_dev(vxlan); if (__vxlan_sock_release_prep(sock4)) { - udp_tunnel_sock_release(sock4->sock->sk); + udp_tunnel_sock_release(sock4->sk); kfree(sock4); } #if IS_ENABLED(CONFIG_IPV6) if (__vxlan_sock_release_prep(sock6)) { - udp_tunnel_sock_release(sock6->sock->sk); + udp_tunnel_sock_release(sock6->sk); kfree(sock6); } #endif @@ -2477,7 +2477,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (!ifindex) - ifindex = sock4->sock->sk->sk_bound_dev_if; + ifindex = sock4->sk->sk_bound_dev_if; rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex, &saddr, pkey, src_port, dst_port, @@ -2544,7 +2544,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr, + udp_tunnel_xmit_skb(rt, sock4->sk, skb, saddr, pkey->u.ipv4.dst, tos, ttl, df, src_port, dst_port, xnet, !udp_sum, ipcb_flags); @@ -2561,9 +2561,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (!ifindex) - ifindex = sock6->sock->sk->sk_bound_dev_if; + ifindex = sock6->sk->sk_bound_dev_if; - ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock->sk, + ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sk, ifindex, &saddr, pkey, src_port, dst_port, tos, use_cache ? 
dst_cache : NULL); @@ -2619,7 +2619,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, + udp_tunnel6_xmit_skb(ndst, sock6->sk, skb, dev, &saddr, &pkey->u.ipv6.dst, tos, ttl, pkey->label, src_port, dst_port, !udp_sum, ip6cb_flags); @@ -3258,7 +3258,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) if (!sock6) return -EIO; - ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sock->sk, + ndst = udp_tunnel6_dst_lookup(skb, dev, vxlan->net, sock6->sk, 0, &info->key.u.ipv6.src, &info->key, sport, dport, info->key.tos, @@ -3336,9 +3336,9 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push) type = UDP_TUNNEL_TYPE_VXLAN; if (push) - udp_tunnel_push_rx_port(dev, vs->sock->sk, type); + udp_tunnel_push_rx_port(dev, vs->sk, type); else - udp_tunnel_drop_rx_port(dev, vs->sock->sk, type); + udp_tunnel_drop_rx_port(dev, vs->sk, type); } } } @@ -3544,8 +3544,8 @@ static const struct ethtool_ops vxlan_ethtool_ops = { .get_link_ksettings = vxlan_get_link_ksettings, }; -static struct socket *vxlan_create_sock(struct net *net, bool ipv6, - __be16 port, u32 flags, int ifindex) +static struct sock *vxlan_create_sock(struct net *net, bool ipv6, + __be16 port, u32 flags, int ifindex) { struct socket *sock; struct udp_port_cfg udp_conf; @@ -3571,7 +3571,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, return ERR_PTR(err); udp_allow_gso(sock->sk); - return sock; + return sock->sk; } /* Create new listen socket if needed */ @@ -3579,10 +3579,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, __be16 port, u32 flags, int ifindex) { + struct udp_tunnel_sock_cfg tunnel_cfg; struct vxlan_sock *vs; - struct socket *sock; + struct sock *sk; unsigned int h; - struct udp_tunnel_sock_cfg tunnel_cfg; ASSERT_RTNL(); @@ -3593,18 +3593,18 @@ static struct vxlan_sock *vxlan_socket_create(struct net 
*net, bool ipv6, for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vs->vni_list[h]); - sock = vxlan_create_sock(net, ipv6, port, flags, ifindex); - if (IS_ERR(sock)) { + sk = vxlan_create_sock(net, ipv6, port, flags, ifindex); + if (IS_ERR(sk)) { kfree(vs); - return ERR_CAST(sock); + return ERR_CAST(sk); } - vs->sock = sock; + vs->sk = sk; refcount_set(&vs->refcnt, 1); vs->flags = (flags & VXLAN_F_RCV_FLAGS); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); - udp_tunnel_notify_add_rx_port(sock->sk, + udp_tunnel_notify_add_rx_port(sk, (vs->flags & VXLAN_F_GPE) ? UDP_TUNNEL_TYPE_VXLAN_GPE : UDP_TUNNEL_TYPE_VXLAN); @@ -3624,7 +3624,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, tunnel_cfg.gro_complete = vxlan_gro_complete; } - setup_udp_tunnel_sock(net, sock->sk, &tunnel_cfg); + setup_udp_tunnel_sock(net, sk, &tunnel_cfg); return vs; } diff --git a/drivers/net/vxlan/vxlan_multicast.c b/drivers/net/vxlan/vxlan_multicast.c index b0e80bca855c..3b75b48dc726 100644 --- a/drivers/net/vxlan/vxlan_multicast.c +++ b/drivers/net/vxlan/vxlan_multicast.c @@ -29,7 +29,7 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, .imr_ifindex = ifindex, }; - sk = sock4->sock->sk; + sk = sock4->sk; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); @@ -37,7 +37,7 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, } else { struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); - sk = sock6->sock->sk; + sk = sock6->sk; lock_sock(sk); ret = ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); @@ -62,7 +62,7 @@ int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, .imr_ifindex = ifindex, }; - sk = sock4->sock->sk; + sk = sock4->sk; lock_sock(sk); ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); @@ -70,7 +70,7 @@ int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, } else { struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); - sk = 
sock6->sock->sk; + sk = sock6->sk; lock_sock(sk); ret = ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0ee50785f4f1..8b52294b2902 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -185,7 +185,7 @@ struct vxlan_metadata { /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; - struct socket *sock; + struct sock *sk; struct hlist_head vni_list[VNI_HASH_SIZE]; refcount_t refcnt; u32 flags; @@ -448,7 +448,7 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { - return vs->sock->sk->sk_family; + return vs->sk->sk_family; } #if IS_ENABLED(CONFIG_IPV6) -- 2.54.0.545.g6539524ca2-goog