Bind VXLAN sockets to the local addresses if the IFLA_VXLAN_LOCALBIND option is set. This is the new default. Change vxlan_find_sock() to search for the socket using the listening address. The binding itself is implemented by copying the VXLAN local address to the udp_port_cfg passed to udp_sock_create(). The freebind option is set because VXLAN interfaces may be UP before their outgoing interface is. This fixes multiple VXLAN selftests that fail because of that race. Signed-off-by: Richard Gobert --- drivers/net/vxlan/vxlan_core.c | 58 ++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 667ff17c4569..cc22844fcc4c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -78,18 +78,33 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) } /* Find VXLAN socket based on network namespace, address family, UDP port, - * enabled unshareable flags and socket device binding (see l3mdev with - * non-default VRF). + * bound address, enabled unshareable flags and socket device binding + * (see l3mdev with non-default VRF). 
*/ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, - __be16 port, u32 flags, int ifindex) + __be16 port, u32 flags, int ifindex, + union vxlan_addr *saddr) { struct vxlan_sock *vs; flags &= VXLAN_F_RCV_FLAGS; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port && + struct sock *sk = vs->sock->sk; + struct inet_sock *inet = inet_sk(sk); + + if (flags & VXLAN_F_LOCALBIND) { + if (family == AF_INET && + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, + &saddr->sin6.sin6_addr) != 0) + continue; +#endif + } + + if (inet->inet_sport == port && vxlan_get_sk_family(vs) == family && vs->flags == flags && vs->sock->sk->sk_bound_dev_if == ifindex) @@ -141,11 +156,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, /* Look up VNI in a per net namespace table */ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex, __be32 vni, sa_family_t family, - __be16 port, u32 flags) + __be16 port, u32 flags, + union vxlan_addr *saddr) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, family, port, flags, ifindex); + vs = vxlan_find_sock(net, family, port, flags, ifindex, saddr); if (!vs) return NULL; @@ -2309,7 +2325,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, dst_release(dst); dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni, addr_family, dst_port, - vxlan->cfg.flags); + vxlan->cfg.flags, &vxlan->cfg.saddr); if (!dst_vxlan) { DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, @@ -3508,8 +3524,9 @@ static const struct ethtool_ops vxlan_ethtool_ops = { .get_link_ksettings = vxlan_get_link_ksettings, }; -static struct socket *vxlan_create_sock(struct net *net, bool ipv6, - __be16 port, u32 flags, int ifindex) +static struct socket *vxlan_create_sock(struct net *net, bool ipv6, __be16 port, + u32 flags, int 
ifindex, + union vxlan_addr *addr) { struct socket *sock; struct udp_port_cfg udp_conf; @@ -3526,6 +3543,20 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, udp_conf.family = AF_INET; } + if (flags & VXLAN_F_LOCALBIND) { + if (ipv6) { +#if IS_ENABLED(CONFIG_IPV6) + memcpy(&udp_conf.local_ip6.s6_addr32, + &addr->sin6.sin6_addr.s6_addr32, + sizeof(addr->sin6.sin6_addr.s6_addr32)); +#endif + } else { + udp_conf.local_ip.s_addr = addr->sin.sin_addr.s_addr; + } + + udp_conf.freebind = 1; + } + udp_conf.local_udp_port = port; udp_conf.bind_ifindex = ifindex; @@ -3541,7 +3572,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, /* Create new listen socket if needed */ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, __be16 port, u32 flags, - int ifindex) + int ifindex, + union vxlan_addr *addr) { struct vxlan_sock *vs; struct socket *sock; @@ -3557,7 +3589,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vs->vni_list[h]); - sock = vxlan_create_sock(net, ipv6, port, flags, ifindex); + sock = vxlan_create_sock(net, ipv6, port, flags, ifindex, addr); if (IS_ERR(sock)) { kfree(vs); return ERR_CAST(sock); @@ -3610,7 +3642,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) rcu_read_lock(); vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, vxlan->cfg.dst_port, vxlan->cfg.flags, - l3mdev_index); + l3mdev_index, &vxlan->cfg.saddr); if (vs && !refcount_inc_not_zero(&vs->refcnt)) { rcu_read_unlock(); return -EBUSY; @@ -3620,7 +3652,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) if (!vs) vs = vxlan_socket_create(vxlan->net, ipv6, vxlan->cfg.dst_port, vxlan->cfg.flags, - l3mdev_index); + l3mdev_index, &vxlan->cfg.saddr); if (IS_ERR(vs)) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) -- 2.36.1