Add SEG6_IPTUNNEL_SRC in the uapi for users to configure a specific tunnel source address. Make seg6_iptunnel handle the new attribute correctly. It has priority over the configured per-netns tunnel source address, if any. Cc: David Ahern Signed-off-by: Justin Iurman Reviewed-by: Andrea Mayer --- include/uapi/linux/seg6_iptunnel.h | 1 + net/ipv6/seg6_iptunnel.c | 114 +++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 31 deletions(-) diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index ae78791372b8..485889b19900 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -20,6 +20,7 @@ enum { SEG6_IPTUNNEL_UNSPEC, SEG6_IPTUNNEL_SRH, + SEG6_IPTUNNEL_SRC, /* struct in6_addr */ __SEG6_IPTUNNEL_MAX, }; #define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 3e1b9991131a..e76cc0cc481e 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -49,6 +49,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) struct seg6_lwt { struct dst_cache cache; + struct in6_addr tunsrc; struct seg6_iptunnel_encap tuninfo[]; }; @@ -65,6 +66,7 @@ seg6_encap_lwtunnel(struct lwtunnel_state *lwt) static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, + [SEG6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; static int nla_put_srh(struct sk_buff *skb, int attrtype, @@ -87,23 +89,32 @@ static int nla_put_srh(struct sk_buff *skb, int attrtype, } static void set_tun_src(struct net *net, struct net_device *dev, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr, struct in6_addr *saddr, + struct in6_addr *route_tunsrc) { struct seg6_pernet_data *sdata = seg6_pernet(net); struct in6_addr *tun_src; - rcu_read_lock(); - - tun_src = rcu_dereference(sdata->tun_src); - - if (!ipv6_addr_any(tun_src)) { - memcpy(saddr, tun_src, sizeof(struct in6_addr)); + /* Priority order to select tunnel source address: + * 1. per route source address (if configured) + * 2. per network namespace source address (if configured) + * 3. dynamic resolution + */ + if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) { + memcpy(saddr, route_tunsrc, sizeof(struct in6_addr)); } else { - ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, - saddr); - } + rcu_read_lock(); + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, + IPV6_PREFER_SRC_PUBLIC, saddr); + } - rcu_read_unlock(); + rcu_read_unlock(); + } } /* Compute flowlabel for outer IPv6 header */ @@ -125,7 +136,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, } static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, - int proto, struct dst_entry *cache_dst) + int proto, struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst_dev(dst); @@ -182,7 +194,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -202,14 +214,15 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - return __seg6_do_srh_encap(skb, osrh, proto, NULL); + return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL); } EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ static int seg6_do_srh_encap_red(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto, - struct dst_entry *cache_dst) + struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { __u8 first_seg = osrh->first_segment; struct dst_entry *dst = skb_dst(skb); @@ -272,7 +285,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, if (skip_srh) { hdr->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); goto out; } @@ -308,7 +321,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, srcaddr: isrh->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (unlikely(!skip_srh && sr_has_hmac(isrh))) { @@ -383,9 +396,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct seg6_iptunnel_encap *tinfo; + struct seg6_lwt *slwt; int proto, err = 0; - tinfo = seg6_encap_lwtunnel(dst->lwtstate); + slwt = seg6_lwt_lwtunnel(dst->lwtstate); + tinfo = slwt->tuninfo; switch (tinfo->mode) { case SEG6_IPTUN_MODE_INLINE: @@ -410,11 +425,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) return -EINVAL; if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) - err = __seg6_do_srh_encap(skb, tinfo->srh, - proto, cache_dst); + err = __seg6_do_srh_encap(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); else - err = seg6_do_srh_encap_red(skb, tinfo->srh, - proto, cache_dst); + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); if (err) return err; @@ -436,12 +451,12 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) err = __seg6_do_srh_encap(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); else err = seg6_do_srh_encap_red(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); if (err) return err; @@ -678,6 +693,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; + if (tb[SEG6_IPTUNNEL_SRC]) { + NL_SET_ERR_MSG(extack, "incompatible mode for tunsrc"); + return -EINVAL; + } break; case SEG6_IPTUN_MODE_ENCAP: break; @@ -702,13 +721,23 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); err = dst_cache_init(&slwt->cache, GFP_ATOMIC); - if (err) { - kfree(newts); - return err; - } + if (err) + goto free_lwt_state; memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + if (tb[SEG6_IPTUNNEL_SRC]) { + slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]); + + if (ipv6_addr_any(&slwt->tunsrc) || + ipv6_addr_is_multicast(&slwt->tunsrc) || + ipv6_addr_loopback(&slwt->tunsrc)) { + NL_SET_ERR_MSG(extack, "invalid tunsrc address"); + err = -EINVAL; + goto free_dst_cache; + } + } + newts->type = LWTUNNEL_ENCAP_SEG6; newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; @@ -720,6 +749,12 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, *ts = newts; return 0; + +free_dst_cache: + dst_cache_destroy(&slwt->cache); +free_lwt_state: + kfree(newts); + return err; } static void seg6_destroy_state(struct lwtunnel_state *lwt) @@ -731,29 +766,46 @@ static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) return -EMSGSIZE; + if (!ipv6_addr_any(&slwt->tunsrc) && + nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc)) + return -EMSGSIZE; + return 0; } static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); + int nlsize; + + nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); - return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); + if (!ipv6_addr_any(&slwt->tunsrc)) + nlsize += nla_total_size(sizeof(slwt->tunsrc)); + + return nlsize; } static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + struct seg6_lwt *a_slwt = seg6_lwt_lwtunnel(a); + struct seg6_lwt *b_slwt = seg6_lwt_lwtunnel(b); int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) return 1; + if (!ipv6_addr_equal(&a_slwt->tunsrc, &b_slwt->tunsrc)) + return 1; + return memcmp(a_hdr, b_hdr, len); } -- 2.39.2 Extend srv6_hencap_red_l3vpn_test.sh to include checks for the new "tunsrc" feature. If there is no support for tunsrc, it silently falls back to the encap config without tunsrc. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Justin Iurman --- .../net/srv6_hencap_red_l3vpn_test.sh | 109 +++++++++++++++--- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 6a68c7eff1dc..cd7d061e21f8 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -193,6 +193,8 @@ ret=${ksft_skip} nsuccess=0 nfail=0 +HAS_TUNSRC=false + log_test() { local rc="$1" @@ -345,6 +347,17 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + # A dedicated ::dead: address (with preferred_lft 0, i.e., + # deprecated) is added when there is support for tunsrc. Because + # it is deprecated, the kernel should never auto-select it as + # source with current config. Only an explicit tunsrc can place + # it in the outer header. + if $HAS_TUNSRC; then + ip -netns "${nsname}" addr \ + add "${net_prefix}::dead:${rt}/64" \ + dev "${devname}" nodad preferred_lft 0 + fi + ip -netns "${nsname}" link set "${devname}" up done @@ -420,6 +433,7 @@ setup_rt_local_sids() # to the destination host) # $5 - encap mode (full or red) # $6 - traffic type (IPv6 or IPv4) +# $7 - force tunsrc (true or false) __setup_rt_policy() { local dst="$1" @@ -428,10 +442,46 @@ __setup_rt_policy() local dec_rt="$4" local mode="$5" local traffic="$6" + local with_tunsrc="$7" local nsname local policy='' + local tunsrc='' local n + # Verify the per-route tunnel source address ("tunsrc") feature. + # If it is not supported, fallback on encap config without tunsrc. + if $with_tunsrc && $HAS_TUNSRC; then + local net_prefix + local drule + local nxt + + eval nsname=\${$(get_rtname "${dec_rt}")} + + # Next SRv6 hop: first End router if any, or the decap router + [ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}" + + # Use the right prefix for tunsrc depending on the next SRv6 hop + net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")" + tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}" + + # To verify that the outer source address matches the one + # configured with tunsrc, the decap router discards packets + # with any other source address. + ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \ + -s "${net_prefix}::dead:${encap_rt}" \ + -d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \ + -j ACCEPT + + drule="PREROUTING \ + -d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \ + -j DROP" + + if ! ip netns exec "${nsname}" \ + ip6tables -t raw -C ${drule} &>/dev/null; then + ip netns exec "${nsname}" ip6tables -t raw -A ${drule} + fi + fi + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do @@ -444,7 +494,7 @@ __setup_rt_policy() if [ "${traffic}" -eq 6 ]; then ip -netns "${nsname}" -6 route \ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" ip -netns "${nsname}" -6 neigh \ @@ -455,7 +505,7 @@ __setup_rt_policy() # received, otherwise the proxy arp does not work. ip -netns "${nsname}" -4 route \ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" fi } @@ -463,13 +513,13 @@ __setup_rt_policy() # see __setup_rt_policy setup_rt_policy_ipv6() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6" } #see __setup_rt_policy setup_rt_policy_ipv4() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6" } setup_hs() @@ -567,41 +617,41 @@ setup() # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-3,rt-4 (SRv6 End behaviors) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red - setup_rt_policy_ipv6 1 2 "" 1 encap.red + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true + setup_rt_policy_ipv6 1 2 "" 1 encap.red false # create an IPv4 VPN between hosts hs-1 and hs-2 # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-4,rt-3 (SRv6 End behaviors) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv4 2 1 "" 2 encap.red - setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + setup_rt_policy_ipv4 2 1 "" 2 encap.red true + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false # create an IPv6 VPN between hosts hs-3 and hs-4 # the network path between hs-3 and hs-4 traverses several routers # depending on the direction of traffic. # - # Direction hs-3 -> hs-4 (H.Encaps.Red) + # Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End Behavior) # - rt-4 (SRv6 End.DT46 behavior) # # Direction hs-4 -> hs-3 (H.Encaps.Red) # - rt-1 (SRv6 End behavior) # - rt-3 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 4 3 "2" 4 encap.red - setup_rt_policy_ipv6 3 4 "1" 3 encap.red + setup_rt_policy_ipv6 4 3 "2" 4 encap.red true + setup_rt_policy_ipv6 3 4 "1" 3 encap.red false # testing environment was set up successfully SETUP_ERR=0 @@ -809,6 +859,38 @@ test_vrf_or_ksft_skip() fi } +# Before enabling tunsrc tests, make sure tunsrc and ip6tables are supported. +check_tunsrc_support() +{ + setup_ns tunsrc_ns + + ip -netns "${tunsrc_ns}" link add veth0 type veth \ + peer name veth1 netns "${tunsrc_ns}" + + ip -netns "${tunsrc_ns}" link set veth0 up + + if ! ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \ + encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \ + dev veth0 &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc"; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \ + -d fc00::dead:beef -j DROP &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + cleanup_ns "${tunsrc_ns}" + HAS_TUNSRC=true +} + if [ "$(id -u)" -ne 0 ]; then echo "SKIP: Need root privileges" exit "${ksft_skip}" @@ -826,6 +908,7 @@ test_vrf_or_ksft_skip set -e trap cleanup EXIT +check_tunsrc_support setup set +e -- 2.39.2