This is a preliminary patch to introduce IP6IP6 flowtable acceleration:
move struct nf_flowtable_ctx earlier in the file, extend it with tunnel
metadata (offset and protocol) and pass the context to the encap/tunnel
helpers instead of raw offset pointers.

Signed-off-by: Lorenzo Bianconi
---
 net/netfilter/nf_flow_table_ip.c | 80 ++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 78883343e5d686014752ec4fe1a28319cbf08845..d28c256d33dc5a8d07490b765747b5c6c48aa67d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -142,7 +142,18 @@ static bool ip_has_options(unsigned int thoff)
 	return thoff != sizeof(struct iphdr);
 }
 
-static void nf_flow_tuple_encap(struct sk_buff *skb,
+struct nf_flowtable_ctx {
+	const struct net_device	*in;
+	u32			offset;
+	u32			hdrsize;
+	struct {
+		u32		offset;
+		u8		proto;
+	} tun;
+};
+
+static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
+				struct sk_buff *skb,
 				struct flow_offload_tuple *tuple)
 {
 	__be16 inner_proto = skb->protocol;
@@ -174,22 +185,15 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP)) {
+	if (inner_proto == htons(ETH_P_IP) &&
+	    ctx->tun.proto == IPPROTO_IPIP) {
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		if (iph->protocol == IPPROTO_IPIP) {
-			tuple->tun.dst_v4.s_addr = iph->daddr;
-			tuple->tun.src_v4.s_addr = iph->saddr;
-			tuple->tun.l3_proto = IPPROTO_IPIP;
-		}
+		tuple->tun.dst_v4.s_addr = iph->daddr;
+		tuple->tun.src_v4.s_addr = iph->saddr;
+		tuple->tun.l3_proto = IPPROTO_IPIP;
 	}
 }
 
-struct nf_flowtable_ctx {
-	const struct net_device	*in;
-	u32			offset;
-	u32			hdrsize;
-};
-
 static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 			    struct flow_offload_tuple *tuple)
 {
@@ -257,7 +261,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET;
 	tuple->l4proto		= ipproto;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -293,15 +297,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				     struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	u16 size;
 
-	if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
 		return false;
 
-	iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
 	size = iph->ihl << 2;
 
 	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -310,25 +315,27 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
 	if (iph->ttl <= 1)
 		return false;
 
-	if (iph->protocol == IPPROTO_IPIP)
-		*psize += size;
+	if (iph->protocol == IPPROTO_IPIP) {
+		ctx->tun.proto = IPPROTO_IPIP;
+		ctx->tun.offset = size;
+		ctx->offset += size;
+	}
 
 	return true;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				   struct sk_buff *skb)
 {
-	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
-
-	if (iph->protocol != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP)
 		return;
 
-	skb_pull(skb, iph->ihl << 2);
+	skb_pull(skb, ctx->tun.offset);
 	skb_reset_network_header(skb);
 }
 
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
-				       u32 *offset)
+static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+				       struct sk_buff *skb, __be16 proto)
 {
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
@@ -341,7 +348,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 
 		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		if (veth->h_vlan_encapsulated_proto == proto) {
-			*offset += VLAN_HLEN;
+			ctx->offset += VLAN_HLEN;
 			inner_proto = proto;
 			ret = true;
 		}
@@ -349,19 +356,20 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 	case htons(ETH_P_PPP_SES):
 		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
 		    inner_proto == proto) {
-			*offset += PPPOE_SES_HLEN;
+			ctx->offset += PPPOE_SES_HLEN;
 			ret = true;
 		}
 		break;
 	}
 
 	if (inner_proto == htons(ETH_P_IP))
-		ret = nf_flow_ip4_tunnel_proto(skb, offset);
+		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
 
 	return ret;
 }
 
-static void nf_flow_encap_pop(struct sk_buff *skb,
+static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
+			      struct sk_buff *skb,
 			      struct flow_offload_tuple_rhash *tuplehash)
 {
 	struct vlan_hdr *vlan_hdr;
@@ -388,7 +396,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
 	}
 
 	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(skb);
+		nf_flow_ip4_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -414,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
 		return NULL;
 
 	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -458,7 +466,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 	thoff -= ctx->offset;
 
 	iph = ip_hdr(skb);
@@ -836,7 +844,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET6;
 	tuple->l4proto		= nexthdr;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -873,7 +881,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 
 	ip6h = ipv6_hdr(skb);
 	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -895,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 	struct flow_offload_tuple tuple = {};
 
 	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
-- 
2.52.0
Introduce SW acceleration for the rx path of IP6IP6 tunnels, relying on the
netfilter flowtable infrastructure. Subsequent patches will add SW
acceleration for the IP6IP6 tunnel tx path.

IP6IP6 rx SW acceleration can be tested by running the following scenario,
where traffic is forwarded between two NICs (eth0 and eth1) and an IP6IP6
tunnel is used to access a remote site (using eth1 as the underlay device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
	flowtable ft {
		hook ingress priority filter
		devices = { eth0, eth1 }
	}

	chain forward {
		type filter hook forward priority filter; policy accept;
		meta l4proto { tcp, udp } flow add @ft
	}
}

Reproducing the scenario described above using veths, I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next (baseline):			~ 79Gbps
  - net-next + IP6IP6 flowtable support:	~106Gbps
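The veth-based setup used for the measurements can be reproduced with
network namespaces. The sketch below is only illustrative (the namespace
names, the host address and the use of iperf3 are assumptions, not part of
this patch); "r0" plays the role of the router configured as shown above:

$ip netns add h0; ip netns add r0
$ip link add veth0 type veth peer name veth1
$ip link set veth0 netns h0
$ip link set veth1 netns r0
$ip -n h0 link set veth0 up; ip -n r0 link set veth1 up
$ip -n h0 addr add 2001:db8:1::1/64 dev veth0 nodad	# host behind eth0
$ip -n r0 addr add 2001:db8:1::2/64 dev veth1 nodad	# eth0 in the dump above
$ip -n h0 route add default via 2001:db8:1::2
$ip netns exec r0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
# eth1, the ip6tnl device and the nft ruleset are then configured in r0 as
# in the dumps above, with a second veth pair and a peer namespace
# terminating the tunnel; a TCP stream can be generated e.g. with iperf3:
$ip netns exec h0 iperf3 -c 2002:db8:1::2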
Signed-off-by: Lorenzo Bianconi
---
 net/ipv6/ip6_tunnel.c            | 27 +++++++++++
 net/netfilter/nf_flow_table_ip.c | 96 ++++++++++++++++++++++++++++++++++------
 2 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6405072050e0ef7521ca1fdddc4a0252e2159d2a..10341bfc16bd16a43290015952bd9a57658e6ae1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
 
+static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
+				     struct net_device_path *path)
+{
+	struct ip6_tnl *t = netdev_priv(ctx->dev);
+	struct flowi6 fl6 = {
+		.daddr = t->parms.raddr,
+	};
+	struct dst_entry *dst;
+	int err;
+
+	dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
+	if (!dst->error) {
+		path->type = DEV_PATH_TUN;
+		path->tun.src_v6 = t->parms.laddr;
+		path->tun.dst_v6 = t->parms.raddr;
+		path->tun.l3_proto = IPPROTO_IPV6;
+		path->dev = ctx->dev;
+		ctx->dev = dst->dev;
+	}
+
+	err = dst->error;
+	dst_release(dst);
+
+	return err;
+}
+
 static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_init	= ip6_tnl_dev_init,
 	.ndo_uninit	= ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_change_mtu = ip6_tnl_change_mtu,
 	.ndo_get_stats64 = dev_get_tstats64,
 	.ndo_get_iflink = ip6_tnl_get_iflink,
+	.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
 };
 
 #define IPXIPX_FEATURES (NETIF_F_SG |		\
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index d28c256d33dc5a8d07490b765747b5c6c48aa67d..ca5a5d58eb2edbd61fe826eb892eaaf2d5b9e537 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 /* For layer 4 checksum field offset. */
 #include
 #include
@@ -159,6 +160,7 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
 	struct pppoe_hdr *phdr;
+	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
 	u16 offset = 0;
 	int i = 0;
@@ -185,12 +187,25 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP) &&
-	    ctx->tun.proto == IPPROTO_IPIP) {
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		tuple->tun.dst_v4.s_addr = iph->daddr;
-		tuple->tun.src_v4.s_addr = iph->saddr;
-		tuple->tun.l3_proto = IPPROTO_IPIP;
+		if (ctx->tun.proto == IPPROTO_IPIP) {
+			tuple->tun.dst_v4.s_addr = iph->daddr;
+			tuple->tun.src_v4.s_addr = iph->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPIP;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+		if (ctx->tun.proto == IPPROTO_IPV6) {
+			tuple->tun.dst_v6 = ip6h->daddr;
+			tuple->tun.src_v6 = ip6h->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPV6;
+		}
+		break;
+	default:
+		break;
 	}
 }
 
@@ -324,10 +339,57 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
 	return true;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
-				   struct sk_buff *skb)
+static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				     struct sk_buff *skb)
+{
+	const struct inet6_protocol *ipprot;
+	struct ipv6hdr *ip6h;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h) + ctx->offset))
+		return false;
+
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+	if (ip6h->hop_limit <= 1)
+		return false;
+
+	/* Initialize default values for extension headers parsing */
+	skb->transport_header = skb->network_header + sizeof(*ip6h) +
+				ctx->offset;
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr) + ctx->offset;
+
+	do {
+		unsigned int nhoff = IP6CB(skb)->nhoff;
+		u8 nexthdr;
+
+		if (!pskb_pull(skb, skb_transport_offset(skb)))
+			return false;
+
+		nexthdr = skb_network_header(skb)[nhoff];
+		ipprot = rcu_dereference(inet6_protos[nexthdr]);
+		if (!ipprot)
+			break;
+
+		if (ipprot->flags & INET6_PROTO_FINAL) {
+			if (nexthdr == IPPROTO_IPV6) {
+				ctx->tun.offset = skb->transport_header -
+						  skb->network_header - ctx->offset;
+				ctx->tun.proto = IPPROTO_IPV6;
+			}
+			break;
+		}
+	} while (ipprot->handler(skb) > 0);
+
+	skb_push(skb, skb->transport_header - skb->network_header);
+	ctx->offset += ctx->tun.offset;
+
+	return true;
+}
+
+static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				  struct sk_buff *skb)
 {
-	if (ctx->tun.proto != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP &&
+	    ctx->tun.proto != IPPROTO_IPV6)
 		return;
 
 	skb_pull(skb, ctx->tun.offset);
@@ -362,8 +424,16 @@ static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP))
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = nf_flow_ip6_tunnel_proto(ctx, skb);
+		break;
+	default:
+		break;
+	}
 
 	return ret;
 }
@@ -395,8 +465,9 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
 		}
 	}
 
-	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(ctx, skb);
+	if (skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6))
+		nf_flow_ip_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -902,8 +973,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
+	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
-- 
2.52.0

Introduce SW acceleration for the tx path of IP6IP6 tunnels, relying on the
netfilter flowtable infrastructure.

IP6IP6 tx SW acceleration can be tested by running the following scenario,
where traffic is forwarded between two NICs (eth0 and eth1) and an IP6IP6
tunnel is used to access a remote site (using eth1 as the underlay device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
	flowtable ft {
		hook ingress priority filter
		devices = { eth0, eth1 }
	}

	chain forward {
		type filter hook forward priority filter; policy accept;
		meta l4proto { tcp, udp } flow add @ft
	}
}

Reproducing the scenario described above using veths, I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next (baseline):			~93Gbps
  - net-next + IP6IP6 flowtable support:	~98Gbps
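Whether the IP6IP6 flows are actually offloaded can be verified by looking
at the conntrack table, where offloaded entries carry the [OFFLOAD] flag.
The commands below are just one possible way to check this and assume
conntrack-tools is installed; they are not part of this patch:

$conntrack -L -f ipv6 | grep OFFLOAD
$nft list flowtables

Adding a counter statement to the 'flow add @ft' rule is another quick
check: once a flow has been offloaded, subsequent packets bypass the
forward chain, so the counter only accounts for the first packets of each
connection.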
Signed-off-by: Lorenzo Bianconi
---
 net/netfilter/nf_flow_table_ip.c | 93 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index ca5a5d58eb2edbd61fe826eb892eaaf2d5b9e537..e5182630a83e3e959c7bde293ddc3beef295cd51 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -646,6 +647,91 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
 	return 0;
 }
 
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
+
+static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
+				      struct flow_offload_tuple *tuple,
+				      struct in6_addr **ip6_daddr)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
+	int err, mtu, encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
+	struct rtable *rt = dst_rtable(tuple->dst_cache);
+	__u8 dsfield = ipv6_get_dsfield(ip6h);
+	struct flowi6 fl6 = {
+		.daddr = tuple->tun.src_v6,
+		.saddr = tuple->tun.dst_v6,
+		.flowi6_proto = proto,
+	};
+	u32 headroom;
+
+	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+	if (err)
+		return err;
+
+	skb_set_inner_ipproto(skb, proto);
+	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
+		   rt->dst.header_len;
+	if (encap_limit)
+		headroom += 8;
+	err = skb_cow_head(skb, headroom);
+	if (err)
+		return err;
+
+	skb_scrub_packet(skb, true);
+	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
+	if (encap_limit)
+		mtu -= 8;
+	mtu = max(mtu, IPV6_MIN_MTU);
+	skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+	if (encap_limit > 0) {
+		struct ipv6_tel_txoption opt = {
+			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
+			.dst_opt[3] = 1,
+			.dst_opt[4] = encap_limit,
+			.dst_opt[5] = IPV6_TLV_PADN,
+			.dst_opt[6] = 1,
+		};
+
+		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
+		opt.ops.opt_nflen = 8;
+		ipv6_push_frag_opts(skb, &opt.ops, &proto);
+	}
+
+	skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+
+	ip6h = ipv6_hdr(skb);
+	ip6_flow_hdr(ip6h, dsfield,
+		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
+	ip6h->hop_limit = hop_limit;
+	ip6h->nexthdr = proto;
+	ip6h->daddr = tuple->tun.src_v6;
+	ip6h->saddr = tuple->tun.dst_v6;
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+	IP6CB(skb)->flags = 0;
+	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+	*ip6_daddr = &tuple->tun.src_v6;
+
+	return 0;
+}
+
+static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
+				  struct flow_offload_tuple *tuple,
+				  struct in6_addr **ip6_daddr)
+{
+	if (tuple->tun_num)
+		return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr);
+
+	return 0;
+}
+
 static int nf_flow_encap_push(struct sk_buff *skb,
 			      struct flow_offload_tuple *tuple)
 {
@@ -934,6 +1020,9 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
 	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
+	if (flow->tuplehash[!dir].tuple.tun_num)
+		mtu -= sizeof(*ip6h);
+
 	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return 0;
 
@@ -1023,6 +1112,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	other_tuple = &flow->tuplehash[!dir].tuple;
 	ip6_daddr = &other_tuple->src_v6;
 
+	if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
+				   &ip6_daddr) < 0)
+		return NF_DROP;
+
 	if (nf_flow_encap_push(skb, other_tuple) < 0)
 		return NF_DROP;
 
-- 
2.52.0

Similar to IPIP, introduce a specific selftest for IP6IP6 flowtable SW
acceleration in nft_flowtable.sh.

Signed-off-by: Lorenzo Bianconi
---
 .../selftests/net/netfilter/nft_flowtable.sh | 62 ++++++++++++++++++----
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4ec0175d8db3df8337bb5fe6c42e26..14d7f67715edc17ee1917913a3d0f417215200ba 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,28 @@ ip -net "$nsr1" link set tun0 up
 ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
 ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
 ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
 ip -net "$nsr2" link set tun0 up
 ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
 ip -net "$nsr1" route change default via 192.168.100.2
 ip -net "$nsr2" route change default via 192.168.100.1
+ip -6 -net "$nsr1" route change default via fee1:3::2
+ip -6 -net "$nsr2" route change default via fee1:3::1
 ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
 
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
 ip netns exec "$nsr1" nft -a insert rule inet filter forward \
 	'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
 
@@ -611,28 +623,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Create vlan tagged devices for IPIP traffic.
 ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
 ip -net "$nsr1" link set veth1.10 up
 ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
 ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
 ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route change default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
 
 ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
 ip -net "$nsr2" link set veth0.10 up
 ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
 ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
 ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route change default via fee1:5::1
 
 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +675,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Restore the previous configuration
 ip -net "$nsr1" route change default via 192.168.10.2
 ip -net "$nsr2" route change default via 192.168.10.1
 ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
 }
 
 # Another test:
-- 
2.52.0
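The updated selftest can be run directly from the kernel tree; the
invocations below follow the usual kselftest conventions and are not part
of the patch itself:

$cd tools/testing/selftests/net/netfilter
$./nft_flowtable.sh

or through the kselftest framework:

$make -C tools/testing/selftests TARGETS=net/netfilter run_tests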