Remove is_ipv6 from napi_gro_cb and use sk->sk_family instead. This frees up space for another ip_fixedid bit that will be added in the next commit. udp_sock_create always creates either a AP_INET or a AF_INET6 socket, so using sk->sk_family is reliable. In IPv6-FOU, cfg-ipv6_v6only is always enabled. Signed-off-by: Richard Gobert --- include/net/gro.h | 3 --- net/ipv4/fou_core.c | 32 ++++++++++++++------------------ net/ipv4/udp_offload.c | 2 -- net/ipv6/udp_offload.c | 2 -- 4 files changed, 14 insertions(+), 25 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index a0fca7ac6e7e..87c68007f949 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -71,9 +71,6 @@ struct napi_gro_cb { /* Free the skb? */ u8 free:2; - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 3e30745e2c09..a654a06ae7fd 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -228,21 +228,27 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) return 0; } +static inline const struct net_offload *fou_gro_ops(const struct sock *sk, + int proto) +{ + const struct net_offload __rcu **offloads; + + /* FOU doesn't allow IPv4 on IPv6 sockets. */ + offloads = sk->sk_family == AF_INET6 ? inet6_offloads : inet_offloads; + return rcu_dereference(offloads[proto]); +} + static struct sk_buff *fou_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { - const struct net_offload __rcu **offloads; struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; struct sk_buff *pp = NULL; - u8 proto; if (!fou) goto out; - proto = fou->protocol; - /* We can clear the encap_mark for FOU as we are essentially doing * one of two possible things. We are either adding an L4 tunnel * header to the outer L3 tunnel header, or we are simply @@ -254,8 +260,7 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, /* Flag this frame as already having an outer encap header */ NAPI_GRO_CB(skb)->is_fou = 1; - offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; - ops = rcu_dereference(offloads[proto]); + ops = fou_gro_ops(sk, fou->protocol); if (!ops || !ops->callbacks.gro_receive) goto out; @@ -268,10 +273,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { - const struct net_offload __rcu **offloads; struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; - u8 proto; int err; if (!fou) { @@ -279,10 +282,7 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, goto out; } - proto = fou->protocol; - - offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; - ops = rcu_dereference(offloads[proto]); + ops = fou_gro_ops(sk, fou->protocol); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) { err = -ENOSYS; goto out; @@ -323,7 +323,6 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { - const struct net_offload __rcu **offloads; const struct net_offload *ops; struct sk_buff *pp = NULL; struct sk_buff *p; @@ -450,8 +449,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, /* Flag this frame as already having an outer encap header */ NAPI_GRO_CB(skb)->is_fou = 1; - offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; - ops = rcu_dereference(offloads[proto]); + ops = fou_gro_ops(sk, proto); if (!ops || !ops->callbacks.gro_receive) goto out; @@ -467,7 +465,6 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); - const struct net_offload __rcu **offloads; const struct net_offload *ops; unsigned int guehlen = 0; u8 proto; @@ -494,8 +491,7 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return err; } - offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; - ops = rcu_dereference(offloads[proto]); + ops = fou_gro_ops(sk, proto); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 5128e2a5b00a..683689cf4293 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -891,8 +891,6 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) skb_gro_checksum_try_convert(skb, IPPROTO_UDP, inet_gro_compute_pseudo); skip: - NAPI_GRO_CB(skb)->is_ipv6 = 0; - if (static_branch_unlikely(&udp_encap_needed_key)) sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index d8445ac1b2e4..046f13b1d77a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -154,8 +154,6 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) ip6_gro_compute_pseudo); skip: - NAPI_GRO_CB(skb)->is_ipv6 = 1; - if (static_branch_unlikely(&udpv6_encap_needed_key)) sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest); -- 2.36.1 Only merge encapsulated packets if their outer IDs are either incrementing or fixed, just like for inner IDs and IDs of non-encapsulated packets. Add another ip_fixedid bit for a total of two bits: one for outer IDs (and for unencapsulated packets) and one for inner IDs. This commit preserves the current behavior of GSO where only the IDs of the inner-most headers are restored correctly. Signed-off-by: Richard Gobert --- include/net/gro.h | 37 ++++++++++++++++++------------------- net/ipv4/tcp_offload.c | 5 ++++- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index 87c68007f949..4307b68afda7 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -75,7 +75,7 @@ struct napi_gro_cb { u8 is_fou:1; /* Used to determine if ipid_offset can be ignored */ - u8 ip_fixedid:1; + u8 ip_fixedid:2; /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; @@ -442,29 +442,26 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, } static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, - struct sk_buff *p, bool outer) + struct sk_buff *p, bool inner) { const u32 id = ntohl(*(__be32 *)&iph->id); const u32 id2 = ntohl(*(__be32 *)&iph2->id); const u16 ipid_offset = (id >> 16) - (id2 >> 16); const u16 count = NAPI_GRO_CB(p)->count; const u32 df = id & IP_DF; - int flush; /* All fields must match except length and checksum. */ - flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); - - if (flush | (outer && df)) - return flush; + if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF))) + return true; /* When we receive our second frame we can make a decision on if we * continue this flow as an atomic flow with a fixed ID or if we use * an incrementing ID. */ if (count == 1 && df && !ipid_offset) - NAPI_GRO_CB(p)->ip_fixedid = true; + NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; - return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); + return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); } static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) @@ -478,28 +475,30 @@ static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr } static inline int __gro_receive_network_flush(const void *th, const void *th2, - struct sk_buff *p, const u16 diff, - bool outer) + struct sk_buff *p, bool inner) { - const void *nh = th - diff; - const void *nh2 = th2 - diff; + const void *nh, *nh2; + int off, diff; + + off = skb_transport_offset(p); + diff = off - NAPI_GRO_CB(p)->network_offsets[inner]; + nh = th - diff; + nh2 = th2 - diff; if (((struct iphdr *)nh)->version == 6) return ipv6_gro_flush(nh, nh2); else - return inet_gro_flush(nh, nh2, p, outer); + return inet_gro_flush(nh, nh2, p, inner); } static inline int gro_receive_network_flush(const void *th, const void *th2, struct sk_buff *p) { - const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; - int off = skb_transport_offset(p); int flush; - flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); - if (encap_mark) - flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); + flush = __gro_receive_network_flush(th, th2, p, false); + if (NAPI_GRO_CB(p)->encap_mark) + flush |= __gro_receive_network_flush(th, th2, p, true); return flush; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index be5c2294610e..56817ef12ad2 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -472,6 +472,7 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct tcphdr *th = tcp_hdr(skb); + bool is_fixedid; if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4; @@ -485,8 +486,10 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); + is_fixedid = (NAPI_GRO_CB(skb)->ip_fixedid >> skb->encapsulation) & 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | - (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID); + (is_fixedid * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; -- 2.36.1 Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can be mangled. Outer IDs can always be mangled. Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing both inner and outer IDs to be mangled. In the future, we could add NETIF_F_TSO_MANGLEID_INNER to provide more granular control to drivers. This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. Signed-off-by: Richard Gobert --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 ++++++-- drivers/net/ethernet/sfc/ef100_tx.c | 12 +++++++----- include/linux/netdevice.h | 9 +++++++-- include/linux/skbuff.h | 6 +++++- net/core/dev.c | 7 +++---- net/ipv4/af_inet.c | 13 ++++++------- net/ipv4/tcp_offload.c | 5 +---- 7 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b8c609d91d11..505c4ce7cef8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1289,8 +1289,12 @@ static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr * tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr, ipv4->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) { + bool encap = rq->hw_gro_data->fk.control.flags & FLOW_DIS_ENCAPSULATION; + + skb_shinfo(skb)->gso_type |= encap ? + SKB_GSO_TCP_FIXEDID_INNER : SKB_GSO_TCP_FIXEDID; + } skb->csum_start = (unsigned char *)tcp - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index e6b6be549581..4efd22b44986 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -189,7 +189,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx, { bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; unsigned int len, ip_offset, tcp_offset, payload_segs; - u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + u32 mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; unsigned int outer_ip_offset, outer_l4_offset; u16 vlan_tci = skb_vlan_tag_get(skb); u32 mss = skb_shinfo(skb)->gso_size; @@ -201,7 +202,9 @@ static void ef100_make_tso_desc(struct efx_nic *efx, u32 paylen; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) - mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER) + mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_NO_OP; if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) vlan_enable = skb_vlan_tag_present(skb); @@ -239,14 +242,13 @@ static void ef100_make_tso_desc(struct efx_nic *efx, ESF_GZ_TX_TSO_CSO_INNER_L4, 1, ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, - ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid_inner, ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, - ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : - ESE_GZ_TX_DESC_IP4_ID_NO_OP, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer, ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci ); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f47d4d67bce9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5287,13 +5287,18 @@ void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature; + + if (gso_type & (SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_FIXEDID_INNER)) + gso_type |= __SKB_GSO_TCP_FIXEDID; + + feature = ((netdev_features_t)gso_type << NETIF_F_GSO_SHIFT) & NETIF_F_GSO_MASK; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(__SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..9e4540d379ba 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -674,7 +674,7 @@ enum { /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 3, + __SKB_GSO_TCP_FIXEDID = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, @@ -707,6 +707,10 @@ enum { SKB_GSO_FRAGLIST = 1 << 18, SKB_GSO_TCP_ACCECN = 1 << 19, + + /* These don't correspond with netdev features. */ + SKB_GSO_TCP_FIXEDID = 1 << 30, + SKB_GSO_TCP_FIXEDID_INNER = 1 << 31, }; #if BITS_PER_LONG > 32 diff --git a/net/core/dev.c b/net/core/dev.c index 68dc47d7e700..9941c39b5970 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * IPv4 header has the potential to be fragmented. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - struct iphdr *iph = skb->encapsulation ? - inner_ip_hdr(skb) : ip_hdr(skb); - - if (!(iph->frag_off & htons(IP_DF))) + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || + (skb->encapsulation && + !(inner_ip_hdr(skb)->frag_off & htons(IP_DF)))) features &= ~NETIF_F_TSO_MANGLEID; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76e38092cd8a..fc7a6955fa0a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1393,14 +1393,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - if (!skb->encapsulation || encap) { - udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + /* fixed ID is invalid if DF bit is not set */ + fixedid = !!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCP_FIXEDID << encap)); + if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto out; - /* fixed ID is invalid if DF bit is not set */ - if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) - goto out; - } + if (!skb->encapsulation || encap) + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 56817ef12ad2..be5c2294610e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -472,7 +472,6 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct tcphdr *th = tcp_hdr(skb); - bool is_fixedid; if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4; @@ -486,10 +485,8 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - is_fixedid = (NAPI_GRO_CB(skb)->ip_fixedid >> skb->encapsulation) & 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | - (is_fixedid * SKB_GSO_TCP_FIXEDID); + (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; -- 2.36.1 Currently, packets with fixed IDs will be merged only if their don't-fragment bit is set. Merged packets are re-split into segments before being fragmented, so the result is the same as if the packets weren't merged to begin with. Remove unnecessary don't-fragment checks. Signed-off-by: Richard Gobert --- include/net/gro.h | 5 ++--- net/ipv4/af_inet.c | 3 --- tools/testing/selftests/net/gro.c | 9 ++++----- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index 4307b68afda7..7b66fda5ad33 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -448,17 +448,16 @@ static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *ip const u32 id2 = ntohl(*(__be32 *)&iph2->id); const u16 ipid_offset = (id >> 16) - (id2 >> 16); const u16 count = NAPI_GRO_CB(p)->count; - const u32 df = id & IP_DF; /* All fields must match except length and checksum. */ - if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF))) + if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF)) return true; /* When we receive our second frame we can make a decision on if we * continue this flow as an atomic flow with a fixed ID or if we use * an incrementing ID. */ - if (count == 1 && df && !ipid_offset) + if (count == 1 && !ipid_offset) NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fc7a6955fa0a..c0542d9187e2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1393,10 +1393,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - /* fixed ID is invalid if DF bit is not set */ fixedid = !!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCP_FIXEDID << encap)); - if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) - goto out; if (!skb->encapsulation || encap) udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index d5824eadea10..3d4a82a2607c 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -670,7 +670,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should not coalesce */ + case 3: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -1188,10 +1188,9 @@ static void gro_receiver(void) correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); correct_payload[0] = PAYLOAD_LEN * 2; -- 2.36.1 Add IPIP test-cases to the GRO selftest. This selftest already contains IP ID test-cases. They are now also tested for encapsulated packets. This commit also fixes ipip packet generation in the test. Signed-off-by: Richard Gobert --- tools/testing/selftests/net/gro.c | 49 ++++++++++++++++++++++++------ tools/testing/selftests/net/gro.sh | 5 +-- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 3d4a82a2607c..451dc1c1eac5 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static bool ipip; static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) @@ -114,7 +115,9 @@ static void setup_sock_filter(int fd) int ipproto_off, opt_ipproto_off; int next_off; - if (proto == PF_INET) + if (ipip) + next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); + else if (proto == PF_INET) next_off = offsetof(struct iphdr, protocol); else next_off = offsetof(struct ipv6hdr, nexthdr); @@ -244,7 +247,7 @@ static void fill_datalinklayer(void *buf) eth->h_proto = ethhdr_proto; } -static void fill_networklayer(void *buf, int payload_len) +static void fill_networklayer(void *buf, int payload_len, int protocol) { struct ipv6hdr *ip6h = buf; struct iphdr *iph = buf; @@ -254,7 +257,7 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->version = 6; ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->nexthdr = protocol; ip6h->hop_limit = 8; if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); @@ -266,7 +269,7 @@ static void fill_networklayer(void *buf, int payload_len) iph->version = 4; iph->ihl = 5; iph->ttl = 8; - iph->protocol = IPPROTO_TCP; + iph->protocol = protocol; iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ @@ -313,9 +316,19 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, { memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); + + if (ipip) { + fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), + payload_len, IPPROTO_TCP); + fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), + IPPROTO_IPIP); + } else { + fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + } + fill_datalinklayer(buf); } @@ -416,6 +429,13 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) iph->tot_len = htons(ntohs(iph->tot_len) + extlen); iph->check = 0; iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + + if (ipip) { + iph += 1; + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } } else { ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); } @@ -777,7 +797,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr) */ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); fill_datalinklayer(buf); iph->frag_off = htons(0x6000); // DF = 1, MF = 1 @@ -1071,7 +1091,7 @@ static void gro_sender(void) * and min ipv6hdr size. Like MAX_HDR_SIZE, * MAX_PAYLOAD is defined with the larger header of the two. */ - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder); @@ -1221,7 +1241,7 @@ static void gro_receiver(void) check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; correct_payload[0] = (MAX_PAYLOAD + offset); @@ -1250,6 +1270,7 @@ static void parse_args(int argc, char **argv) { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, @@ -1259,7 +1280,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1269,6 +1290,11 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1304,7 +1330,10 @@ int main(int argc, char **argv) { parse_args(argc, argv); - if (proto == PF_INET) { + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 9e3f186bc2a1..d16ec365b3cf 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -4,7 +4,7 @@ readonly SERVER_MAC="aa:00:00:00:00:02" readonly CLIENT_MAC="aa:00:00:00:00:01" readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") +readonly PROTOS=("ipv4" "ipv6" "ipip") dev="" test="all" proto="ipv4" @@ -31,7 +31,8 @@ run_test() { 1>>log.txt wait "${server_pid}" exit_code=$? - if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ + if [[ ( ${test} == "large" || ${protocol} == "ipip" ) && \ + -n "${KSFT_MACHINE_SLOW}" && \ ${exit_code} -ne 0 ]]; then echo "Ignoring errors due to slow environment" 1>&2 exit_code=0 -- 2.36.1