Currently {tcp,udp}_gro_receive rely on the GRO network stage setting
the correct transport header offset for all the skbs held by the GRO
engine.

Such an assumption is not necessary: the code can instead leverage the
offset already available for the currently processed skb. Add a couple
of helpers for readability's sake.

As skb->transport_header lies on a different cacheline from skb->data,
this should save a cacheline access for each packet aggregation.
Additionally, this makes the next patch possible.

Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
call in tcp_gro_receive(), so the additional argument is only relevant
for the fraglist case.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
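A minimal standalone sketch of the arithmetic behind the new helper
(userspace mock, not kernel code: "mock_skb" and mock_gro_header_from()
are hypothetical stand-ins for struct sk_buff and skb_gro_header_from()).
Same-flow packets share the header layout, so a header pointer computed
for the skb under processing maps onto any held skb via its byte offset
from skb->data, without reading the held skb's transport_header:

#include <stdio.h>

struct mock_skb {
	unsigned char *data;		/* start of the packet headers */
};

static const void *
mock_gro_header_from(const struct mock_skb *skb, const struct mock_skb *skb2,
		     const void *hdr2)
{
	/* Same computation as skb_gro_header_from(): offset of hdr2
	 * inside skb2, re-applied to skb.
	 */
	size_t offset = (const unsigned char *)hdr2 - skb2->data;

	return skb->data + offset;
}

int main(void)
{
	/* Two same-flow packets: 20-byte IPv4 header, then TCP. */
	unsigned char held_buf[64] = { 0 }, cur_buf[64] = { 0 };
	struct mock_skb held = { .data = held_buf };
	struct mock_skb cur = { .data = cur_buf };
	const unsigned char *th_held;
	const void *th_cur;

	held_buf[20] = 0x12;	/* first byte of the held TCP header */

	/* The transport header of the packet under processing is
	 * known; map it onto the packet already held by the engine.
	 */
	th_cur = cur_buf + 20;
	th_held = mock_gro_header_from(&held, &cur, th_cur);
	printf("held TCP header starts with 0x%02x\n", *th_held);
	return 0;
}

tcp_gro_header_from() and udp_gro_udphdr_from() below are merely typed
wrappers around the same computation.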
 include/net/gro.h        | 26 ++++++++++++++++++++++++++
 include/net/tcp.h        |  3 ++-
 net/ipv4/tcp_offload.c   | 15 ++++++++-------
 net/ipv4/udp_offload.c   |  4 ++--
 net/ipv6/tcpv6_offload.c |  2 +-
 5 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index b65f631c521d..fdb9285ab117 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, struct sock *sk);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
+/* Return the skb hdr corresponding to the specified skb2 hdr.
+ * skb2 is held in the gro engine, i.e. its headers are in the linear part.
+ */
+static inline const void *
+skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const void *hdr2)
+{
+	size_t offset = (unsigned char *)hdr2 - skb2->data;
+
+	return skb->data + offset;
+}
+
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
 	struct udphdr *uh;
@@ -432,6 +444,13 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 	return uh;
 }
 
+static inline const struct udphdr *
+udp_gro_udphdr_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const struct udphdr *uh)
+{
+	return (const struct udphdr *)skb_gro_header_from(skb, skb2, uh);
+}
+
 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
 					    int proto)
 {
@@ -620,4 +639,11 @@ static inline struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
 	return th;
 }
 
+static inline const struct tcphdr *
+tcp_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const struct tcphdr *th)
+{
+	return (const struct tcphdr *)skb_gro_header_from(skb, skb2, th);
+}
+
 #endif /* _NET_GRO_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0deb5e9dd911..a4c239daf2ea 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2313,7 +2313,8 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct sk_buff *skb,
+			       struct tcphdr *th);
 struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct tcphdr *th);
 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fdda18b1abda..fa36686df6d7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -261,16 +261,17 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	return segs;
 }
 
-struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct sk_buff *skb,
+			       struct tcphdr *th)
 {
-	struct tcphdr *th2;
+	const struct tcphdr *th2;
 	struct sk_buff *p;
 
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		th2 = tcp_hdr(p);
+		th2 = tcp_gro_header_from(p, skb, th);
 		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
@@ -287,8 +288,8 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 {
 	unsigned int thlen = th->doff * 4;
 	struct sk_buff *pp = NULL;
+	const struct tcphdr *th2;
 	struct sk_buff *p;
-	struct tcphdr *th2;
 	unsigned int len;
 	__be32 flags;
 	unsigned int mss = 1;
@@ -298,11 +299,11 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (!p)
 		goto out_check_final;
 
-	th2 = tcp_hdr(p);
+	th2 = tcp_gro_header_from(p, skb, th);
 	flush = (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 			       ~(TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -398,7 +399,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
 	if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
 		return;
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (p) {
 		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
 		return;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19d0b5b09ffa..7048cb2a28a2 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -701,7 +701,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 	struct sk_buff *pp = NULL;
-	struct udphdr *uh2;
+	const struct udphdr *uh2;
 	struct sk_buff *p;
 	unsigned int ulen;
 	int ret = 0;
@@ -726,7 +726,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		uh2 = udp_hdr(p);
+		uh2 = udp_gro_udphdr_from(p, skb, uh);
 
 		/* Match ports only, as csum is always non zero */
 		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index effeba58630b..ae481bf95651 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -27,7 +27,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
 	if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
 		return;
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (p) {
 		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
 		return;
-- 
2.52.0

After the previous patch, the GRO engine receive callbacks no longer
rely on the skb transport header being set. Move that operation to GRO
complete time, with one notable exception: SKB_GSO_FRAGLIST offload
needs the headers to be set on each skb in the list prior to
segmentation.

This prevents the NAPI gro_cell instance on top of a geneve tunnel with
GRO hints enabled from corrupting the GRO-hint-aggregated packet by
setting the (innermost) transport header to the middle one before
stopping the GRO process due to the encap mark.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
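A minimal sketch of the new placement (userspace mock, not kernel code:
"mock_skb" and mock_inet_gro_complete() are hypothetical stand-ins).
Since inet_gro_receive() flushes any packet carrying IP options, the
IPv4 header is always 20 bytes at complete time, so the transport
header offset can be rebuilt from nhoff alone and the receive path no
longer needs to touch skb->transport_header at all:

#include <stdio.h>

#define MOCK_ETH_HLEN	14	/* Ethernet MAC header */
#define MOCK_IPV4_HLEN	20	/* IPv4 header without options */

struct mock_skb {
	int network_header;	/* header offsets, as in sk_buff */
	int transport_header;
};

/* Models the new inet_gro_complete() behaviour: the transport header
 * is derived once, at complete time, from the network header offset
 * handed to the callback.
 */
static void mock_inet_gro_complete(struct mock_skb *skb, int nhoff)
{
	skb->transport_header = nhoff + MOCK_IPV4_HLEN;
}

int main(void)
{
	struct mock_skb skb = { .network_header = MOCK_ETH_HLEN };

	mock_inet_gro_complete(&skb, skb.network_header);
	printf("network header at %d, transport header at %d\n",
	       skb.network_header, skb.transport_header);	/* 14, 34 */
	return 0;
}

The SKB_GSO_FRAGLIST paths are the exception: tcp_gro_receive() and
udp_gro_receive_segment() still set the headers right before
skb_gro_receive_list(), as segmentation needs them on each skb in the
list.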
 net/ipv4/af_inet.c     | 2 +-
 net/ipv4/tcp_offload.c | 1 +
 net/ipv4/udp_offload.c | 4 ++++
 net/ipv6/ip6_offload.c | 3 +--
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 08d811f11896..f954ab78481a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1527,7 +1527,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 	 * as we already checked checksum over ipv4 header was 0
 	 */
 	skb_gro_pull(skb, sizeof(*iph));
-	skb_set_transport_header(skb, skb_gro_offset(skb));
 
 	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
 				       ops->callbacks.gro_receive, head, skb);
@@ -1611,6 +1610,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out;
 
+	skb_set_transport_header(skb, nhoff + sizeof(*iph));
 	/* Only need to add sizeof(*iph) to get to the next hdr below
 	 * because any hdr with option will have been flushed in
 	 * inet_gro_receive().
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fa36686df6d7..a78d9b15de06 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -334,6 +334,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 		flush |= skb->csum_level != p->csum_level;
 		flush |= NAPI_GRO_CB(p)->count >= 64;
 		skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
+		skb_set_transport_header(skb, (unsigned char *)th - skb->data);
 
 		if (flush || skb_gro_receive_list(p, skb))
 			mss = 1;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 7048cb2a28a2..73edbc154cfa 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -751,6 +751,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		pp = p;
 	} else {
 		if (NAPI_GRO_CB(skb)->is_flist) {
+			int offset;
+
 			if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
 				NAPI_GRO_CB(skb)->flush = 1;
 				return NULL;
 			}
@@ -761,6 +763,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 				return NULL;
 			}
 			skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
+			offset = (unsigned char *)uh - skb->data;
+			skb_set_transport_header(skb, offset);
 			ret = skb_gro_receive_list(p, skb);
 		} else {
 			skb_gro_postpull_rcsum(skb, uh,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index fce91183797a..ed71cbd45690 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -256,8 +256,6 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		skb_gro_pull(skb, sizeof(*iph));
 	}
 
-	skb_set_transport_header(skb, skb_gro_offset(skb));
-
 	NAPI_GRO_CB(skb)->proto = proto;
 
 	flush--;
@@ -382,6 +380,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out;
 
+	skb_set_transport_header(skb, nhoff);
 	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
 			       udp6_gro_complete, skb, nhoff);
 
-- 
2.52.0