clang does not inline this helper in GRO fast path. We can save space and cpu cycles. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 0/2 grow/shrink: 2/0 up/down: 156/-218 (-62) Function old new delta tcp6_gro_complete 227 311 +84 tcp4_gro_complete 325 397 +72 __pfx___skb_incr_checksum_unnecessary 32 - -32 __skb_incr_checksum_unnecessary 186 - -186 Total: Before=22592724, After=22592662, chg -0.00% Signed-off-by: Eric Dumazet --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 86737076101d4a8452e90fe78adcdcfdefb79169..e6bfe5d0c5252b2e7540e1fef9317aab83feced2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4763,7 +4763,7 @@ static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) } } -static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +static __always_inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level < SKB_MAX_CSUM_LEVEL) -- 2.52.0.457.g6b5491de43-goog FDO/LTO are unable to inline tcp6_gro_receive() from ipv6_gro_receive(). Make sure tcp6_check_fraglist_gro() is called only when needed, so that the compiler can leave it out-of-line. 
$ scripts/bloat-o-meter -t vmlinux.1 vmlinux.2 add/remove: 2/0 grow/shrink: 3/1 up/down: 1123/-253 (870) Function old new delta ipv6_gro_receive 1069 1846 +777 tcp6_check_fraglist_gro - 272 +272 ipv6_offload_init 218 274 +56 __pfx_tcp6_check_fraglist_gro - 16 +16 ipv6_gro_complete 433 435 +2 tcp6_gro_receive 959 706 -253 Total: Before=22592662, After=22593532, chg +0.00% Signed-off-by: Eric Dumazet --- include/net/gro.h | 3 +-- include/net/tcp.h | 1 - net/ipv6/Makefile | 2 +- net/ipv6/ip6_offload.c | 22 +++++++++++++--------- net/ipv6/tcpv6_offload.c | 10 ++++------ net/ipv6/udp_offload.c | 1 - 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index b65f631c521d7d9741ef86781add0038c9ce4055..85e5eeed4c90feef9440c57af9382b0e9ead1219 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -405,8 +405,7 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); +struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 15f9b20f851fe322f4417ff403c3965436aa3f9f..3b94c84888a884d9ca8eb602ad1f7d4f941f3ef9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2327,7 +2327,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, 
struct sk_buff *skb)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index d283c59df4c1c421bc043056fe11e5437cc4aece..0492f1a0b4918ada8c56cf649fbec04c7114863a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -45,7 +45,7 @@ obj-$(CONFIG_IPV6_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o \ - ip6_offload.o tcpv6_offload.o exthdrs_offload.o + ip6_offload.o exthdrs_offload.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index fce91183797a60fcbf271c73e086aeb0aa9d40c6..4d96154c0dcd019322908ab6ddaa663a2a565e44 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -19,6 +19,7 @@ #include #include "ip6_offload.h" +#include "tcpv6_offload.c" /* All GRO functions are always builtin, except UDP over ipv6, which lays in * ipv6 module, as it depends on UDPv6 lookup function, so we need special care @@ -30,13 +31,6 @@ #define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) #endif -#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \ -({ \ - unlikely(gro_recursion_inc_test(skb)) ? 
\ - NAPI_GRO_CB(skb)->flush |= 1, NULL : \ - INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ -}) - static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) { const struct net_offload *ops = NULL; @@ -298,9 +292,19 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive, - ops->callbacks.gro_receive, head, skb); + if (unlikely(gro_recursion_inc_test(skb))) { + flush = 1; + goto out; + } + if (likely(proto == IPPROTO_TCP)) + pp = tcp6_gro_receive(head, skb); +#if IS_BUILTIN(CONFIG_IPV6) + else if (likely(proto == IPPROTO_UDP)) + pp = udp6_gro_receive(head, skb); +#endif + else + pp = ops->callbacks.gro_receive(head, skb); out: skb_gro_flush_final(skb, pp, flush); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index effeba58630b5ac2593b824bd8fc10a473954b6c..7f19ce423058870f285b7f8ae2a4d116d783f9fb 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -24,9 +24,6 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, struct net *net; int iif, sdif; - if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) - return; - p = tcp_gro_lookup(head, th); if (p) { NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; @@ -45,8 +42,8 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, #endif /* IS_ENABLED(CONFIG_IPV6) */ } -INDIRECT_CALLABLE_SCOPE -struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) +static __always_inline struct sk_buff *tcp6_gro_receive(struct list_head *head, + struct sk_buff *skb) { struct tcphdr *th; @@ -60,7 +57,8 @@ struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) if (!th) goto flush; - tcp6_check_fraglist_gro(head, skb, th); + if (unlikely(skb->dev->features & NETIF_F_GRO_FRAGLIST)) + tcp6_check_fraglist_gro(head, skb, th); return tcp_gro_receive(head, 
skb, th); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 046f13b1d77a19b9ec4e0a07f531081d174defb1..e90aaa84941c60ec0fa2e23051c422064a959096 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -132,7 +132,6 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, sdif, net->ipv4.udp_table, NULL); } -INDIRECT_CALLABLE_SCOPE struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); -- 2.52.0.457.g6b5491de43-goog Remove one function call from GRO stack for native IPv6 + TCP packets. $ scripts/bloat-o-meter -t vmlinux.2 vmlinux.3 add/remove: 0/0 grow/shrink: 1/1 up/down: 298/-5 (293) Function old new delta ipv6_gro_complete 435 733 +298 tcp6_gro_complete 311 306 -5 Total: Before=22593532, After=22593825, chg +0.00% Signed-off-by: Eric Dumazet --- include/net/gro.h | 2 +- include/net/tcp.h | 1 - net/ipv6/ip6_offload.c | 21 +++++++++------------ net/ipv6/tcpv6_offload.c | 2 +- net/ipv6/udp_offload.c | 2 +- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index 85e5eeed4c90feef9440c57af9382b0e9ead1219..2300b6da05b2728ec40f42228f8fa9c195d8479c 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -406,7 +406,7 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +int udp6_gro_complete(struct sk_buff *, int); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3b94c84888a884d9ca8eb602ad1f7d4f941f3ef9..ebdf59d435b8002ca9b90803f40720a58ce3e809 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2326,7 +2326,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct 
sk_buff *skb, struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4d96154c0dcd019322908ab6ddaa663a2a565e44..32a104ead8760d33e152e0b0a6a6896d70d155b5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -21,16 +21,6 @@ #include "ip6_offload.h" #include "tcpv6_offload.c" -/* All GRO functions are always builtin, except UDP over ipv6, which lays in - * ipv6 module, as it depends on UDPv6 lookup function, so we need special care - * when ipv6 is built as a module - */ -#if IS_BUILTIN(CONFIG_IPV6) -#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) -#else -#define INDIRECT_CALL_L4(f, f2, f1, ...) 
INDIRECT_CALL_1(f, f2, __VA_ARGS__) -#endif - static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) { const struct net_offload *ops = NULL; @@ -383,11 +373,18 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) } nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); + + if (likely(ops == &net_hotdata.tcpv6_offload)) + return tcp6_gro_complete(skb, nhoff); +#if IS_BUILTIN(CONFIG_IPV6) + if (ops == &net_hotdata.udpv6_offload) + return udp6_gro_complete(skb, nhoff); +#endif + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out; - err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete, - udp6_gro_complete, skb, nhoff); + err = ops->callbacks.gro_complete(skb, nhoff); out: return err; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 7f19ce423058870f285b7f8ae2a4d116d783f9fb..46fa2069d321663ed232e2836db77e3fcb1f4f07 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -67,7 +67,7 @@ static __always_inline struct sk_buff *tcp6_gro_receive(struct list_head *head, return NULL; } -INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) +static __always_inline int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e90aaa84941c60ec0fa2e23051c422064a959096..e003b8494dc0e900994e6d4d2928f6bb8dd5787e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -164,7 +164,7 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) return NULL; } -INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) +int udp6_gro_complete(struct sk_buff *skb, int nhoff) { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + 
offset); -- 2.52.0.457.g6b5491de43-goog