clang is not inlining skb_frag_unref() and __skb_frag_unref() in gro fast path. It also does not inline gro_try_pull_from_frag0(). Using __always_inline fixes this issue, makes the kernel faster _and_ smaller. Also change __skb_frag_ref(), skb_frag_ref() and skb_page_unref() to let them inlined for the last patch in this series. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 2/6 grow/shrink: 1/2 up/down: 218/-511 (-293) Function old new delta gro_pull_from_frag0 - 188 +188 __pfx_gro_pull_from_frag0 - 16 +16 skb_shift 1125 1139 +14 __pfx_skb_frag_unref 16 - -16 __pfx_gro_try_pull_from_frag0 16 - -16 __pfx___skb_frag_unref 16 - -16 __skb_frag_unref 36 - -36 skb_frag_unref 59 - -59 dev_gro_receive 1608 1514 -94 napi_gro_frags 892 771 -121 gro_try_pull_from_frag0 153 - -153 Total: Before=22566192, After=22565899, chg -0.00% Signed-off-by: Eric Dumazet --- include/linux/skbuff_ref.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 9e49372ef1a05efdfd4cb42973fbce109332d5bf..05c8486bafac83dba4e52ffee23d8a19c92b8f27 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -15,7 +15,7 @@ * * Takes an additional reference on the paged fragment @frag. */ -static inline void __skb_frag_ref(skb_frag_t *frag) +static __always_inline void __skb_frag_ref(skb_frag_t *frag) { get_netmem(skb_frag_netmem(frag)); } @@ -27,14 +27,14 @@ static inline void __skb_frag_ref(skb_frag_t *frag) * * Takes an additional reference on the @f'th paged fragment of @skb. */ -static inline void skb_frag_ref(struct sk_buff *skb, int f) +static __always_inline void skb_frag_ref(struct sk_buff *skb, int f) { __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } bool napi_pp_put_page(netmem_ref netmem); -static inline void skb_page_unref(netmem_ref netmem, bool recycle) +static __always_inline void skb_page_unref(netmem_ref netmem, bool recycle) { #ifdef CONFIG_PAGE_POOL if (recycle && napi_pp_put_page(netmem)) @@ -51,7 +51,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle) * Releases a reference on the paged fragment @frag * or recycles the page via the page_pool API. */ -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) +static __always_inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { skb_page_unref(skb_frag_netmem(frag), recycle); } @@ -63,7 +63,7 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) * * Releases a reference on the @f'th paged fragment of @skb. */ -static inline void skb_frag_unref(struct sk_buff *skb, int f) +static __always_inline void skb_frag_unref(struct sk_buff *skb, int f) { struct skb_shared_info *shinfo = skb_shinfo(skb); -- 2.52.0.457.g6b5491de43-goog Replace the BUG_ON() which never fired with a DEBUG_NET_WARN_ON_ONCE() $ scripts/bloat-o-meter -t vmlinux.1 vmlinux.2 add/remove: 2/2 grow/shrink: 1/1 up/down: 370/-254 (116) Function old new delta gro_try_pull_from_frag0 - 196 +196 napi_gro_frags 771 929 +158 __pfx_gro_try_pull_from_frag0 - 16 +16 __pfx_gro_pull_from_frag0 16 - -16 dev_gro_receive 1514 1464 -50 gro_pull_from_frag0 188 - -188 Total: Before=22565899, After=22566015, chg +0.00% Signed-off-by: Eric Dumazet --- net/core/gro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/gro.c b/net/core/gro.c index 76f9c3712422109ad00f15f6804abf6a8b00db43..ad326c7cdc0a1f31c416ffe60cc492d3fcd7b050 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -415,7 +415,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) { struct skb_shared_info *pinfo = skb_shinfo(skb); - BUG_ON(skb->end - skb->tail < grow); + DEBUG_NET_WARN_ON_ONCE(skb->end - skb->tail < grow); memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); -- 2.52.0.457.g6b5491de43-goog 1) Inline this small helper to reduce code size and decrease cpu costs. 2) Constify its argument. 3) Move it to include/net/netmem.h, as a prereq for the following patch. $ scripts/bloat-o-meter -t vmlinux.2 vmlinux.3 add/remove: 0/2 grow/shrink: 0/4 up/down: 0/-158 (-158) Function old new delta validate_xmit_skb 866 857 -9 __pfx_net_is_devmem_iov 16 - -16 net_is_devmem_iov 22 - -22 get_netmem 152 130 -22 put_netmem 140 114 -26 tcp_recvmsg_locked 3860 3797 -63 Total: Before=22566015, After=22565857, chg -0.00% Signed-off-by: Eric Dumazet --- include/net/netmem.h | 12 ++++++++++++ net/core/devmem.c | 5 ----- net/core/devmem.h | 7 +------ 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 9e10f4ac50c3d4381d2404c3b079366dfc08bab7..2113a197abb315f608ee3d6d3e8a60811b3781f8 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -389,6 +389,18 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) return netmem_to_nmdesc(netmem)->dma_addr; } +#if defined(CONFIG_NET_DEVMEM) +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return niov->type == NET_IOV_DMABUF; +} +#else +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return false; +} +#endif + void get_netmem(netmem_ref netmem); void put_netmem(netmem_ref netmem); diff --git a/net/core/devmem.c b/net/core/devmem.c index 185ed2a73d1cfa55858e4e0d42be365f612662fa..63f093f7d2b23f01929b83746ae25779d3f925d0 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -30,11 +30,6 @@ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); static const struct memory_provider_ops dmabuf_devmem_ops; -bool net_is_devmem_iov(struct net_iov *niov) -{ - return niov->type == NET_IOV_DMABUF; -} - static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) diff --git a/net/core/devmem.h b/net/core/devmem.h index 2534c8144212c79f09389201ae190511948977fb..1c5c18581fcb146450cfb18a4b1d0ce8a17b96a9 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -141,7 +141,7 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); -bool net_is_devmem_iov(struct net_iov *niov); + struct net_devmem_dmabuf_binding * net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id); struct net_iov * @@ -214,11 +214,6 @@ static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) return 0; } -static inline bool net_is_devmem_iov(struct net_iov *niov) -{ - return false; -} - static inline struct net_devmem_dmabuf_binding * net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) { -- 2.52.0.457.g6b5491de43-goog These helpers are used in network fast paths. Only call out-of-line helpers for netmem case. We might consider inlining __get_netmem() and __put_netmem() in the future. $ scripts/bloat-o-meter -t vmlinux.3 vmlinux.4 add/remove: 6/6 grow/shrink: 22/1 up/down: 2614/-646 (1968) Function old new delta pskb_carve 1669 1894 +225 gro_pull_from_frag0 - 206 +206 get_page 190 380 +190 skb_segment 3561 3747 +186 put_page 595 765 +170 skb_copy_ubufs 1683 1822 +139 __pskb_trim_head 276 401 +125 __pskb_copy_fclone 734 858 +124 skb_zerocopy 1092 1215 +123 pskb_expand_head 892 1008 +116 skb_split 828 940 +112 skb_release_data 297 409 +112 ___pskb_trim 829 941 +112 __skb_zcopy_downgrade_managed 120 226 +106 tcp_clone_payload 530 634 +104 esp_ssg_unref 191 294 +103 dev_gro_receive 1464 1514 +50 __put_netmem - 41 +41 __get_netmem - 41 +41 skb_shift 1139 1175 +36 skb_try_coalesce 681 714 +33 __pfx_put_page 112 144 +32 __pfx_get_page 32 64 +32 __pskb_pull_tail 1137 1168 +31 veth_xdp_get 250 267 +17 __pfx_gro_pull_from_frag0 - 16 +16 __pfx___put_netmem - 16 +16 __pfx___get_netmem - 16 +16 __pfx_put_netmem 16 - -16 __pfx_gro_try_pull_from_frag0 16 - -16 __pfx_get_netmem 16 - -16 put_netmem 114 - -114 get_netmem 130 - -130 napi_gro_frags 929 771 -158 gro_try_pull_from_frag0 196 - -196 Total: Before=22565857, After=22567825, chg +0.01% Signed-off-by: Eric Dumazet --- include/net/netmem.h | 20 ++++++++++++++++++-- net/core/skbuff.c | 31 ++++++++++--------------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 2113a197abb315f608ee3d6d3e8a60811b3781f8..a96b3e5e5574c1800ae7949c39366968707ab5d5 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -401,8 +401,24 @@ static inline bool net_is_devmem_iov(const struct net_iov *niov) } #endif -void get_netmem(netmem_ref netmem); -void put_netmem(netmem_ref netmem); +void __get_netmem(netmem_ref netmem); +void __put_netmem(netmem_ref netmem); + +static __always_inline void get_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __get_netmem(netmem); + else + get_page(netmem_to_page(netmem)); +} + +static __always_inline void put_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __put_netmem(netmem); + else + put_page(netmem_to_page(netmem)); +} #define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ do { \ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c57c806edba8524d3d498800e61ae6901fbfe5fb..2a8235c3d6f7fcee8b0b28607c10db985965b8d4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7422,31 +7422,20 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, } EXPORT_SYMBOL(csum_and_copy_from_iter_full); -void get_netmem(netmem_ref netmem) +void __get_netmem(netmem_ref netmem) { - struct net_iov *niov; + struct net_iov *niov = netmem_to_net_iov(netmem); - if (netmem_is_net_iov(netmem)) { - niov = netmem_to_net_iov(netmem); - if (net_is_devmem_iov(niov)) - net_devmem_get_net_iov(netmem_to_net_iov(netmem)); - return; - } - get_page(netmem_to_page(netmem)); + if (net_is_devmem_iov(niov)) + net_devmem_get_net_iov(netmem_to_net_iov(netmem)); } -EXPORT_SYMBOL(get_netmem); +EXPORT_SYMBOL(__get_netmem); -void put_netmem(netmem_ref netmem) +void __put_netmem(netmem_ref netmem) { - struct net_iov *niov; - - if (netmem_is_net_iov(netmem)) { - niov = netmem_to_net_iov(netmem); - if (net_is_devmem_iov(niov)) - net_devmem_put_net_iov(netmem_to_net_iov(netmem)); - return; - } + struct net_iov *niov = netmem_to_net_iov(netmem); - put_page(netmem_to_page(netmem)); + if (net_is_devmem_iov(niov)) + net_devmem_put_net_iov(netmem_to_net_iov(netmem)); } -EXPORT_SYMBOL(put_netmem); +EXPORT_SYMBOL(__put_netmem); -- 2.52.0.457.g6b5491de43-goog