From: Jason Xing It's beneficial for small data transmission. Replace per-SKB kmalloc_reserve() with on-demand bulk allocation from skb_small_head_cache for small packets. Add a persistent per-socket data buffer cache (batch.data_cache / batch.data_count) that survives across batch cycles, similar to how batch.send_queue caches built SKBs. Inside the Phase-1 per-descriptor loop, when a small packet needs a data buffer and the cache is empty, a single kmem_cache_alloc_bulk() call refills it with up to generic_xmit_batch buffers. Subsequent small packets pop directly from the cache. Large packets bypass the cache entirely and fall back to kmalloc_reserve(). Unused buffers remain in the cache for the next batch. I observed that kmalloc_reserve() consumes nearly 40% of CPU time, which seemed unavoidable at first glance; I expected that adding a bulk allocation mechanism would improve performance. That is the motivation for this patch. The feature now gives us around a 10% improvement. Signed-off-by: Jason Xing --- include/net/xdp_sock.h | 2 ++ net/core/skbuff.c | 27 ++++++++++++++++++++++----- net/xdp/xsk.c | 24 ++++++++++++++++++++---- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 84f0aee3fb10..2151aab8f0a1 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -51,6 +51,8 @@ struct xsk_batch { struct sk_buff **skb_cache; struct xdp_desc *desc_cache; struct sk_buff_head send_queue; + unsigned int data_count; + void **data_cache; }; struct xdp_sock { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f29cecacd8bb..5726b1566b2b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -661,9 +661,11 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err unsigned int total_truesize = 0; struct sk_buff *skb = NULL; int node = NUMA_NO_NODE; + void **dc = batch->data_cache; + unsigned int dc_count = batch->data_count; u32 i = 0, j, k = 0; bool need_alloc; - u8 *data; + void *data; 
base_len = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom)); if (!(dev->priv_flags & IFF_TX_SKB_NO_LINEAR)) @@ -683,6 +685,13 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err nb_pkts = skb_count; alloc_data: + if (dc_count < nb_pkts && !(gfp_mask & KMALLOC_NOT_NORMAL_BITS)) + dc_count += kmem_cache_alloc_bulk( + net_hotdata.skb_small_head_cache, + gfp_mask | __GFP_NOMEMALLOC | __GFP_NOWARN, + batch->generic_xmit_batch - dc_count, + &dc[dc_count]); + /* * Phase 1: Allocate data buffers and initialize SKBs. * Pre-scan descriptors to determine packet boundaries, so we can @@ -710,10 +719,17 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err skb = skbs[skb_count - 1 - i]; skbuff_clear(skb); - data = kmalloc_reserve(&size, gfp_mask, node, skb); - if (unlikely(!data)) { - *err = -ENOBUFS; - break; + if (dc_count && + SKB_HEAD_ALIGN(size) <= SKB_SMALL_HEAD_CACHE_SIZE) { + data = dc[--dc_count]; + size = SKB_SMALL_HEAD_CACHE_SIZE; + } else { + data = kmalloc_reserve(&size, gfp_mask, + node, skb); + if (unlikely(!data)) { + *err = -ENOBUFS; + break; + } } __finalize_skb_around(skb, data, size); /* Replace skb_set_owner_w() with the following */ @@ -762,6 +778,7 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err while (k < i) kfree_skb(skbs[skb_count - 1 - k++]); + batch->data_count = dc_count; batch->skb_count = skb_count - i; return j; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f97bc9cf9b9a..7a6991bc19a8 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1229,14 +1229,22 @@ static void xsk_delete_from_maps(struct xdp_sock *xs) } static void xsk_batch_reset(struct xsk_batch *batch, struct sk_buff **skbs, - struct xdp_desc *descs, unsigned int size) -{ + struct xdp_desc *descs, void **data, + unsigned int size) +{ + if (batch->data_count) + kmem_cache_free_bulk(net_hotdata.skb_small_head_cache, + batch->data_count, + batch->data_cache); + kfree(batch->data_cache); 
if (batch->skb_count) kmem_cache_free_bulk(net_hotdata.skbuff_cache, batch->skb_count, (void **)batch->skb_cache); kfree(batch->skb_cache); kvfree(batch->desc_cache); + batch->data_cache = data; + batch->data_count = 0; batch->skb_cache = skbs; batch->desc_cache = descs; batch->skb_count = 0; @@ -1272,7 +1280,7 @@ static int xsk_release(struct socket *sock) xskq_destroy(xs->tx); xskq_destroy(xs->fq_tmp); xskq_destroy(xs->cq_tmp); - xsk_batch_reset(&xs->batch, NULL, NULL, 0); + xsk_batch_reset(&xs->batch, NULL, NULL, NULL, 0); sock_orphan(sk); sock->sk = NULL; @@ -1620,6 +1628,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_batch *batch = &xs->batch; struct xdp_desc *descs; struct sk_buff **skbs; + void **data; unsigned int size; int ret = 0; @@ -1638,14 +1647,21 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ret = -ENOMEM; goto out; } + data = kmalloc_array(size, sizeof(void *), GFP_KERNEL); + if (!data) { + kfree(skbs); + ret = -ENOMEM; + goto out; + } descs = kvcalloc(size, sizeof(struct xdp_desc), GFP_KERNEL); if (!descs) { + kfree(data); kfree(skbs); ret = -ENOMEM; goto out; } - xsk_batch_reset(batch, skbs, descs, size); + xsk_batch_reset(batch, skbs, descs, data, size); out: mutex_unlock(&xs->mutex); return ret; -- 2.41.3