We are unnecessarily setting a bunch of skb fields per each processed descriptor, which is redundant for fragmented frames. Let us set these respective members for first fragment only. To address both paths that we have within xsk_build_skb(), move assignments onto xsk_set_destructor_arg() and rename it to xsk_skb_init_misc(). Signed-off-by: Maciej Fijalkowski --- net/xdp/xsk.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72e34bd2d925..01f258894fae 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -618,11 +618,16 @@ static void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_set_destructor_arg(struct sk_buff *skb, u64 addr) +static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, + u64 addr) { BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb)); INIT_LIST_HEAD(&XSKCB(skb)->addrs_list); + skb->dev = xs->dev; + skb->priority = READ_ONCE(xs->sk.sk_priority); + skb->mark = READ_ONCE(xs->sk.sk_mark); XSKCB(skb)->num_descs = 0; + skb->destructor = xsk_destruct_skb; skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr; } @@ -673,7 +678,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, skb_reserve(skb, hr); - xsk_set_destructor_arg(skb, desc->addr); + xsk_skb_init_misc(skb, xs, desc->addr); } else { xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); if (!xsk_addr) @@ -757,7 +762,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (unlikely(err)) goto free_err; - xsk_set_destructor_arg(skb, desc->addr); + xsk_skb_init_misc(skb, xs, desc->addr); } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct xsk_addr_node *xsk_addr; @@ -826,14 +831,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME) skb->skb_mstamp_ns = meta->request.launch_time; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); } } - skb->dev = dev; - skb->priority = READ_ONCE(xs->sk.sk_priority); - skb->mark = READ_ONCE(xs->sk.sk_mark); - skb->destructor = xsk_destruct_skb; - xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); xsk_inc_num_desc(skb); return skb; -- 2.43.0 Instead of using auxiliary boolean that tracks if we are at first frag when gathering all elements of skb, same functionality can be achieved with checking if skb_shared_info::nr_frags is 0. Remove @first_frag but be careful around xsk_build_skb_zerocopy() and NULL the skb pointer when it failed so that common error path does not incorrectly interpret it during decision whether to call kfree_skb(). Signed-off-by: Maciej Fijalkowski --- net/xdp/xsk.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 01f258894fae..f7e0d254a723 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -730,13 +730,13 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; - bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { skb = xsk_build_skb_zerocopy(xs, desc); if (IS_ERR(skb)) { err = PTR_ERR(skb); + skb = NULL; goto free_err; } } else { @@ -747,8 +747,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, len = desc->len; if (!skb) { - first_frag = true; - hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom)); tr = dev->needed_tailroom; skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err); @@ -798,7 +796,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); } - if (first_frag && desc->options & XDP_TX_METADATA) { + if (!skb_shinfo(skb)->nr_frags && desc->options & XDP_TX_METADATA) { if (unlikely(xs->pool->tx_metadata_len == 0)) { err = -EINVAL; goto free_err; @@ -840,7 +838,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, return skb; free_err: - if (first_frag && skb) + if (skb && !skb_shinfo(skb)->nr_frags) kfree_skb(skb); if (err == -EOVERFLOW) { -- 2.43.0 xsk_build_skb() has gone wild with its size and one of the things we can do about it is to pull out a branch that takes care of metadata handling and make it a separate function. While at it, let us add metadata SW support for devices supporting IFF_TX_SKB_NO_LINEAR flag, that happen to have separate logic for building skb in xsk's generic xmit path. Acked-by: Stanislav Fomichev Reviewed-by: Jason Xing Signed-off-by: Maciej Fijalkowski --- net/xdp/xsk.c | 92 +++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f7e0d254a723..7b0c68a70888 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -657,6 +657,45 @@ static void xsk_drop_skb(struct sk_buff *skb) xsk_consume_skb(skb); } +static int xsk_skb_metadata(struct sk_buff *skb, void *buffer, + struct xdp_desc *desc, struct xsk_buff_pool *pool, + u32 hr) +{ + struct xsk_tx_metadata *meta = NULL; + + if (unlikely(pool->tx_metadata_len == 0)) + return -EINVAL; + + meta = buffer - pool->tx_metadata_len; + if (unlikely(!xsk_buff_valid_tx_metadata(meta))) + return -EINVAL; + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > desc->len)) + return -EINVAL; + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(pool->tx_sw_csum)) { + int err; + + err = skb_checksum_help(skb); + if (err) + return err; + } + } + + if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME) + skb->skb_mstamp_ns = meta->request.launch_time; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); + + return 0; +} + static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, struct xdp_desc *desc) { @@ -669,6 +708,9 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, int err, i; u64 addr; + addr = desc->addr; + buffer = xsk_buff_raw_get_data(pool, addr); + if (!skb) { hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom)); @@ -679,6 +721,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, skb_reserve(skb, hr); xsk_skb_init_misc(skb, xs, desc->addr); + if (desc->options & XDP_TX_METADATA) { + err = xsk_skb_metadata(skb, buffer, desc, pool, hr); + if (unlikely(err)) + return ERR_PTR(err); + } } else { xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); if (!xsk_addr) @@ -692,11 +739,9 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); } - addr = desc->addr; len = desc->len; ts = pool->unaligned ? len : pool->chunk_size; - buffer = xsk_buff_raw_get_data(pool, addr); offset = offset_in_page(buffer); addr = buffer - pool->addrs; @@ -727,7 +772,6 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { - struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; int err; @@ -761,6 +805,12 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, goto free_err; xsk_skb_init_misc(skb, xs, desc->addr); + if (desc->options & XDP_TX_METADATA) { + err = xsk_skb_metadata(skb, buffer, desc, + xs->pool, hr); + if (unlikely(err)) + goto free_err; + } } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct xsk_addr_node *xsk_addr; @@ -795,42 +845,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, xsk_addr->addr = desc->addr; list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); } - - if (!skb_shinfo(skb)->nr_frags && desc->options & XDP_TX_METADATA) { - if (unlikely(xs->pool->tx_metadata_len == 0)) { - err = -EINVAL; - goto free_err; - } - - meta = buffer - xs->pool->tx_metadata_len; - if (unlikely(!xsk_buff_valid_tx_metadata(meta))) { - err = -EINVAL; - goto free_err; - } - - if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { - if (unlikely(meta->request.csum_start + - meta->request.csum_offset + - sizeof(__sum16) > len)) { - err = -EINVAL; - goto free_err; - } - - skb->csum_start = hr + meta->request.csum_start; - skb->csum_offset = meta->request.csum_offset; - skb->ip_summed = CHECKSUM_PARTIAL; - - if (unlikely(xs->pool->tx_sw_csum)) { - err = skb_checksum_help(skb); - if (err) - goto free_err; - } - } - - if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME) - skb->skb_mstamp_ns = meta->request.launch_time; - xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); - } } xsk_inc_num_desc(skb); -- 2.43.0