From: Théo Lebrun mog_alloc_rx_buffers(), getting called at open, does not do rx buffer alloc on GEM. The bulk of the work is done by gem_rx_refill() filling up all slots with valid buffers. gem_rx_refill() is called at link up by gem_init_rings() == bp->macbgem_ops.mog_init_rings(). Move operation to macb_open(), mostly to allow it to fail early and loudly rather than init the device with Rx mostly broken. About `bool fail_early`: - When called from macb_open(), ring init fails as soon as a queue cannot be refilled. - When called from macb_hresp_error_task(), we do our best to reinit the device: we still iterate over all queues and try refilling all even if a previous queue failed. Signed-off-by: Théo Lebrun Signed-off-by: Paolo Valerio --- drivers/net/ethernet/cadence/macb.h | 2 +- drivers/net/ethernet/cadence/macb_main.c | 53 ++++++++++++++++++------ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c69828b27dae..0acc188fe547 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1195,7 +1195,7 @@ struct macb_queue; struct macb_or_gem_ops { int (*mog_alloc_rx_buffers)(struct macb *bp); void (*mog_free_rx_buffers)(struct macb *bp); - void (*mog_init_rings)(struct macb *bp); + int (*mog_init_rings)(struct macb *bp, bool fail_early); int (*mog_rx)(struct macb_queue *queue, struct napi_struct *napi, int budget); }; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1cb49252abf5..38302cba05ad 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1481,13 +1481,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) return packets; } -static void gem_rx_refill(struct macb_queue *queue) +static int gem_rx_refill(struct macb_queue *queue) { unsigned int entry; struct sk_buff *skb; dma_addr_t paddr; struct macb *bp = 
queue->bp; struct macb_dma_desc *desc; + int err = 0; while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail, bp->rx_ring_size) > 0) { @@ -1504,6 +1505,7 @@ static void gem_rx_refill(struct macb_queue *queue) if (unlikely(!skb)) { netdev_err(bp->dev, "Unable to allocate sk_buff\n"); + err = -ENOMEM; break; } @@ -1553,6 +1555,7 @@ static void gem_rx_refill(struct macb_queue *queue) netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n", queue, queue->rx_prepared_head, queue->rx_tail); + return err; } /* Mark DMA descriptors from begin up to and not including end as unused */ @@ -2005,7 +2008,7 @@ static void macb_hresp_error_task(struct work_struct *work) netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - bp->macbgem_ops.mog_init_rings(bp); + bp->macbgem_ops.mog_init_rings(bp, false); /* Initialize TX and RX buffers */ macb_init_buffers(bp); @@ -2779,8 +2782,6 @@ static int macb_alloc_consistent(struct macb *bp) if (!queue->tx_skb) goto out_err; } - if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) - goto out_err; /* Required for tie off descriptor for PM cases */ if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) { @@ -2792,6 +2793,11 @@ static int macb_alloc_consistent(struct macb *bp) goto out_err; } + if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) + goto out_err; + if (bp->macbgem_ops.mog_init_rings(bp, true)) + goto out_err; + return 0; out_err: @@ -2812,19 +2818,21 @@ static void macb_init_tieoff(struct macb *bp) desc->ctrl = 0; } -static void gem_init_rx_ring(struct macb_queue *queue) +static int gem_init_rx_ring(struct macb_queue *queue) { queue->rx_tail = 0; queue->rx_prepared_head = 0; - gem_rx_refill(queue); + return gem_rx_refill(queue); } -static void gem_init_rings(struct macb *bp) +static int gem_init_rings(struct macb *bp, bool fail_early) { struct macb_queue *queue; struct macb_dma_desc *desc = NULL; + int last_err = 0; unsigned int q; + int err; int i; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { @@ -2837,13 
+2845,24 @@ static void gem_init_rings(struct macb *bp) queue->tx_head = 0; queue->tx_tail = 0; - gem_init_rx_ring(queue); + /* We get called in two cases: + * - open: we can propagate alloc errors (so fail early), + * - HRESP error: cannot propagate, we attempt to reinit + * all queues in case of failure. + */ + err = gem_init_rx_ring(queue); + if (err) { + last_err = err; + if (fail_early) + break; + } } macb_init_tieoff(bp); + return last_err; } -static void macb_init_rings(struct macb *bp) +static int macb_init_rings(struct macb *bp, bool fail_early) { int i; struct macb_dma_desc *desc = NULL; @@ -2860,6 +2879,7 @@ static void macb_init_rings(struct macb *bp) desc->ctrl |= MACB_BIT(TX_WRAP); macb_init_tieoff(bp); + return 0; } static void macb_reset_hw(struct macb *bp) @@ -3191,7 +3211,6 @@ static int macb_open(struct net_device *dev) goto pm_exit; } - bp->macbgem_ops.mog_init_rings(bp); macb_init_buffers(bp); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { @@ -6108,12 +6127,20 @@ static int __maybe_unused macb_resume(struct device *dev) for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { - if (macb_is_gem(bp)) - gem_init_rx_ring(queue); - else + if (macb_is_gem(bp)) { + err = gem_init_rx_ring(queue); + if (err) { + dev_err(dev, "Unable to init RX ring: %d\n", err); + return err; + } + } else { macb_init_rx_ring(queue); + } } + } + for (q = 0, queue = bp->queues; q < bp->num_queues; + ++q, ++queue) { napi_enable(&queue->napi_rx); napi_enable(&queue->napi_tx); } -- 2.53.0 This is a preparation commit as the field in later patches will no longer accommodate skbuffs, but more generic frames. 
Signed-off-by: Paolo Valerio Reviewed-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb.h | 2 +- drivers/net/ethernet/cadence/macb_main.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 0acc188fe547..d84ec528c12f 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1277,7 +1277,7 @@ struct macb_queue { unsigned int rx_tail; unsigned int rx_prepared_head; struct macb_dma_desc *rx_ring; - struct sk_buff **rx_skbuff; + void **rx_buff; void *rx_buffers; struct napi_struct napi_rx; struct queue_stats stats; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 38302cba05ad..fca7b9c6b670 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1499,7 +1499,7 @@ static int gem_rx_refill(struct macb_queue *queue) desc = macb_rx_desc(queue, entry); - if (!queue->rx_skbuff[entry]) { + if (!queue->rx_buff[entry]) { /* allocate sk_buff for this free entry in ring */ skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size); if (unlikely(!skb)) { @@ -1518,7 +1518,7 @@ static int gem_rx_refill(struct macb_queue *queue) break; } - queue->rx_skbuff[entry] = skb; + queue->rx_buff[entry] = skb; if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); @@ -1621,7 +1621,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->stats.rx_dropped++; break; } - skb = queue->rx_skbuff[entry]; + skb = queue->rx_buff[entry]; if (unlikely(!skb)) { netdev_err(bp->dev, "inconsistent Rx descriptor chain\n"); @@ -1630,7 +1630,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, break; } /* now everything is ready for receiving packet */ - queue->rx_skbuff[entry] = NULL; + queue->rx_buff[entry] = NULL; len = ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", 
entry, len); @@ -2630,11 +2630,11 @@ static void gem_free_rx_buffers(struct macb *bp) int i; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - if (!queue->rx_skbuff) + if (!queue->rx_buff) continue; for (i = 0; i < bp->rx_ring_size; i++) { - skb = queue->rx_skbuff[i]; + skb = queue->rx_buff[i]; if (!skb) continue; @@ -2648,8 +2648,8 @@ static void gem_free_rx_buffers(struct macb *bp) skb = NULL; } - kfree(queue->rx_skbuff); - queue->rx_skbuff = NULL; + kfree(queue->rx_buff); + queue->rx_buff = NULL; } } @@ -2712,13 +2712,13 @@ static int gem_alloc_rx_buffers(struct macb *bp) for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { size = bp->rx_ring_size * sizeof(struct sk_buff *); - queue->rx_skbuff = kzalloc(size, GFP_KERNEL); - if (!queue->rx_skbuff) + queue->rx_buff = kzalloc(size, GFP_KERNEL); + if (!queue->rx_buff) return -ENOMEM; else netdev_dbg(bp->dev, - "Allocated %d RX struct sk_buff entries at %p\n", - bp->rx_ring_size, queue->rx_skbuff); + "Allocated %d RX buff entries at %p\n", + bp->rx_ring_size, queue->rx_buff); } return 0; } -- 2.53.0 Use the page pool allocator for the data buffers and enable skb recycling support, instead of relying on netdev_alloc_skb allocating the entire skb during the refill. The patch also adds support for receiving network frames that span multiple DMA descriptors in the Cadence MACB/GEM Ethernet driver. The patch removes the requirement that limited frame reception to a single descriptor (RX_SOF && RX_EOF), also avoiding potential contiguous multi-page allocation for large frames. 
Signed-off-by: Paolo Valerio --- drivers/net/ethernet/cadence/Kconfig | 1 + drivers/net/ethernet/cadence/macb.h | 5 + drivers/net/ethernet/cadence/macb_main.c | 412 ++++++++++++++++------- 3 files changed, 292 insertions(+), 126 deletions(-) diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 5b2a461dfd28..ae500f717433 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -25,6 +25,7 @@ config MACB depends on PTP_1588_CLOCK_OPTIONAL select PHYLINK select CRC32 + select PAGE_POOL help The Cadence MACB ethernet interface is found on many Atmel AT32 and AT91 parts. This driver also supports the Cadence GEM (Gigabit diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d84ec528c12f..2c6ba1b63aab 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -14,6 +14,7 @@ #include #include #include +#include #define MACB_GREGS_NBR 16 #define MACB_GREGS_VERSION 2 @@ -1281,6 +1282,8 @@ struct macb_queue { void *rx_buffers; struct napi_struct napi_rx; struct queue_stats stats; + struct page_pool *page_pool; + struct sk_buff *skb; }; struct ethtool_rx_fs_item { @@ -1304,6 +1307,8 @@ struct macb { struct macb_dma_desc *rx_ring_tieoff; dma_addr_t rx_ring_tieoff_dma; size_t rx_buffer_size; + size_t rx_headroom; + unsigned int rx_ip_align; unsigned int rx_ring_size; unsigned int tx_ring_size; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index fca7b9c6b670..84989ff0c3a9 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1481,14 +1481,57 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) return packets; } -static int gem_rx_refill(struct macb_queue *queue) +static int gem_rx_data_len(struct macb *bp, struct macb_queue *queue, + u32 desc_ctrl, bool rx_sof, bool rx_eof) +{ + int len; + + if 
(unlikely(!rx_sof && !queue->skb)) { + if (net_ratelimit()) + netdev_err(bp->dev, + "Received non-starting frame while expecting a starting one\n"); + return -1; + } + + if (rx_eof) { + len = desc_ctrl & bp->rx_frm_len_mask; + } else { + len = bp->rx_buffer_size; + /* First frame on !RSC skips NET_IP_ALIGN */ + if (rx_sof) + len -= bp->rx_ip_align; + } + + if (rx_eof && !rx_sof) { + if (unlikely(queue->skb->len > len)) { + if (net_ratelimit()) + netdev_err(bp->dev, + "Unexpected frame len: %d\n", len); + return -1; + } + + len -= queue->skb->len; + } + + return len; +} + +static unsigned int gem_total_rx_buffer_size(struct macb *bp) +{ + return SKB_HEAD_ALIGN(bp->rx_buffer_size + NET_SKB_PAD); +} + +static int gem_rx_refill(struct macb_queue *queue, bool napi) { - unsigned int entry; - struct sk_buff *skb; - dma_addr_t paddr; struct macb *bp = queue->bp; struct macb_dma_desc *desc; + unsigned int entry; + struct page *page; + dma_addr_t paddr; + gfp_t gfp_alloc; int err = 0; + void *data; + int offset; while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail, bp->rx_ring_size) > 0) { @@ -1500,25 +1543,26 @@ static int gem_rx_refill(struct macb_queue *queue) desc = macb_rx_desc(queue, entry); if (!queue->rx_buff[entry]) { - /* allocate sk_buff for this free entry in ring */ - skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size); - if (unlikely(!skb)) { - netdev_err(bp->dev, - "Unable to allocate sk_buff\n"); + gfp_alloc = napi ? 
GFP_ATOMIC : GFP_KERNEL; + page = page_pool_alloc_frag(queue->page_pool, &offset, + gem_total_rx_buffer_size(bp), + gfp_alloc | __GFP_NOWARN); + if (!page) { + if (net_ratelimit()) + netdev_err(bp->dev, + "Unable to allocate rx buffer\n"); err = -ENOMEM; break; } - /* now fill corresponding descriptor entry */ - paddr = dma_map_single(&bp->pdev->dev, skb->data, - bp->rx_buffer_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&bp->pdev->dev, paddr)) { - dev_kfree_skb(skb); - break; - } + paddr = page_pool_get_dma_addr(page) + NET_SKB_PAD + offset; + + dma_sync_single_for_device(&bp->pdev->dev, + paddr, bp->rx_buffer_size, + page_pool_get_dma_dir(queue->page_pool)); - queue->rx_buff[entry] = skb; + data = page_address(page) + offset; + queue->rx_buff[entry] = data; if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); @@ -1528,20 +1572,6 @@ static int gem_rx_refill(struct macb_queue *queue) */ dma_wmb(); macb_set_addr(bp, desc, paddr); - - /* Properly align Ethernet header. - * - * Hardware can add dummy bytes if asked using the RBOF - * field inside the NCFGR register. That feature isn't - * available if hardware is RSC capable. - * - * We cannot fallback to doing the 2-byte shift before - * DMA mapping because the address field does not allow - * setting the low 2/3 bits. - * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits. 
- */ - if (!(bp->caps & MACB_CAPS_RSC)) - skb_reserve(skb, NET_IP_ALIGN); } else { desc->ctrl = 0; dma_wmb(); @@ -1582,17 +1612,21 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, int budget) { + struct skb_shared_info *shinfo; struct macb *bp = queue->bp; - unsigned int len; - unsigned int entry; - struct sk_buff *skb; - struct macb_dma_desc *desc; - int count = 0; + struct macb_dma_desc *desc; + unsigned int entry; + struct page *page; + void *buff_head; + int count = 0; + int data_len; + int nr_frags; + while (count < budget) { - u32 ctrl; + bool rxused, first_frame, last_frame; dma_addr_t addr; - bool rxused; + u32 ctrl; entry = macb_rx_ring_wrap(bp, queue->rx_tail); desc = macb_rx_desc(queue, entry); @@ -1614,58 +1648,124 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->rx_tail++; count++; - if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) { - netdev_err(bp->dev, - "not whole frame pointed by descriptor\n"); + buff_head = queue->rx_buff[entry]; + if (unlikely(!buff_head)) { + if (net_ratelimit()) + netdev_err(bp->dev, + "inconsistent Rx descriptor chain\n"); bp->dev->stats.rx_dropped++; queue->stats.rx_dropped++; break; } - skb = queue->rx_buff[entry]; - if (unlikely(!skb)) { - netdev_err(bp->dev, - "inconsistent Rx descriptor chain\n"); - bp->dev->stats.rx_dropped++; - queue->stats.rx_dropped++; - break; + + first_frame = ctrl & MACB_BIT(RX_SOF); + last_frame = ctrl & MACB_BIT(RX_EOF); + + data_len = gem_rx_data_len(bp, queue, ctrl, first_frame, + last_frame); + if (data_len < 0) + goto free_frags; + + addr += first_frame ? 
bp->rx_ip_align : 0; + + dma_sync_single_for_cpu(&bp->pdev->dev, addr, data_len, + page_pool_get_dma_dir(queue->page_pool)); + + if (first_frame) { + if (unlikely(queue->skb)) { + if (net_ratelimit()) + netdev_warn(bp->dev, "Previous packet incomplete\n"); + dev_kfree_skb(queue->skb); + bp->dev->stats.rx_dropped++; + queue->stats.rx_dropped++; + } + + queue->skb = napi_build_skb(buff_head, gem_total_rx_buffer_size(bp)); + if (unlikely(!queue->skb)) { + if (net_ratelimit()) + netdev_err(bp->dev, + "Unable to allocate sk_buff\n"); + goto free_frags; + } + + /* Properly align Ethernet header. + * + * Hardware can add dummy bytes if asked using the RBOF + * field inside the NCFGR register. That feature isn't + * available if hardware is RSC capable. + * + * We cannot fallback to doing the 2-byte shift before + * DMA mapping because the address field does not allow + * setting the low 2/3 bits. + * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits. + */ + skb_reserve(queue->skb, bp->rx_headroom); + skb_mark_for_recycle(queue->skb); + skb_put(queue->skb, data_len); + } else { + shinfo = skb_shinfo(queue->skb); + page = virt_to_head_page(buff_head); + nr_frags = shinfo->nr_frags; + + if (unlikely(nr_frags >= ARRAY_SIZE(shinfo->frags))) + goto free_frags; + + skb_add_rx_frag(queue->skb, nr_frags, page, + buff_head - page_address(page) + NET_SKB_PAD, + data_len, gem_total_rx_buffer_size(bp)); } + /* now everything is ready for receiving packet */ queue->rx_buff[entry] = NULL; - len = ctrl & bp->rx_frm_len_mask; - - netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); - skb_put(skb, len); - dma_unmap_single(&bp->pdev->dev, addr, - bp->rx_buffer_size, DMA_FROM_DEVICE); + netdev_vdbg(bp->dev, "%s %u (len %u)\n", __func__, entry, data_len); - skb->protocol = eth_type_trans(skb, bp->dev); - skb_checksum_none_assert(skb); - if (bp->dev->features & NETIF_F_RXCSUM && - !(bp->dev->flags & IFF_PROMISC) && - GEM_BFEXT(RX_CSUM, ctrl) & GEM_RX_CSUM_CHECKED_MASK) - skb->ip_summed = 
CHECKSUM_UNNECESSARY; + if (last_frame) { + bp->dev->stats.rx_packets++; + queue->stats.rx_packets++; + bp->dev->stats.rx_bytes += queue->skb->len; + queue->stats.rx_bytes += queue->skb->len; - bp->dev->stats.rx_packets++; - queue->stats.rx_packets++; - bp->dev->stats.rx_bytes += skb->len; - queue->stats.rx_bytes += skb->len; - - gem_ptp_do_rxstamp(bp, skb, desc); + queue->skb->protocol = eth_type_trans(queue->skb, bp->dev); + skb_checksum_none_assert(queue->skb); + if (bp->dev->features & NETIF_F_RXCSUM && + !(bp->dev->flags & IFF_PROMISC) && + GEM_BFEXT(RX_CSUM, ctrl) & GEM_RX_CSUM_CHECKED_MASK) + queue->skb->ip_summed = CHECKSUM_UNNECESSARY; + gem_ptp_do_rxstamp(bp, queue->skb, desc); #if defined(DEBUG) && defined(VERBOSE_DEBUG) - netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", - skb->len, skb->csum); - print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1, - skb_mac_header(skb), 16, true); - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_ADDRESS, 16, 1, - skb->data, 32, true); + netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", + queue->skb->len, queue->skb->csum); + print_hex_dump_debug(" mac: ", DUMP_PREFIX_ADDRESS, 16, 1, + skb_mac_header(queue->skb), 16, true); + print_hex_dump_debug("data: ", DUMP_PREFIX_ADDRESS, 16, 1, + queue->skb->data, 32, true); #endif - napi_gro_receive(napi, skb); + napi_gro_receive(napi, queue->skb); + queue->skb = NULL; + } + + continue; + +free_frags: + if (queue->skb) { + dev_kfree_skb(queue->skb); + queue->skb = NULL; + } + + if (buff_head) + page_pool_put_full_page(queue->page_pool, + virt_to_head_page(buff_head), + false); + + bp->dev->stats.rx_dropped++; + queue->stats.rx_dropped++; + queue->rx_buff[entry] = NULL; } - gem_rx_refill(queue); + gem_rx_refill(queue, true); return count; } @@ -2600,12 +2700,22 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static void macb_init_rx_buffer_size(struct macb *bp, size_t size) +static void 
macb_init_rx_buffer_size(struct macb *bp, unsigned int mtu) { + unsigned int overhead; + size_t size; + if (!macb_is_gem(bp)) { bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; } else { - bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX); + size = mtu + ETH_HLEN + ETH_FCS_LEN; + bp->rx_buffer_size = SKB_DATA_ALIGN(size + bp->rx_ip_align); + if (gem_total_rx_buffer_size(bp) > PAGE_SIZE) { + overhead = bp->rx_headroom + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + bp->rx_buffer_size = rounddown(PAGE_SIZE - overhead, + RX_BUFFER_MULTIPLE); + } if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) { netdev_dbg(bp->dev, @@ -2616,17 +2726,16 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size) } } - netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu]\n", - bp->dev->mtu, bp->rx_buffer_size); + netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu] rx_headroom [%zu] total [%u]\n", + bp->dev->mtu, bp->rx_buffer_size, bp->rx_headroom, + gem_total_rx_buffer_size(bp)); } static void gem_free_rx_buffers(struct macb *bp) { - struct sk_buff *skb; - struct macb_dma_desc *desc; struct macb_queue *queue; - dma_addr_t addr; unsigned int q; + void *data; int i; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { @@ -2634,22 +2743,25 @@ static void gem_free_rx_buffers(struct macb *bp) continue; for (i = 0; i < bp->rx_ring_size; i++) { - skb = queue->rx_buff[i]; - - if (!skb) + data = queue->rx_buff[i]; + if (!data) continue; - desc = macb_rx_desc(queue, i); - addr = macb_get_addr(bp, desc); + page_pool_put_full_page(queue->page_pool, + virt_to_head_page(data), + false); + queue->rx_buff[i] = NULL; + } - dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - skb = NULL; + if (queue->skb) { + dev_kfree_skb(queue->skb); + queue->skb = NULL; } kfree(queue->rx_buff); queue->rx_buff = NULL; + page_pool_destroy(queue->page_pool); + queue->page_pool = NULL; } } @@ -2711,13 +2823,12 @@ static int gem_alloc_rx_buffers(struct macb 
*bp) int size; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - size = bp->rx_ring_size * sizeof(struct sk_buff *); + size = bp->rx_ring_size * sizeof(*queue->rx_buff); queue->rx_buff = kzalloc(size, GFP_KERNEL); if (!queue->rx_buff) return -ENOMEM; else - netdev_dbg(bp->dev, - "Allocated %d RX buff entries at %p\n", + netdev_dbg(bp->dev, "Allocated %d RX buff entries at %p\n", bp->rx_ring_size, queue->rx_buff); } return 0; @@ -2805,6 +2916,40 @@ static int macb_alloc_consistent(struct macb *bp) return -ENOMEM; } +static int gem_create_page_pool(struct macb_queue *queue) +{ + struct page_pool_params pp_params = { + .order = 0, + .flags = PP_FLAG_DMA_MAP, + .pool_size = queue->bp->rx_ring_size, + .nid = NUMA_NO_NODE, + .dma_dir = DMA_FROM_DEVICE, + .dev = &queue->bp->pdev->dev, + .netdev = queue->bp->dev, + .napi = &queue->napi_rx, + .max_len = PAGE_SIZE, + }; + struct page_pool *pool; + int err = 0; + + /* This can happen in the case of HRESP error. + * Do nothing as page pool is already existing. + */ + if (queue->page_pool) + return err; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) { + netdev_err(queue->bp->dev, "cannot create rx page pool\n"); + err = PTR_ERR(pool); + pool = NULL; + } + + queue->page_pool = pool; + + return err; +} + static void macb_init_tieoff(struct macb *bp) { struct macb_dma_desc *desc = bp->rx_ring_tieoff; @@ -2823,7 +2968,7 @@ static int gem_init_rx_ring(struct macb_queue *queue) queue->rx_tail = 0; queue->rx_prepared_head = 0; - return gem_rx_refill(queue); + return gem_rx_refill(queue, false); } static int gem_init_rings(struct macb *bp, bool fail_early) @@ -2845,6 +2990,13 @@ static int gem_init_rings(struct macb *bp, bool fail_early) queue->tx_head = 0; queue->tx_tail = 0; + /* This is a hard failure. In case of HRESP error + * recovery we always reuse the existing page pool. 
+ */ + last_err = gem_create_page_pool(queue); + if (last_err) + break; + /* We get called in two cases: * - open: we can propagate alloc errors (so fail early), * - HRESP error: cannot propagate, we attempt to reinit @@ -2994,39 +3146,40 @@ static void macb_configure_dma(struct macb *bp) unsigned int q; u32 dmacfg; - buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE; - if (macb_is_gem(bp)) { - dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - if (q) - queue_writel(queue, RBQS, buffer_size); - else - dmacfg |= GEM_BF(RXBS, buffer_size); - } - if (bp->dma_burst_length) - dmacfg = GEM_BFINS(FBLDO, bp->dma_burst_length, dmacfg); - dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); - dmacfg &= ~GEM_BIT(ENDIA_PKT); + if (!macb_is_gem((bp))) + return; - if (bp->native_io) - dmacfg &= ~GEM_BIT(ENDIA_DESC); + buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE; + dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (q) + queue_writel(queue, RBQS, buffer_size); else - dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */ + dmacfg |= GEM_BF(RXBS, buffer_size); + } + if (bp->dma_burst_length) + dmacfg = GEM_BFINS(FBLDO, bp->dma_burst_length, dmacfg); + dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); + dmacfg &= ~GEM_BIT(ENDIA_PKT); - if (bp->dev->features & NETIF_F_HW_CSUM) - dmacfg |= GEM_BIT(TXCOEN); - else - dmacfg &= ~GEM_BIT(TXCOEN); + if (bp->native_io) + dmacfg &= ~GEM_BIT(ENDIA_DESC); + else + dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */ - dmacfg &= ~GEM_BIT(ADDR64); - if (macb_dma64(bp)) - dmacfg |= GEM_BIT(ADDR64); - if (macb_dma_ptp(bp)) - dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT); - netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", - dmacfg); - gem_writel(bp, DMACFG, dmacfg); - } + if (bp->dev->features & NETIF_F_HW_CSUM) + dmacfg |= GEM_BIT(TXCOEN); + else + dmacfg &= ~GEM_BIT(TXCOEN); + + 
dmacfg &= ~GEM_BIT(ADDR64); + if (macb_dma64(bp)) + dmacfg |= GEM_BIT(ADDR64); + if (macb_dma_ptp(bp)) + dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT); + netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", + dmacfg); + gem_writel(bp, DMACFG, dmacfg); } static void macb_init_hw(struct macb *bp) @@ -3189,7 +3342,6 @@ static void macb_set_rx_mode(struct net_device *dev) static int macb_open(struct net_device *dev) { - size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN; struct macb *bp = netdev_priv(dev); struct macb_queue *queue; unsigned int q; @@ -3202,7 +3354,7 @@ static int macb_open(struct net_device *dev) return err; /* RX buffers initialization */ - macb_init_rx_buffer_size(bp, bufsz); + macb_init_rx_buffer_size(bp, dev->mtu); err = macb_alloc_consistent(bp); if (err) { @@ -5871,6 +6023,14 @@ static int macb_probe(struct platform_device *pdev) if (err) goto err_out_phy_exit; + if (macb_is_gem(bp)) { + bp->rx_headroom = NET_SKB_PAD; + if (!(bp->caps & MACB_CAPS_RSC)) { + bp->rx_ip_align = NET_IP_ALIGN; + bp->rx_headroom += NET_IP_ALIGN; + } + } + netif_carrier_off(dev); err = register_netdev(dev); -- 2.53.0 The macb_tx_skb structure is renamed to macb_tx_buff with no functional changes. This is a preparatory step for adding xdp xmit support. 
Signed-off-by: Paolo Valerio Reviewed-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb.h | 8 +- drivers/net/ethernet/cadence/macb_main.c | 112 +++++++++++------------ 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 2c6ba1b63aab..1cc626088174 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -965,7 +965,7 @@ struct macb_dma_desc_ptp { /* Scaled PPM fraction */ #define PPM_FRACTION 16 -/* struct macb_tx_skb - data about an skb which is being transmitted +/* struct macb_tx_buff - data about an skb which is being transmitted * @skb: skb currently being transmitted, only set for the last buffer * of the frame * @mapping: DMA address of the skb's fragment buffer @@ -973,7 +973,7 @@ struct macb_dma_desc_ptp { * @mapped_as_page: true when buffer was mapped with skb_frag_dma_map(), * false when buffer was mapped with dma_map_single() */ -struct macb_tx_skb { +struct macb_tx_buff { struct sk_buff *skb; dma_addr_t mapping; size_t size; @@ -1267,7 +1267,7 @@ struct macb_queue { spinlock_t tx_ptr_lock; unsigned int tx_head, tx_tail; struct macb_dma_desc *tx_ring; - struct macb_tx_skb *tx_skb; + struct macb_tx_buff *tx_buff; dma_addr_t tx_ring_dma; struct work_struct tx_error_task; bool txubr_pending; @@ -1345,7 +1345,7 @@ struct macb { phy_interface_t phy_interface; /* AT91RM9200 transmit queue (1 on wire + 1 queued) */ - struct macb_tx_skb rm9200_txq[2]; + struct macb_tx_buff rm9200_txq[2]; unsigned int max_tx_length; u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES]; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 84989ff0c3a9..a71d36b18170 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -159,10 +159,10 @@ static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue, return 
&queue->tx_ring[index]; } -static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue, - unsigned int index) +static struct macb_tx_buff *macb_tx_buff(struct macb_queue *queue, + unsigned int index) { - return &queue->tx_skb[macb_tx_ring_wrap(queue->bp, index)]; + return &queue->tx_buff[macb_tx_ring_wrap(queue->bp, index)]; } static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index) @@ -792,7 +792,7 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, static void gem_shuffle_tx_one_ring(struct macb_queue *queue) { unsigned int head, tail, count, ring_size, desc_size; - struct macb_tx_skb tx_skb, *skb_curr, *skb_next; + struct macb_tx_buff tx_buff, *buff_curr, *buff_next; struct macb_dma_desc *desc_curr, *desc_next; unsigned int i, cycles, shift, curr, next; struct macb *bp = queue->bp; @@ -824,8 +824,8 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue) for (i = 0; i < cycles; i++) { memcpy(&desc, macb_tx_desc(queue, i), desc_size); - memcpy(&tx_skb, macb_tx_skb(queue, i), - sizeof(struct macb_tx_skb)); + memcpy(&tx_buff, macb_tx_buff(queue, i), + sizeof(struct macb_tx_buff)); curr = i; next = (curr + shift) % ring_size; @@ -841,9 +841,9 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue) if (curr == ring_size - 1) desc_curr->ctrl |= MACB_BIT(TX_WRAP); - skb_curr = macb_tx_skb(queue, curr); - skb_next = macb_tx_skb(queue, next); - memcpy(skb_curr, skb_next, sizeof(struct macb_tx_skb)); + buff_curr = macb_tx_buff(queue, curr); + buff_next = macb_tx_buff(queue, next); + memcpy(buff_curr, buff_next, sizeof(struct macb_tx_buff)); curr = next; next = (curr + shift) % ring_size; @@ -855,8 +855,8 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue) desc_curr->ctrl &= ~MACB_BIT(TX_WRAP); if (curr == ring_size - 1) desc_curr->ctrl |= MACB_BIT(TX_WRAP); - memcpy(macb_tx_skb(queue, curr), &tx_skb, - sizeof(struct macb_tx_skb)); + memcpy(macb_tx_buff(queue, curr), &tx_buff, + 
sizeof(struct macb_tx_buff)); } queue->tx_head = count; @@ -1197,21 +1197,21 @@ static int macb_halt_tx(struct macb *bp) bp, TSR); } -static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget) +static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, int budget) { - if (tx_skb->mapping) { - if (tx_skb->mapped_as_page) - dma_unmap_page(&bp->pdev->dev, tx_skb->mapping, - tx_skb->size, DMA_TO_DEVICE); + if (tx_buff->mapping) { + if (tx_buff->mapped_as_page) + dma_unmap_page(&bp->pdev->dev, tx_buff->mapping, + tx_buff->size, DMA_TO_DEVICE); else - dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, - tx_skb->size, DMA_TO_DEVICE); - tx_skb->mapping = 0; + dma_unmap_single(&bp->pdev->dev, tx_buff->mapping, + tx_buff->size, DMA_TO_DEVICE); + tx_buff->mapping = 0; } - if (tx_skb->skb) { - napi_consume_skb(tx_skb->skb, budget); - tx_skb->skb = NULL; + if (tx_buff->skb) { + napi_consume_skb(tx_buff->skb, budget); + tx_buff->skb = NULL; } } @@ -1257,7 +1257,7 @@ static void macb_tx_error_task(struct work_struct *work) u32 queue_index; u32 packets = 0; u32 bytes = 0; - struct macb_tx_skb *tx_skb; + struct macb_tx_buff *tx_buff; struct macb_dma_desc *desc; struct sk_buff *skb; unsigned int tail; @@ -1297,16 +1297,16 @@ static void macb_tx_error_task(struct work_struct *work) desc = macb_tx_desc(queue, tail); ctrl = desc->ctrl; - tx_skb = macb_tx_skb(queue, tail); - skb = tx_skb->skb; + tx_buff = macb_tx_buff(queue, tail); + skb = tx_buff->skb; if (ctrl & MACB_BIT(TX_USED)) { /* skb is set for the last buffer of the frame */ while (!skb) { - macb_tx_unmap(bp, tx_skb, 0); + macb_tx_unmap(bp, tx_buff, 0); tail++; - tx_skb = macb_tx_skb(queue, tail); - skb = tx_skb->skb; + tx_buff = macb_tx_buff(queue, tail); + skb = tx_buff->skb; } /* ctrl still refers to the first buffer descriptor @@ -1335,7 +1335,7 @@ static void macb_tx_error_task(struct work_struct *work) desc->ctrl = ctrl | MACB_BIT(TX_USED); } - macb_tx_unmap(bp, tx_skb, 0); + 
macb_tx_unmap(bp, tx_buff, 0); } netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index), @@ -1413,7 +1413,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) spin_lock_irqsave(&queue->tx_ptr_lock, flags); head = queue->tx_head; for (tail = queue->tx_tail; tail != head && packets < budget; tail++) { - struct macb_tx_skb *tx_skb; + struct macb_tx_buff *tx_buff; struct sk_buff *skb; struct macb_dma_desc *desc; u32 ctrl; @@ -1433,8 +1433,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) /* Process all buffers of the current transmitted frame */ for (;; tail++) { - tx_skb = macb_tx_skb(queue, tail); - skb = tx_skb->skb; + tx_buff = macb_tx_buff(queue, tail); + skb = tx_buff->skb; /* First, update TX stats if needed */ if (skb) { @@ -1454,7 +1454,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) } /* Now we can safely release resources */ - macb_tx_unmap(bp, tx_skb, budget); + macb_tx_unmap(bp, tx_buff, budget); /* skb is set only for the last buffer of the frame. 
* WARNING: at this point skb has been freed by @@ -2332,8 +2332,8 @@ static unsigned int macb_tx_map(struct macb *bp, unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags; unsigned int len, i, tx_head = queue->tx_head; u32 ctrl, lso_ctrl = 0, seq_ctrl = 0; + struct macb_tx_buff *tx_buff = NULL; unsigned int eof = 1, mss_mfs = 0; - struct macb_tx_skb *tx_skb = NULL; struct macb_dma_desc *desc; unsigned int offset, size; dma_addr_t mapping; @@ -2356,7 +2356,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) { - tx_skb = macb_tx_skb(queue, tx_head); + tx_buff = macb_tx_buff(queue, tx_head); mapping = dma_map_single(&bp->pdev->dev, skb->data + offset, @@ -2365,10 +2365,10 @@ static unsigned int macb_tx_map(struct macb *bp, goto dma_error; /* Save info to properly release resources */ - tx_skb->skb = NULL; - tx_skb->mapping = mapping; - tx_skb->size = size; - tx_skb->mapped_as_page = false; + tx_buff->skb = NULL; + tx_buff->mapping = mapping; + tx_buff->size = size; + tx_buff->mapped_as_page = false; len -= size; offset += size; @@ -2385,7 +2385,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) { size = umin(len, bp->max_tx_length); - tx_skb = macb_tx_skb(queue, tx_head); + tx_buff = macb_tx_buff(queue, tx_head); mapping = skb_frag_dma_map(&bp->pdev->dev, frag, offset, size, DMA_TO_DEVICE); @@ -2393,10 +2393,10 @@ static unsigned int macb_tx_map(struct macb *bp, goto dma_error; /* Save info to properly release resources */ - tx_skb->skb = NULL; - tx_skb->mapping = mapping; - tx_skb->size = size; - tx_skb->mapped_as_page = true; + tx_buff->skb = NULL; + tx_buff->mapping = mapping; + tx_buff->size = size; + tx_buff->mapped_as_page = true; len -= size; offset += size; @@ -2405,13 +2405,13 @@ static unsigned int macb_tx_map(struct macb *bp, } /* Should never happen */ - if (unlikely(!tx_skb)) { + if (unlikely(!tx_buff)) { netdev_err(bp->dev, "BUG! 
empty skb!\n"); return 0; } /* This is the last buffer of the frame: save socket buffer */ - tx_skb->skb = skb; + tx_buff->skb = skb; /* Update TX ring: update buffer descriptors in reverse order * to avoid race condition @@ -2442,10 +2442,10 @@ static unsigned int macb_tx_map(struct macb *bp, do { i--; - tx_skb = macb_tx_skb(queue, i); + tx_buff = macb_tx_buff(queue, i); desc = macb_tx_desc(queue, i); - ctrl = (u32)tx_skb->size; + ctrl = (u32)tx_buff->size; if (eof) { ctrl |= MACB_BIT(TX_LAST); eof = 0; @@ -2468,7 +2468,7 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BF(MSS_MFS, mss_mfs); /* Set TX buffer descriptor */ - macb_set_addr(bp, desc, tx_skb->mapping); + macb_set_addr(bp, desc, tx_buff->mapping); /* desc->addr must be visible to hardware before clearing * 'TX_USED' bit in desc->ctrl. */ @@ -2484,9 +2484,9 @@ static unsigned int macb_tx_map(struct macb *bp, netdev_err(bp->dev, "TX DMA map failed\n"); for (i = queue->tx_head; i != tx_head; i++) { - tx_skb = macb_tx_skb(queue, i); + tx_buff = macb_tx_buff(queue, i); - macb_tx_unmap(bp, tx_skb, 0); + macb_tx_unmap(bp, tx_buff, 0); } return -ENOMEM; @@ -2809,8 +2809,8 @@ static void macb_free_consistent(struct macb *bp) dma_free_coherent(dev, size, bp->queues[0].rx_ring, bp->queues[0].rx_ring_dma); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - kfree(queue->tx_skb); - queue->tx_skb = NULL; + kfree(queue->tx_buff); + queue->tx_buff = NULL; queue->tx_ring = NULL; queue->rx_ring = NULL; } @@ -2888,9 +2888,9 @@ static int macb_alloc_consistent(struct macb *bp) queue->rx_ring = rx + macb_rx_ring_size_per_queue(bp) * q; queue->rx_ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q; - size = bp->tx_ring_size * sizeof(struct macb_tx_skb); - queue->tx_skb = kmalloc(size, GFP_KERNEL); - if (!queue->tx_skb) + size = bp->tx_ring_size * sizeof(struct macb_tx_buff); + queue->tx_buff = kmalloc(size, GFP_KERNEL); + if (!queue->tx_buff) goto out_err; } -- 2.53.0 Rename struct 
macb_tx_buff member skb to ptr and introduce macb_tx_buff_type to identify the buffer type macb_tx_buff represents. Currently the buffer can only be MACB_TYPE_SKB, so the sk_buff case is handled unconditionally in the tx path. The remaining type handling will be handled by subsequent patches. This is the last preparatory step for XDP xmit support. Signed-off-by: Paolo Valerio Reviewed-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb.h | 25 ++++++++----- drivers/net/ethernet/cadence/macb_main.c | 45 ++++++++++++++---------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 1cc626088174..d8c581394b98 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -965,19 +965,28 @@ struct macb_dma_desc_ptp { /* Scaled PPM fraction */ #define PPM_FRACTION 16 -/* struct macb_tx_buff - data about an skb which is being transmitted - * @skb: skb currently being transmitted, only set for the last buffer - * of the frame - * @mapping: DMA address of the skb's fragment buffer +enum macb_tx_buff_type { + MACB_TYPE_SKB, + MACB_TYPE_XDP_TX, + MACB_TYPE_XDP_NDO, +}; + +/* struct macb_tx_buff - data about an skb or xdp frame which is being + * transmitted. + * @ptr: pointer to skb or xdp frame being transmitted, only set + * for the last buffer for sk_buff + * @mapping: DMA address of the skb's fragment or xdp buffer * @size: size of the DMA mapped buffer * @mapped_as_page: true when buffer was mapped with skb_frag_dma_map(), * false when buffer was mapped with dma_map_single() + * @type: type of buffer (MACB_TYPE_SKB, MACB_TYPE_XDP_TX, MACB_TYPE_XDP_NDO) */ struct macb_tx_buff { - struct sk_buff *skb; - dma_addr_t mapping; - size_t size; - bool mapped_as_page; + void *ptr; + dma_addr_t mapping; + size_t size; + bool mapped_as_page; + enum macb_tx_buff_type type; }; /* Hardware-collected statistics. 
Used when updating the network diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a71d36b18170..4c5dd70f003b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1197,7 +1197,8 @@ static int macb_halt_tx(struct macb *bp) bp, TSR); } -static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, int budget) +static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, + int budget) { if (tx_buff->mapping) { if (tx_buff->mapped_as_page) @@ -1209,9 +1210,9 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, int bud tx_buff->mapping = 0; } - if (tx_buff->skb) { - napi_consume_skb(tx_buff->skb, budget); - tx_buff->skb = NULL; + if (tx_buff->ptr) { + napi_consume_skb(tx_buff->ptr, budget); + tx_buff->ptr = NULL; } } @@ -1298,7 +1299,7 @@ static void macb_tx_error_task(struct work_struct *work) desc = macb_tx_desc(queue, tail); ctrl = desc->ctrl; tx_buff = macb_tx_buff(queue, tail); - skb = tx_buff->skb; + skb = tx_buff->ptr; if (ctrl & MACB_BIT(TX_USED)) { /* skb is set for the last buffer of the frame */ @@ -1306,7 +1307,7 @@ static void macb_tx_error_task(struct work_struct *work) macb_tx_unmap(bp, tx_buff, 0); tail++; tx_buff = macb_tx_buff(queue, tail); - skb = tx_buff->skb; + skb = tx_buff->ptr; } /* ctrl still refers to the first buffer descriptor @@ -1403,20 +1404,22 @@ static bool ptp_one_step_sync(struct sk_buff *skb) static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; - u16 queue_index = queue - bp->queues; unsigned long flags; unsigned int tail; unsigned int head; + u16 queue_index; int packets = 0; u32 bytes = 0; + queue_index = queue - bp->queues; + spin_lock_irqsave(&queue->tx_ptr_lock, flags); head = queue->tx_head; for (tail = queue->tx_tail; tail != head && packets < budget; tail++) { - struct macb_tx_buff *tx_buff; - struct sk_buff *skb; - struct macb_dma_desc 
*desc; - u32 ctrl; + struct macb_tx_buff *tx_buff; + struct macb_dma_desc *desc; + struct sk_buff *skb; + u32 ctrl; desc = macb_tx_desc(queue, tail); @@ -1434,7 +1437,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) /* Process all buffers of the current transmitted frame */ for (;; tail++) { tx_buff = macb_tx_buff(queue, tail); - skb = tx_buff->skb; + skb = tx_buff->ptr; /* First, update TX stats if needed */ if (skb) { @@ -2365,7 +2368,8 @@ static unsigned int macb_tx_map(struct macb *bp, goto dma_error; /* Save info to properly release resources */ - tx_buff->skb = NULL; + tx_buff->ptr = NULL; + tx_buff->type = MACB_TYPE_SKB; tx_buff->mapping = mapping; tx_buff->size = size; tx_buff->mapped_as_page = false; @@ -2393,7 +2397,8 @@ static unsigned int macb_tx_map(struct macb *bp, goto dma_error; /* Save info to properly release resources */ - tx_buff->skb = NULL; + tx_buff->ptr = NULL; + tx_buff->type = MACB_TYPE_SKB; tx_buff->mapping = mapping; tx_buff->size = size; tx_buff->mapped_as_page = true; @@ -2411,7 +2416,8 @@ static unsigned int macb_tx_map(struct macb *bp, } /* This is the last buffer of the frame: save socket buffer */ - tx_buff->skb = skb; + tx_buff->ptr = skb; + tx_buff->type = MACB_TYPE_SKB; /* Update TX ring: update buffer descriptors in reverse order * to avoid race condition @@ -5283,8 +5289,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb, netif_stop_queue(dev); /* Store packet information (to free when Tx completed) */ - lp->rm9200_txq[desc].skb = skb; + lp->rm9200_txq[desc].ptr = skb; lp->rm9200_txq[desc].size = skb->len; + lp->rm9200_txq[desc].type = MACB_TYPE_SKB; lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) { @@ -5376,9 +5383,9 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) dev->stats.tx_errors++; desc = 0; - if (lp->rm9200_txq[desc].skb) { - 
dev_consume_skb_irq(lp->rm9200_txq[desc].skb); - lp->rm9200_txq[desc].skb = NULL; + if (lp->rm9200_txq[desc].ptr) { + dev_consume_skb_irq(lp->rm9200_txq[desc].ptr); + lp->rm9200_txq[desc].ptr = NULL; dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping, lp->rm9200_txq[desc].size, DMA_TO_DEVICE); dev->stats.tx_packets++; -- 2.53.0 Introduce basic XDP support for macb/gem with the XDP_TX, XDP_PASS, XDP_DROP, XDP_REDIRECT verdict support. Signed-off-by: Paolo Valerio Tested-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb.h | 3 + drivers/net/ethernet/cadence/macb_main.c | 366 ++++++++++++++++++++--- 2 files changed, 335 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d8c581394b98..a1cec805ee92 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -15,6 +15,7 @@ #include #include #include +#include #define MACB_GREGS_NBR 16 #define MACB_GREGS_VERSION 2 @@ -1293,6 +1294,7 @@ struct macb_queue { struct queue_stats stats; struct page_pool *page_pool; struct sk_buff *skb; + struct xdp_rxq_info xdp_rxq; }; struct ethtool_rx_fs_item { @@ -1398,6 +1400,7 @@ struct macb { struct macb_pm_data pm_data; const struct macb_usrio_config *usrio; + struct bpf_prog __rcu *prog; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4c5dd70f003b..537d02264851 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -6,6 +6,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1197,6 +1198,18 @@ static int macb_halt_tx(struct macb *bp) bp, TSR); } +static void macb_tx_release_buff(void *buff, enum macb_tx_buff_type type, int budget) +{ + if (type == MACB_TYPE_SKB) { + napi_consume_skb(buff, budget); + } else { + if (!budget) + xdp_return_frame(buff); + else + xdp_return_frame_rx_napi(buff); 
+ } +} + static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, int budget) { @@ -1211,7 +1224,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, } if (tx_buff->ptr) { - napi_consume_skb(tx_buff->ptr, budget); + macb_tx_release_buff(tx_buff->ptr, tx_buff->type, budget); tx_buff->ptr = NULL; } } @@ -1275,7 +1288,8 @@ static void macb_tx_error_task(struct work_struct *work) * network engine about the macb/gem being halted. */ napi_disable(&queue->napi_tx); - spin_lock_irqsave(&bp->lock, flags); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + spin_lock(&bp->lock); /* Make sure nobody is trying to queue up new packets */ netif_tx_stop_all_queues(bp->dev); @@ -1299,6 +1313,10 @@ static void macb_tx_error_task(struct work_struct *work) desc = macb_tx_desc(queue, tail); ctrl = desc->ctrl; tx_buff = macb_tx_buff(queue, tail); + + if (tx_buff->type != MACB_TYPE_SKB) + goto unmap; + skb = tx_buff->ptr; if (ctrl & MACB_BIT(TX_USED)) { @@ -1336,6 +1354,7 @@ static void macb_tx_error_task(struct work_struct *work) desc->ctrl = ctrl | MACB_BIT(TX_USED); } +unmap: macb_tx_unmap(bp, tx_buff, 0); } @@ -1367,7 +1386,8 @@ static void macb_tx_error_task(struct work_struct *work) netif_tx_start_all_queues(bp->dev); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irqrestore(&bp->lock, flags); + spin_unlock(&bp->lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); napi_enable(&queue->napi_tx); } @@ -1405,6 +1425,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; unsigned long flags; + int skb_packets = 0; unsigned int tail; unsigned int head; u16 queue_index; @@ -1419,6 +1440,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) struct macb_tx_buff *tx_buff; struct macb_dma_desc *desc; struct sk_buff *skb; + void *data = NULL; u32 ctrl; desc = macb_tx_desc(queue, tail); @@ -1437,10 +1459,18 @@ static int macb_tx_complete(struct macb_queue 
*queue, int budget) /* Process all buffers of the current transmitted frame */ for (;; tail++) { tx_buff = macb_tx_buff(queue, tail); - skb = tx_buff->ptr; + + if (tx_buff->type != MACB_TYPE_SKB) { + data = tx_buff->ptr; + packets++; + goto unmap; + } /* First, update TX stats if needed */ - if (skb) { + if (tx_buff->ptr) { + data = tx_buff->ptr; + skb = tx_buff->ptr; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && !ptp_one_step_sync(skb)) gem_ptp_do_txstamp(bp, skb, desc); @@ -1452,24 +1482,26 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) queue->stats.tx_packets++; bp->dev->stats.tx_bytes += skb->len; queue->stats.tx_bytes += skb->len; + skb_packets++; packets++; bytes += skb->len; } +unmap: /* Now we can safely release resources */ macb_tx_unmap(bp, tx_buff, budget); - /* skb is set only for the last buffer of the frame. - * WARNING: at this point skb has been freed by + /* data is set only for the last buffer of the frame. + * WARNING: at this point the buffer has been freed by * macb_tx_unmap(). 
*/ - if (skb) + if (data) break; } } netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index), - packets, bytes); + skb_packets, bytes); queue->tx_tail = tail; if (__netif_subqueue_stopped(bp->dev, queue_index) && @@ -1519,9 +1551,27 @@ static int gem_rx_data_len(struct macb *bp, struct macb_queue *queue, return len; } +static unsigned int gem_rx_pad(struct macb *bp) +{ + if (rcu_access_pointer(bp->prog)) + return XDP_PACKET_HEADROOM; + + return NET_SKB_PAD; +} + +static unsigned int gem_max_rx_data_size(int base_sz) +{ + return SKB_DATA_ALIGN(base_sz + ETH_HLEN + ETH_FCS_LEN); +} + +static unsigned int __gem_total_rx_buffer_size(int data_sz, unsigned int headroom) +{ + return SKB_HEAD_ALIGN(data_sz + headroom); +} + static unsigned int gem_total_rx_buffer_size(struct macb *bp) { - return SKB_HEAD_ALIGN(bp->rx_buffer_size + NET_SKB_PAD); + return __gem_total_rx_buffer_size(bp->rx_buffer_size, gem_rx_pad(bp)); } static int gem_rx_refill(struct macb_queue *queue, bool napi) @@ -1558,7 +1608,8 @@ static int gem_rx_refill(struct macb_queue *queue, bool napi) break; } - paddr = page_pool_get_dma_addr(page) + NET_SKB_PAD + offset; + paddr = page_pool_get_dma_addr(page) + + gem_rx_pad(bp) + offset; dma_sync_single_for_device(&bp->pdev->dev, paddr, bp->rx_buffer_size, @@ -1612,12 +1663,156 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, */ } +static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, + struct net_device *dev, dma_addr_t addr) +{ + struct macb_tx_buff *tx_buff; + int cpu = smp_processor_id(); + struct macb_dma_desc *desc; + struct macb_queue *queue; + unsigned int next_head; + unsigned long flags; + u16 queue_index; + int err = 0; + u32 ctrl; + + queue_index = cpu % bp->num_queues; + queue = &bp->queues[queue_index]; + + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + + /* This is a hard error, log it. 
*/ + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) { + netif_stop_subqueue(dev, queue_index); + netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n", + queue->tx_head, queue->tx_tail); + err = -ENOMEM; + goto unlock; + } + + /* progs can adjust the head. Sync and set the adjusted one. + * This also implicitly takes into account ip alignment, + * if present. + */ + addr += xdpf->headroom + sizeof(*xdpf); + + dma_sync_single_for_device(&bp->pdev->dev, addr, + xdpf->len, DMA_BIDIRECTIONAL); + + next_head = queue->tx_head + 1; + + ctrl = MACB_BIT(TX_USED); + desc = macb_tx_desc(queue, next_head); + desc->ctrl = ctrl; + + desc = macb_tx_desc(queue, queue->tx_head); + tx_buff = macb_tx_buff(queue, queue->tx_head); + tx_buff->ptr = xdpf; + tx_buff->type = MACB_TYPE_XDP_TX; + tx_buff->mapping = 0; + tx_buff->size = xdpf->len; + tx_buff->mapped_as_page = false; + + ctrl = (u32)tx_buff->size; + ctrl |= MACB_BIT(TX_LAST); + + if (unlikely(macb_tx_ring_wrap(bp, queue->tx_head) == (bp->tx_ring_size - 1))) + ctrl |= MACB_BIT(TX_WRAP); + + /* Set TX buffer descriptor */ + macb_set_addr(bp, desc, addr); + /* desc->addr must be visible to hardware before clearing + * 'TX_USED' bit in desc->ctrl. 
+ */ + wmb(); + desc->ctrl = ctrl; + queue->tx_head = next_head; + + /* Make newly initialized descriptor visible to hardware */ + wmb(); + + spin_lock(&bp->lock); + macb_tx_lpi_wake(bp); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); + spin_unlock(&bp->lock); + + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) + netif_stop_subqueue(dev, queue_index); + +unlock: + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); + + return err; +} + +static u32 gem_xdp_run(struct macb_queue *queue, void *buff_head, + unsigned int *len, unsigned int *headroom, + dma_addr_t addr) +{ + struct net_device *dev; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + struct xdp_buff xdp; + + u32 act = XDP_PASS; + + rcu_read_lock(); + + prog = rcu_dereference(queue->bp->prog); + if (!prog) + goto out; + + xdp_init_buff(&xdp, gem_total_rx_buffer_size(queue->bp), &queue->xdp_rxq); + xdp_prepare_buff(&xdp, buff_head, *headroom, *len, false); + xdp_buff_clear_frags_flag(&xdp); + dev = queue->bp->dev; + + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + *len = xdp.data_end - xdp.data; + *headroom = xdp.data - xdp.data_hard_start; + goto out; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(dev, &xdp, prog))) { + act = XDP_DROP; + break; + } + goto out; + case XDP_TX: + xdpf = xdp_convert_buff_to_frame(&xdp); + if (unlikely(!xdpf) || macb_xdp_submit_frame(queue->bp, xdpf, + dev, addr)) { + act = XDP_DROP; + break; + } + goto out; + default: + bpf_warn_invalid_xdp_action(dev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dev, prog, act); + fallthrough; + case XDP_DROP: + break; + } + + page_pool_put_full_page(queue->page_pool, + virt_to_head_page(xdp.data), true); +out: + rcu_read_unlock(); + + return act; +} + static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, int budget) { struct skb_shared_info *shinfo; struct macb *bp = queue->bp; struct macb_dma_desc *desc; + bool xdp_flush = false; + 
unsigned int headroom; unsigned int entry; struct page *page; void *buff_head; @@ -1625,11 +1820,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, int data_len; int nr_frags; - while (count < budget) { bool rxused, first_frame, last_frame; dma_addr_t addr; u32 ctrl; + u32 ret; entry = macb_rx_ring_wrap(bp, queue->rx_tail); desc = macb_rx_desc(queue, entry); @@ -1669,9 +1864,9 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, if (data_len < 0) goto free_frags; - addr += first_frame ? bp->rx_ip_align : 0; - - dma_sync_single_for_cpu(&bp->pdev->dev, addr, data_len, + dma_sync_single_for_cpu(&bp->pdev->dev, + addr + (first_frame ? bp->rx_ip_align : 0), + data_len, page_pool_get_dma_dir(queue->page_pool)); if (first_frame) { @@ -1683,6 +1878,18 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->stats.rx_dropped++; } + headroom = bp->rx_headroom; + + if (last_frame) { + ret = gem_xdp_run(queue, buff_head, &data_len, + &headroom, addr - gem_rx_pad(bp)); + if (ret == XDP_REDIRECT) + xdp_flush = true; + + if (ret != XDP_PASS) + goto next_frame; + } + queue->skb = napi_build_skb(buff_head, gem_total_rx_buffer_size(bp)); if (unlikely(!queue->skb)) { if (net_ratelimit()) @@ -1702,7 +1909,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, * setting the low 2/3 bits. * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits. 
*/ - skb_reserve(queue->skb, bp->rx_headroom); + skb_reserve(queue->skb, headroom); skb_mark_for_recycle(queue->skb); skb_put(queue->skb, data_len); } else { @@ -1714,15 +1921,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, goto free_frags; skb_add_rx_frag(queue->skb, nr_frags, page, - buff_head - page_address(page) + NET_SKB_PAD, + buff_head - page_address(page) + gem_rx_pad(bp), data_len, gem_total_rx_buffer_size(bp)); } /* now everything is ready for receiving packet */ - queue->rx_buff[entry] = NULL; - - netdev_vdbg(bp->dev, "%s %u (len %u)\n", __func__, entry, data_len); - if (last_frame) { bp->dev->stats.rx_packets++; queue->stats.rx_packets++; @@ -1750,6 +1953,8 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->skb = NULL; } +next_frame: + queue->rx_buff[entry] = NULL; continue; free_frags: @@ -1768,6 +1973,9 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->rx_buff[entry] = NULL; } + if (xdp_flush) + xdp_do_flush(); + gem_rx_refill(queue, true); return count; @@ -2709,13 +2917,13 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) static void macb_init_rx_buffer_size(struct macb *bp, unsigned int mtu) { unsigned int overhead; - size_t size; if (!macb_is_gem(bp)) { bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; } else { - size = mtu + ETH_HLEN + ETH_FCS_LEN; - bp->rx_buffer_size = SKB_DATA_ALIGN(size + bp->rx_ip_align); + bp->rx_headroom = gem_rx_pad(bp) + bp->rx_ip_align; + bp->rx_buffer_size = gem_max_rx_data_size(mtu + bp->rx_ip_align); + if (gem_total_rx_buffer_size(bp) > PAGE_SIZE) { overhead = bp->rx_headroom + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -2766,6 +2974,8 @@ static void gem_free_rx_buffers(struct macb *bp) kfree(queue->rx_buff); queue->rx_buff = NULL; + if (xdp_rxq_info_is_reg(&queue->xdp_rxq)) + xdp_rxq_info_unreg(&queue->xdp_rxq); page_pool_destroy(queue->page_pool); queue->page_pool = NULL; } @@ -2922,37 +3132,62 @@ 
static int macb_alloc_consistent(struct macb *bp) return -ENOMEM; } -static int gem_create_page_pool(struct macb_queue *queue) +static int gem_create_page_pool(struct macb_queue *queue, int qid) { struct page_pool_params pp_params = { .order = 0, .flags = PP_FLAG_DMA_MAP, .pool_size = queue->bp->rx_ring_size, .nid = NUMA_NO_NODE, - .dma_dir = DMA_FROM_DEVICE, + .dma_dir = rcu_access_pointer(queue->bp->prog) + ? DMA_BIDIRECTIONAL + : DMA_FROM_DEVICE, .dev = &queue->bp->pdev->dev, .netdev = queue->bp->dev, .napi = &queue->napi_rx, .max_len = PAGE_SIZE, }; struct page_pool *pool; - int err = 0; + int err; /* This can happen in the case of HRESP error. * Do nothing as page pool is already existing. */ if (queue->page_pool) - return err; + return 0; pool = page_pool_create(&pp_params); if (IS_ERR(pool)) { netdev_err(queue->bp->dev, "cannot create rx page pool\n"); err = PTR_ERR(pool); - pool = NULL; + goto clear_pool; } queue->page_pool = pool; + err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->bp->dev, qid, + queue->napi_rx.napi_id); + if (err < 0) { + netdev_err(queue->bp->dev, "xdp: failed to register rxq info\n"); + goto destroy_pool; + } + + err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq, MEM_TYPE_PAGE_POOL, + queue->page_pool); + if (err) { + netdev_err(queue->bp->dev, "xdp: failed to register rxq memory model\n"); + goto unreg_info; + } + + return 0; + +unreg_info: + xdp_rxq_info_unreg(&queue->xdp_rxq); +destroy_pool: + page_pool_destroy(pool); +clear_pool: + queue->page_pool = NULL; + return err; } @@ -2999,7 +3234,7 @@ static int gem_init_rings(struct macb *bp, bool fail_early) /* This is a hard failure. In case of HRESP error * recovery we always reuse the existing page pool. 
*/ - last_err = gem_create_page_pool(queue); + last_err = gem_create_page_pool(queue, q); if (last_err) break; @@ -3449,11 +3684,24 @@ static int macb_close(struct net_device *dev) return 0; } +static bool gem_xdp_valid_mtu(struct macb *bp, int mtu) +{ + return __gem_total_rx_buffer_size(gem_max_rx_data_size(mtu + bp->rx_ip_align), + XDP_PACKET_HEADROOM) <= PAGE_SIZE; +} + static int macb_change_mtu(struct net_device *dev, int new_mtu) { + struct macb *bp = netdev_priv(dev); + if (netif_running(dev)) return -EBUSY; + if (rcu_access_pointer(bp->prog) && !gem_xdp_valid_mtu(bp, new_mtu)) { + netdev_err(dev, "MTU %d too large for XDP\n", new_mtu); + return -EINVAL; + } + WRITE_ONCE(dev->mtu, new_mtu); return 0; @@ -3471,6 +3719,55 @@ static int macb_set_mac_addr(struct net_device *dev, void *addr) return 0; } +static int gem_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct macb *bp = netdev_priv(dev); + struct bpf_prog *old_prog; + bool need_update, running; + int err; + + if (prog && !gem_xdp_valid_mtu(bp, dev->mtu)) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); + return -EOPNOTSUPP; + } + + running = netif_running(dev); + need_update = !!bp->prog != !!prog; + if (running && need_update) + macb_close(dev); + + old_prog = rcu_replace_pointer(bp->prog, prog, lockdep_rtnl_is_held()); + + if (running && need_update) { + err = macb_open(dev); + if (err) { + rcu_assign_pointer(bp->prog, old_prog); + return err; + } + } + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int gem_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct macb *bp = netdev_priv(dev); + + if (!macb_is_gem(bp)) + return -EOPNOTSUPP; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return gem_xdp_setup(dev, xdp->prog, xdp->extack); + default: + return -EOPNOTSUPP; + } +} + static void gem_update_stats(struct macb *bp) { struct macb_queue *queue; @@ -4745,6 +5042,7 @@ static const struct net_device_ops 
macb_netdev_ops = { .ndo_hwtstamp_set = macb_hwtstamp_set, .ndo_hwtstamp_get = macb_hwtstamp_get, .ndo_setup_tc = macb_setup_tc, + .ndo_bpf = gem_xdp, }; /* Configure peripheral capabilities according to device tree @@ -6031,11 +6329,11 @@ static int macb_probe(struct platform_device *pdev) goto err_out_phy_exit; if (macb_is_gem(bp)) { - bp->rx_headroom = NET_SKB_PAD; - if (!(bp->caps & MACB_CAPS_RSC)) { + if (!(bp->caps & MACB_CAPS_RSC)) bp->rx_ip_align = NET_IP_ALIGN; - bp->rx_headroom += NET_IP_ALIGN; - } + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT; } netif_carrier_off(dev); -- 2.53.0 Introduce ndo_xdp_xmit function for redirection, update macb_xdp_submit_frame() accordingly, and advertise NETDEV_XDP_ACT_NDO_XMIT capability. Signed-off-by: Paolo Valerio Tested-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb_main.c | 73 +++++++++++++++++++----- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 537d02264851..38b15d1e7127 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1202,11 +1202,13 @@ static void macb_tx_release_buff(void *buff, enum macb_tx_buff_type type, int bu { if (type == MACB_TYPE_SKB) { napi_consume_skb(buff, budget); - } else { + } else if (type == MACB_TYPE_XDP_TX) { if (!budget) xdp_return_frame(buff); else xdp_return_frame_rx_napi(buff); + } else { + xdp_return_frame(buff); } } @@ -1664,20 +1666,24 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, } static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, - struct net_device *dev, dma_addr_t addr) + struct net_device *dev, bool dma_map, + dma_addr_t addr) { + enum macb_tx_buff_type buff_type; struct macb_tx_buff *tx_buff; int cpu = smp_processor_id(); struct macb_dma_desc *desc; struct macb_queue *queue; unsigned int next_head; unsigned long flags; + 
dma_addr_t mapping; u16 queue_index; int err = 0; u32 ctrl; queue_index = cpu % bp->num_queues; queue = &bp->queues[queue_index]; + buff_type = dma_map ? MACB_TYPE_XDP_NDO : MACB_TYPE_XDP_TX; spin_lock_irqsave(&queue->tx_ptr_lock, flags); @@ -1690,14 +1696,23 @@ static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, goto unlock; } - /* progs can adjust the head. Sync and set the adjusted one. - * This also implicitly takes into account ip alignment, - * if present. - */ - addr += xdpf->headroom + sizeof(*xdpf); - - dma_sync_single_for_device(&bp->pdev->dev, addr, - xdpf->len, DMA_BIDIRECTIONAL); + if (dma_map) { + mapping = dma_map_single(&bp->pdev->dev, + xdpf->data, + xdpf->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { + err = -ENOMEM; + goto unlock; + } + } else { + /* progs can adjust the head. Sync and set the adjusted one. + * This also implicitly takes into account ip alignment, + * if present. + */ + mapping = addr + xdpf->headroom + sizeof(*xdpf); + dma_sync_single_for_device(&bp->pdev->dev, mapping, + xdpf->len, DMA_BIDIRECTIONAL); + } next_head = queue->tx_head + 1; @@ -1708,8 +1723,8 @@ static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, desc = macb_tx_desc(queue, queue->tx_head); tx_buff = macb_tx_buff(queue, queue->tx_head); tx_buff->ptr = xdpf; - tx_buff->type = MACB_TYPE_XDP_TX; - tx_buff->mapping = 0; + tx_buff->type = buff_type; + tx_buff->mapping = dma_map ? mapping : 0; tx_buff->size = xdpf->len; tx_buff->mapped_as_page = false; @@ -1720,7 +1735,7 @@ static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, ctrl |= MACB_BIT(TX_WRAP); /* Set TX buffer descriptor */ - macb_set_addr(bp, desc, addr); + macb_set_addr(bp, desc, mapping); /* desc->addr must be visible to hardware before clearing * 'TX_USED' bit in desc->ctrl. 
*/ @@ -1745,6 +1760,32 @@ static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, return err; } +static int gem_xdp_xmit(struct net_device *dev, int num_frame, + struct xdp_frame **frames, u32 flags) +{ + struct macb *bp = netdev_priv(dev); + u32 xmitted = 0; + int i; + + if (!macb_is_gem(bp)) + return -EOPNOTSUPP; + + if (unlikely(!netif_carrier_ok(dev))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (i = 0; i < num_frame; i++) { + if (macb_xdp_submit_frame(bp, frames[i], dev, true, 0)) + break; + + xmitted++; + } + + return xmitted; +} + static u32 gem_xdp_run(struct macb_queue *queue, void *buff_head, unsigned int *len, unsigned int *headroom, dma_addr_t addr) @@ -1782,7 +1823,7 @@ static u32 gem_xdp_run(struct macb_queue *queue, void *buff_head, case XDP_TX: xdpf = xdp_convert_buff_to_frame(&xdp); if (unlikely(!xdpf) || macb_xdp_submit_frame(queue->bp, xdpf, - dev, addr)) { + dev, false, addr)) { act = XDP_DROP; break; } @@ -5043,6 +5084,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_hwtstamp_get = macb_hwtstamp_get, .ndo_setup_tc = macb_setup_tc, .ndo_bpf = gem_xdp, + .ndo_xdp_xmit = gem_xdp_xmit, }; /* Configure peripheral capabilities according to device tree @@ -6333,7 +6375,8 @@ static int macb_probe(struct platform_device *pdev) bp->rx_ip_align = NET_IP_ALIGN; dev->xdp_features = NETDEV_XDP_ACT_BASIC | - NETDEV_XDP_ACT_REDIRECT; + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; } netif_carrier_off(dev); -- 2.53.0