Introduce basic XDP support for macb/gem, handling the XDP_TX, XDP_PASS, XDP_DROP and XDP_REDIRECT verdicts. Signed-off-by: Paolo Valerio Tested-by: Nicolai Buchwitz --- drivers/net/ethernet/cadence/macb.h | 3 + drivers/net/ethernet/cadence/macb_main.c | 366 ++++++++++++++++++++--- 2 files changed, 335 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d8c581394b98..a1cec805ee92 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -15,6 +15,7 @@ #include #include #include +#include #define MACB_GREGS_NBR 16 #define MACB_GREGS_VERSION 2 @@ -1293,6 +1294,7 @@ struct macb_queue { struct queue_stats stats; struct page_pool *page_pool; struct sk_buff *skb; + struct xdp_rxq_info xdp_rxq; }; struct ethtool_rx_fs_item { @@ -1398,6 +1400,7 @@ struct macb { struct macb_pm_data pm_data; const struct macb_usrio_config *usrio; + struct bpf_prog __rcu *prog; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4c5dd70f003b..537d02264851 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -6,6 +6,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1197,6 +1198,18 @@ static int macb_halt_tx(struct macb *bp) bp, TSR); } +static void macb_tx_release_buff(void *buff, enum macb_tx_buff_type type, int budget) +{ + if (type == MACB_TYPE_SKB) { + napi_consume_skb(buff, budget); + } else { + if (!budget) + xdp_return_frame(buff); + else + xdp_return_frame_rx_napi(buff); + } +} + static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, int budget) { @@ -1211,7 +1224,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff, } if (tx_buff->ptr) { - napi_consume_skb(tx_buff->ptr, budget); + macb_tx_release_buff(tx_buff->ptr, tx_buff->type, budget); tx_buff->ptr = NULL; } } 
@@ -1275,7 +1288,8 @@ static void macb_tx_error_task(struct work_struct *work) * network engine about the macb/gem being halted. */ napi_disable(&queue->napi_tx); - spin_lock_irqsave(&bp->lock, flags); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + spin_lock(&bp->lock); /* Make sure nobody is trying to queue up new packets */ netif_tx_stop_all_queues(bp->dev); @@ -1299,6 +1313,10 @@ static void macb_tx_error_task(struct work_struct *work) desc = macb_tx_desc(queue, tail); ctrl = desc->ctrl; tx_buff = macb_tx_buff(queue, tail); + + if (tx_buff->type != MACB_TYPE_SKB) + goto unmap; + skb = tx_buff->ptr; if (ctrl & MACB_BIT(TX_USED)) { @@ -1336,6 +1354,7 @@ static void macb_tx_error_task(struct work_struct *work) desc->ctrl = ctrl | MACB_BIT(TX_USED); } +unmap: macb_tx_unmap(bp, tx_buff, 0); } @@ -1367,7 +1386,8 @@ static void macb_tx_error_task(struct work_struct *work) netif_tx_start_all_queues(bp->dev); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irqrestore(&bp->lock, flags); + spin_unlock(&bp->lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); napi_enable(&queue->napi_tx); } @@ -1405,6 +1425,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; unsigned long flags; + int skb_packets = 0; unsigned int tail; unsigned int head; u16 queue_index; @@ -1419,6 +1440,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) struct macb_tx_buff *tx_buff; struct macb_dma_desc *desc; struct sk_buff *skb; + void *data = NULL; u32 ctrl; desc = macb_tx_desc(queue, tail); @@ -1437,10 +1459,18 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) /* Process all buffers of the current transmitted frame */ for (;; tail++) { tx_buff = macb_tx_buff(queue, tail); - skb = tx_buff->ptr; + + if (tx_buff->type != MACB_TYPE_SKB) { + data = tx_buff->ptr; + packets++; + goto unmap; + } /* First, update TX stats if needed */ - if (skb) { + if (tx_buff->ptr) { + data = 
tx_buff->ptr; + skb = tx_buff->ptr; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && !ptp_one_step_sync(skb)) gem_ptp_do_txstamp(bp, skb, desc); @@ -1452,24 +1482,26 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) queue->stats.tx_packets++; bp->dev->stats.tx_bytes += skb->len; queue->stats.tx_bytes += skb->len; + skb_packets++; packets++; bytes += skb->len; } +unmap: /* Now we can safely release resources */ macb_tx_unmap(bp, tx_buff, budget); - /* skb is set only for the last buffer of the frame. - * WARNING: at this point skb has been freed by + /* data is set only for the last buffer of the frame. + * WARNING: at this point the buffer has been freed by * macb_tx_unmap(). */ - if (skb) + if (data) break; } } netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index), - packets, bytes); + skb_packets, bytes); queue->tx_tail = tail; if (__netif_subqueue_stopped(bp->dev, queue_index) && @@ -1519,9 +1551,27 @@ static int gem_rx_data_len(struct macb *bp, struct macb_queue *queue, return len; } +static unsigned int gem_rx_pad(struct macb *bp) +{ + if (rcu_access_pointer(bp->prog)) + return XDP_PACKET_HEADROOM; + + return NET_SKB_PAD; +} + +static unsigned int gem_max_rx_data_size(int base_sz) +{ + return SKB_DATA_ALIGN(base_sz + ETH_HLEN + ETH_FCS_LEN); +} + +static unsigned int __gem_total_rx_buffer_size(int data_sz, unsigned int headroom) +{ + return SKB_HEAD_ALIGN(data_sz + headroom); +} + static unsigned int gem_total_rx_buffer_size(struct macb *bp) { - return SKB_HEAD_ALIGN(bp->rx_buffer_size + NET_SKB_PAD); + return __gem_total_rx_buffer_size(bp->rx_buffer_size, gem_rx_pad(bp)); } static int gem_rx_refill(struct macb_queue *queue, bool napi) @@ -1558,7 +1608,8 @@ static int gem_rx_refill(struct macb_queue *queue, bool napi) break; } - paddr = page_pool_get_dma_addr(page) + NET_SKB_PAD + offset; + paddr = page_pool_get_dma_addr(page) + + gem_rx_pad(bp) + offset; dma_sync_single_for_device(&bp->pdev->dev, paddr, 
bp->rx_buffer_size, @@ -1612,12 +1663,156 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, */ } +static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf, + struct net_device *dev, dma_addr_t addr) +{ + struct macb_tx_buff *tx_buff; + int cpu = smp_processor_id(); + struct macb_dma_desc *desc; + struct macb_queue *queue; + unsigned int next_head; + unsigned long flags; + u16 queue_index; + int err = 0; + u32 ctrl; + + queue_index = cpu % bp->num_queues; + queue = &bp->queues[queue_index]; + + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + + /* This is a hard error, log it. */ + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) { + netif_stop_subqueue(dev, queue_index); + netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n", + queue->tx_head, queue->tx_tail); + err = -ENOMEM; + goto unlock; + } + + /* progs can adjust the head. Sync and set the adjusted one. + * This also implicitly takes into account ip alignment, + * if present. + */ + addr += xdpf->headroom + sizeof(*xdpf); + + dma_sync_single_for_device(&bp->pdev->dev, addr, + xdpf->len, DMA_BIDIRECTIONAL); + + next_head = queue->tx_head + 1; + + ctrl = MACB_BIT(TX_USED); + desc = macb_tx_desc(queue, next_head); + desc->ctrl = ctrl; + + desc = macb_tx_desc(queue, queue->tx_head); + tx_buff = macb_tx_buff(queue, queue->tx_head); + tx_buff->ptr = xdpf; + tx_buff->type = MACB_TYPE_XDP_TX; + tx_buff->mapping = 0; + tx_buff->size = xdpf->len; + tx_buff->mapped_as_page = false; + + ctrl = (u32)tx_buff->size; + ctrl |= MACB_BIT(TX_LAST); + + if (unlikely(macb_tx_ring_wrap(bp, queue->tx_head) == (bp->tx_ring_size - 1))) + ctrl |= MACB_BIT(TX_WRAP); + + /* Set TX buffer descriptor */ + macb_set_addr(bp, desc, addr); + /* desc->addr must be visible to hardware before clearing + * 'TX_USED' bit in desc->ctrl. 
+ */ + wmb(); + desc->ctrl = ctrl; + queue->tx_head = next_head; + + /* Make newly initialized descriptor visible to hardware */ + wmb(); + + spin_lock(&bp->lock); + macb_tx_lpi_wake(bp); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); + spin_unlock(&bp->lock); + + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) + netif_stop_subqueue(dev, queue_index); + +unlock: + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); + + return err; +} + +static u32 gem_xdp_run(struct macb_queue *queue, void *buff_head, + unsigned int *len, unsigned int *headroom, + dma_addr_t addr) +{ + struct net_device *dev; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + struct xdp_buff xdp; + + u32 act = XDP_PASS; + + rcu_read_lock(); + + prog = rcu_dereference(queue->bp->prog); + if (!prog) + goto out; + + xdp_init_buff(&xdp, gem_total_rx_buffer_size(queue->bp), &queue->xdp_rxq); + xdp_prepare_buff(&xdp, buff_head, *headroom, *len, false); + xdp_buff_clear_frags_flag(&xdp); + dev = queue->bp->dev; + + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + *len = xdp.data_end - xdp.data; + *headroom = xdp.data - xdp.data_hard_start; + goto out; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(dev, &xdp, prog))) { + act = XDP_DROP; + break; + } + goto out; + case XDP_TX: + xdpf = xdp_convert_buff_to_frame(&xdp); + if (unlikely(!xdpf) || macb_xdp_submit_frame(queue->bp, xdpf, + dev, addr)) { + act = XDP_DROP; + break; + } + goto out; + default: + bpf_warn_invalid_xdp_action(dev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dev, prog, act); + fallthrough; + case XDP_DROP: + break; + } + + page_pool_put_full_page(queue->page_pool, + virt_to_head_page(xdp.data), true); +out: + rcu_read_unlock(); + + return act; +} + static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, int budget) { struct skb_shared_info *shinfo; struct macb *bp = queue->bp; struct macb_dma_desc *desc; + bool xdp_flush = false; + 
unsigned int headroom; unsigned int entry; struct page *page; void *buff_head; @@ -1625,11 +1820,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, int data_len; int nr_frags; - while (count < budget) { bool rxused, first_frame, last_frame; dma_addr_t addr; u32 ctrl; + u32 ret; entry = macb_rx_ring_wrap(bp, queue->rx_tail); desc = macb_rx_desc(queue, entry); @@ -1669,9 +1864,9 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, if (data_len < 0) goto free_frags; - addr += first_frame ? bp->rx_ip_align : 0; - - dma_sync_single_for_cpu(&bp->pdev->dev, addr, data_len, + dma_sync_single_for_cpu(&bp->pdev->dev, + addr + (first_frame ? bp->rx_ip_align : 0), + data_len, page_pool_get_dma_dir(queue->page_pool)); if (first_frame) { @@ -1683,6 +1878,18 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->stats.rx_dropped++; } + headroom = bp->rx_headroom; + + if (last_frame) { + ret = gem_xdp_run(queue, buff_head, &data_len, + &headroom, addr - gem_rx_pad(bp)); + if (ret == XDP_REDIRECT) + xdp_flush = true; + + if (ret != XDP_PASS) + goto next_frame; + } + queue->skb = napi_build_skb(buff_head, gem_total_rx_buffer_size(bp)); if (unlikely(!queue->skb)) { if (net_ratelimit()) @@ -1702,7 +1909,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, * setting the low 2/3 bits. * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits. 
*/ - skb_reserve(queue->skb, bp->rx_headroom); + skb_reserve(queue->skb, headroom); skb_mark_for_recycle(queue->skb); skb_put(queue->skb, data_len); } else { @@ -1714,15 +1921,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, goto free_frags; skb_add_rx_frag(queue->skb, nr_frags, page, - buff_head - page_address(page) + NET_SKB_PAD, + buff_head - page_address(page) + gem_rx_pad(bp), data_len, gem_total_rx_buffer_size(bp)); } /* now everything is ready for receiving packet */ - queue->rx_buff[entry] = NULL; - - netdev_vdbg(bp->dev, "%s %u (len %u)\n", __func__, entry, data_len); - if (last_frame) { bp->dev->stats.rx_packets++; queue->stats.rx_packets++; @@ -1750,6 +1953,8 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->skb = NULL; } +next_frame: + queue->rx_buff[entry] = NULL; continue; free_frags: @@ -1768,6 +1973,9 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, queue->rx_buff[entry] = NULL; } + if (xdp_flush) + xdp_do_flush(); + gem_rx_refill(queue, true); return count; @@ -2709,13 +2917,13 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) static void macb_init_rx_buffer_size(struct macb *bp, unsigned int mtu) { unsigned int overhead; - size_t size; if (!macb_is_gem(bp)) { bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; } else { - size = mtu + ETH_HLEN + ETH_FCS_LEN; - bp->rx_buffer_size = SKB_DATA_ALIGN(size + bp->rx_ip_align); + bp->rx_headroom = gem_rx_pad(bp) + bp->rx_ip_align; + bp->rx_buffer_size = gem_max_rx_data_size(mtu + bp->rx_ip_align); + if (gem_total_rx_buffer_size(bp) > PAGE_SIZE) { overhead = bp->rx_headroom + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -2766,6 +2974,8 @@ static void gem_free_rx_buffers(struct macb *bp) kfree(queue->rx_buff); queue->rx_buff = NULL; + if (xdp_rxq_info_is_reg(&queue->xdp_rxq)) + xdp_rxq_info_unreg(&queue->xdp_rxq); page_pool_destroy(queue->page_pool); queue->page_pool = NULL; } @@ -2922,37 +3132,62 @@ 
static int macb_alloc_consistent(struct macb *bp) return -ENOMEM; } -static int gem_create_page_pool(struct macb_queue *queue) +static int gem_create_page_pool(struct macb_queue *queue, int qid) { struct page_pool_params pp_params = { .order = 0, .flags = PP_FLAG_DMA_MAP, .pool_size = queue->bp->rx_ring_size, .nid = NUMA_NO_NODE, - .dma_dir = DMA_FROM_DEVICE, + .dma_dir = rcu_access_pointer(queue->bp->prog) + ? DMA_BIDIRECTIONAL + : DMA_FROM_DEVICE, .dev = &queue->bp->pdev->dev, .netdev = queue->bp->dev, .napi = &queue->napi_rx, .max_len = PAGE_SIZE, }; struct page_pool *pool; - int err = 0; + int err; /* This can happen in the case of HRESP error. * Do nothing as page pool is already existing. */ if (queue->page_pool) - return err; + return 0; pool = page_pool_create(&pp_params); if (IS_ERR(pool)) { netdev_err(queue->bp->dev, "cannot create rx page pool\n"); err = PTR_ERR(pool); - pool = NULL; + goto clear_pool; } queue->page_pool = pool; + err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->bp->dev, qid, + queue->napi_rx.napi_id); + if (err < 0) { + netdev_err(queue->bp->dev, "xdp: failed to register rxq info\n"); + goto destroy_pool; + } + + err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq, MEM_TYPE_PAGE_POOL, + queue->page_pool); + if (err) { + netdev_err(queue->bp->dev, "xdp: failed to register rxq memory model\n"); + goto unreg_info; + } + + return 0; + +unreg_info: + xdp_rxq_info_unreg(&queue->xdp_rxq); +destroy_pool: + page_pool_destroy(pool); +clear_pool: + queue->page_pool = NULL; + return err; } @@ -2999,7 +3234,7 @@ static int gem_init_rings(struct macb *bp, bool fail_early) /* This is a hard failure. In case of HRESP error * recovery we always reuse the existing page pool. 
*/ - last_err = gem_create_page_pool(queue); + last_err = gem_create_page_pool(queue, q); if (last_err) break; @@ -3449,11 +3684,24 @@ static int macb_close(struct net_device *dev) return 0; } +static bool gem_xdp_valid_mtu(struct macb *bp, int mtu) +{ + return __gem_total_rx_buffer_size(gem_max_rx_data_size(mtu + bp->rx_ip_align), + XDP_PACKET_HEADROOM) <= PAGE_SIZE; +} + static int macb_change_mtu(struct net_device *dev, int new_mtu) { + struct macb *bp = netdev_priv(dev); + if (netif_running(dev)) return -EBUSY; + if (rcu_access_pointer(bp->prog) && !gem_xdp_valid_mtu(bp, new_mtu)) { + netdev_err(dev, "MTU %d too large for XDP\n", new_mtu); + return -EINVAL; + } + WRITE_ONCE(dev->mtu, new_mtu); return 0; @@ -3471,6 +3719,55 @@ static int macb_set_mac_addr(struct net_device *dev, void *addr) return 0; } +static int gem_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct macb *bp = netdev_priv(dev); + struct bpf_prog *old_prog; + bool need_update, running; + int err; + + if (prog && !gem_xdp_valid_mtu(bp, dev->mtu)) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); + return -EOPNOTSUPP; + } + + running = netif_running(dev); + need_update = !!bp->prog != !!prog; + if (running && need_update) + macb_close(dev); + + old_prog = rcu_replace_pointer(bp->prog, prog, lockdep_rtnl_is_held()); + + if (running && need_update) { + err = macb_open(dev); + if (err) { + rcu_assign_pointer(bp->prog, old_prog); + return err; + } + } + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int gem_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct macb *bp = netdev_priv(dev); + + if (!macb_is_gem(bp)) + return -EOPNOTSUPP; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return gem_xdp_setup(dev, xdp->prog, xdp->extack); + default: + return -EOPNOTSUPP; + } +} + static void gem_update_stats(struct macb *bp) { struct macb_queue *queue; @@ -4745,6 +5042,7 @@ static const struct net_device_ops 
macb_netdev_ops = { .ndo_hwtstamp_set = macb_hwtstamp_set, .ndo_hwtstamp_get = macb_hwtstamp_get, .ndo_setup_tc = macb_setup_tc, + .ndo_bpf = gem_xdp, }; /* Configure peripheral capabilities according to device tree @@ -6031,11 +6329,11 @@ static int macb_probe(struct platform_device *pdev) goto err_out_phy_exit; if (macb_is_gem(bp)) { - bp->rx_headroom = NET_SKB_PAD; - if (!(bp->caps & MACB_CAPS_RSC)) { + if (!(bp->caps & MACB_CAPS_RSC)) bp->rx_ip_align = NET_IP_ALIGN; - bp->rx_headroom += NET_IP_ALIGN; - } + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT; } netif_carrier_off(dev); -- 2.53.0