page_pool_create() has recently started accepting an optional stack index of the Rx queue which the pool will be created for. It can then be used on the control path for things like memory providers. Add the same field to libeth_fq and pass the index from all the drivers using libeth for managing Rx to simplify implementing MP support later. idpf has one libeth_fq per buffer/fill queue and each Rx queue has two fill queues, but since fill queues can never be shared, we can store the corresponding Rx queue index there during initialization to pass it to libeth. Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 2 ++ include/net/libeth/rx.h | 2 ++ drivers/net/ethernet/intel/iavf/iavf_txrx.c | 1 + drivers/net/ethernet/intel/ice/ice_base.c | 2 ++ drivers/net/ethernet/intel/idpf/idpf_txrx.c | 13 +++++++++++++ drivers/net/ethernet/intel/libeth/rx.c | 1 + 6 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 6796f010e382..0eaebac8ceae 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -748,6 +748,7 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64, * @size: Length of descriptor ring in bytes * @dma: Physical address of ring * @q_vector: Backreference to associated vector + * @rxq_idx: stack index of the corresponding Rx queue * @rx_buffer_low_watermark: RX buffer low watermark * @rx_hbuf_size: Header buffer size * @rx_buf_size: Buffer size @@ -791,6 +792,7 @@ struct idpf_buf_queue { dma_addr_t dma; struct idpf_q_vector *q_vector; + u16 rxq_idx; u16 rx_buffer_low_watermark; u16 rx_hbuf_size; diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index 0e736846c5e8..db838ef7f9bb 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -72,6 +72,7 @@ enum libeth_fqe_type { * @no_napi: the queue is not a data queue and does not have NAPI * @buf_len: HW-writeable length per each buffer * @nid: ID of the closest NUMA node with memory + * @idx: stack index of the corresponding Rx queue */ struct libeth_fq { struct_group_tagged(libeth_fq_fp, fp, @@ -90,6 +91,7 @@ struct libeth_fq { u32 buf_len; int nid; + u32 idx; }; int libeth_rx_fq_create(struct libeth_fq *fq, void *napi_dev); diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 275b11dd0c60..3d938d7ab2cc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -771,6 +771,7 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) .count = rx_ring->count, .buf_len = LIBIE_MAX_RX_BUF_LEN, .nid = NUMA_NO_NODE, + .idx = rx_ring->queue_index, }; int ret; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 6fb7051aa463..7097324c38f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -607,6 +607,7 @@ static int ice_rxq_pp_create(struct ice_rx_ring *rq) struct libeth_fq fq = { .count = rq->count, .nid = NUMA_NO_NODE, + .idx = rq->q_index, .hsplit = rq->vsi->hsplit, .xdp = ice_is_xdp_ena_vsi(rq->vsi), .buf_len = LIBIE_MAX_RX_BUF_LEN, @@ -629,6 +630,7 @@ static int ice_rxq_pp_create(struct ice_rx_ring *rq) .count = rq->count, .type = LIBETH_FQE_HDR, .nid = NUMA_NO_NODE, + .idx = rq->q_index, .xdp = ice_is_xdp_ena_vsi(rq->vsi), }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 72215612b460..5dc41b7ba609 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -561,6 +561,7 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) .type = LIBETH_FQE_HDR, .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), + .idx = bufq->rxq_idx, }; int ret; @@ -703,6 +704,7 @@ static int idpf_rx_bufs_init_singleq(struct idpf_rx_queue *rxq) .type = LIBETH_FQE_MTU, .buf_len = IDPF_RX_MAX_BUF_SZ, .nid = idpf_q_vector_to_mem(rxq->q_vector), + .idx = rxq->idx, }; int ret; @@ -763,6 +765,7 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq, .hsplit = idpf_queue_has(HSPLIT_EN, bufq), .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), + .idx = bufq->rxq_idx, }; int ret; @@ -1922,6 +1925,16 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, LIBETH_RX_LL_LEN; idpf_rxq_set_descids(rsrc, q); } + + if (!idpf_is_queue_model_split(rsrc->rxq_model)) + continue; + + for (u32 j = 0; j < rsrc->num_bufqs_per_qgrp; j++) { + struct idpf_buf_queue *bufq; + + bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; + bufq->rxq_idx = rx_qgrp->splitq.rxq_sets[0]->rxq.idx; + } } err_alloc: diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index 1d8248a31037..9ac3a1448b2f 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -157,6 +157,7 @@ int libeth_rx_fq_create(struct libeth_fq *fq, void *napi_dev) .order = LIBETH_RX_PAGE_ORDER, .pool_size = fq->count, .nid = fq->nid, + .queue_idx = fq->idx, .dev = napi ? napi->dev->dev.parent : napi_dev, .netdev = napi ? napi->dev : NULL, .napi = napi, -- 2.52.0 libeth has been using netmems for quite some time already, so in order to support unreadable frags / memory providers, it only needs to set PP_FLAG_ALLOW_UNREADABLE_NETMEM when needed. Also add a couple of sanity checks to make sure the driver didn't mess up the configuration options and, when an MP is installed, always return a truesize equal to PAGE_SIZE, so that libeth_rx_alloc() will never try to allocate frags. Memory providers manage buffers on their own and expect a 1:1 buffer / HW Rx descriptor association. Bonus: mention in the libeth_sqe_type description that LIBETH_SQE_EMPTY should also be used for netmem Tx SQEs -- they don't need DMA unmapping.
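To illustrate the resulting contract from the driver side, a payload FQ for a queue with an unreadable MP bound would look roughly like the sketch below. This is hypothetical and not part of this patch: the rxq structure, its field names and the error handling are made up, only the libeth_fq fields and libeth_rx_fq_create() are real.

	/* Only a non-XDP, header-split payload FQ may sit on top of an
	 * unreadable memory provider; the header FQ stays a regular
	 * readable page_pool.
	 */
	struct libeth_fq pl = {
		.count		= rxq->desc_count,	/* made-up field */
		.type		= LIBETH_FQE_MTU,
		.hsplit		= true,			/* hsplit must be on */
		.buf_len	= LIBIE_MAX_RX_BUF_LEN,
		.nid		= NUMA_NO_NODE,
		.idx		= rxq->idx,		/* finds the MP binding */
	};
	int err;

	err = libeth_rx_fq_create(&pl, &rxq->q_vector->napi);
	if (err)	/* e.g. -EINVAL when XDP is on or hsplit is off */
		return err;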
Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Signed-off-by: Alexander Lobakin --- include/net/libeth/tx.h | 2 +- drivers/net/ethernet/intel/libeth/rx.c | 45 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index c3db5c6f1641..a66fc2b3a114 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -12,7 +12,7 @@ /** * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion - * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX/XSk frame, no action required + * @LIBETH_SQE_EMPTY: empty OR netmem/XDP_TX/XSk frame, no action required * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index 9ac3a1448b2f..9b45c9cdd599 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -6,6 +6,7 @@ #include #include +#include /* Rx buffer management */ @@ -139,9 +140,50 @@ static bool libeth_rx_page_pool_params_zc(struct libeth_fq *fq, fq->buf_len = clamp(mtu, LIBETH_RX_BUF_STRIDE, max); fq->truesize = fq->buf_len; + /* + * Allow frags only for kernel pages. `fq->truesize == pp->max_len` + * will always fall back to regular page_pool_alloc_netmems() + * regardless of the MTU / FQ buffer size. + */ + if (pp->flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) + fq->truesize = pp->max_len; + return true; } +/** + * libeth_rx_page_pool_check_unread - check input params for unreadable MPs + * @fq: buffer queue to check + * @pp: &page_pool_params for the queue + * + * Make sure we don't create an invalid pool with full-frame unreadable + * buffers, bidirectional unreadable buffers or so, and configure the + * ZC payload pool accordingly. + * + * Return: true on success, false on invalid input params. + */ +static bool libeth_rx_page_pool_check_unread(const struct libeth_fq *fq, + struct page_pool_params *pp) +{ + if (!pp->netdev) + return true; + + if (!netif_rxq_has_unreadable_mp(pp->netdev, pp->queue_idx)) + return true; + + /* For now, the core stack doesn't allow XDP with unreadable frags */ + if (fq->xdp) + return false; + + /* It should be either a header pool or a ZC payload pool */ + if (fq->type == LIBETH_FQE_HDR) + return !fq->hsplit; + + pp->flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + + return fq->hsplit; +} + /** * libeth_rx_fq_create - create a PP with the default libeth settings * @fq: buffer queue struct to fill @@ -166,6 +208,9 @@ int libeth_rx_fq_create(struct libeth_fq *fq, void *napi_dev) struct page_pool *pool; int ret; + if (!libeth_rx_page_pool_check_unread(fq, &pp)) + return -EINVAL; + pp.dma_dir = fq->xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; if (!fq->hsplit) -- 2.52.0 Queue management ops unconditionally enable netdev locking. The same lock is taken by default by several NAPI configuration functions, such as napi_enable() and netif_napi_set_irq(). Request ops locking in advance and make sure we use the _locked counterparts of those functions to avoid deadlocks, taking the lock manually where needed (suspend/resume, queue rebuild and resets). 
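To sum up the resulting pattern (illustrative sketch only, the my_*() names are hypothetical): once netdev->request_ops_lock is set, the core already holds the instance lock around ndo callbacks, so those paths must call the _locked variants directly, while code running outside the ndo context takes the lock explicitly.

	/* ndo context: instance lock already held by the core */
	static int my_ndo_open(struct net_device *netdev)
	{
		struct napi_struct *napi = my_get_napi(netdev);	/* hypothetical */

		napi_enable_locked(napi);
		return 0;
	}

	/* reset / suspend / rebuild paths: take the lock manually */
	static void my_rebuild(struct net_device *netdev, struct napi_struct *napi)
	{
		netdev_lock(netdev);
		netif_napi_set_irq_locked(napi, -1);
		napi_disable_locked(napi);
		netdev_unlock(netdev);
	}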
Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/ice/ice_lib.h | 6 ++- drivers/net/ethernet/intel/ice/ice_lib.c | 56 +++++++++++++++++---- drivers/net/ethernet/intel/ice/ice_main.c | 49 ++++++++++-------- drivers/net/ethernet/intel/ice/ice_sf_eth.c | 1 + drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +- 5 files changed, 82 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 49454d98dcfe..347b63e497e7 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -53,9 +53,11 @@ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); void ice_vsi_set_napi_queues(struct ice_vsi *vsi); -void ice_napi_add(struct ice_vsi *vsi); - +void ice_vsi_set_napi_queues_locked(struct ice_vsi *vsi); void ice_vsi_clear_napi_queues(struct ice_vsi *vsi); +void ice_vsi_clear_napi_queues_locked(struct ice_vsi *vsi); + +void ice_napi_add(struct ice_vsi *vsi); int ice_vsi_release(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 17d92ba65128..ac5d95a28f72 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2703,7 +2703,7 @@ void ice_vsi_close(struct ice_vsi *vsi) if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi); - ice_vsi_clear_napi_queues(vsi); + ice_vsi_clear_napi_queues_locked(vsi); ice_vsi_free_irq(vsi); ice_vsi_free_tx_rings(vsi); ice_vsi_free_rx_rings(vsi); @@ -2772,12 +2772,13 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } /** - * ice_vsi_set_napi_queues - associate netdev queues with napi + * ice_vsi_set_napi_queues_locked - associate netdev queues with napi * @vsi: VSI pointer * * Associate queue[s] with napi for all vectors. + * Must be called only with the netdev_lock taken. */ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi) +void ice_vsi_set_napi_queues_locked(struct ice_vsi *vsi) { struct net_device *netdev = vsi->netdev; int q_idx, v_idx; @@ -2785,7 +2786,6 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) if (!netdev) return; - ASSERT_RTNL(); ice_for_each_rxq(vsi, q_idx) netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, &vsi->rx_rings[q_idx]->q_vector->napi); @@ -2797,17 +2797,37 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + netif_napi_set_irq_locked(&q_vector->napi, q_vector->irq.virq); } } /** - * ice_vsi_clear_napi_queues - dissociate netdev queues from napi + * ice_vsi_set_napi_queues - associate VSI queues with NAPIs * @vsi: VSI pointer * + * Version of ice_vsi_set_napi_queues_locked() that takes the netdev_lock, + * to use it outside of the net_device_ops context. + */ +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + + if (!netdev) + return; + + netdev_lock(netdev); + ice_vsi_set_napi_queues_locked(vsi); + netdev_unlock(netdev); +} + +/** + * ice_vsi_clear_napi_queues_locked - dissociate netdev queues from napi + * @vsi: VSI to process + * * Clear the association between all VSI queues queue[s] and napi. + * Must be called only with the netdev_lock taken. 
*/ -void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) +void ice_vsi_clear_napi_queues_locked(struct ice_vsi *vsi) { struct net_device *netdev = vsi->netdev; int q_idx, v_idx; @@ -2815,12 +2835,11 @@ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) if (!netdev) return; - ASSERT_RTNL(); /* Clear the NAPI's interrupt number */ ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - netif_napi_set_irq(&q_vector->napi, -1); + netif_napi_set_irq_locked(&q_vector->napi, -1); } ice_for_each_txq(vsi, q_idx) @@ -2830,6 +2849,25 @@ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, NULL); } +/** + * ice_vsi_clear_napi_queues - dissociate VSI queues from NAPIs + * @vsi: VSI to process + * + * Version of ice_vsi_clear_napi_queues_locked() that takes the netdev lock, + * to use it outside of the net_device_ops context. + */ +void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + + if (!netdev) + return; + + netdev_lock(netdev); + ice_vsi_clear_napi_queues_locked(vsi); + netdev_unlock(netdev); +} + /** * ice_napi_add - register NAPI handler for the VSI * @vsi: VSI for which NAPI handler is to be registered diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 96730bf288fd..881b41d37821 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3523,6 +3523,7 @@ static void ice_set_ops(struct ice_vsi *vsi) } netdev->netdev_ops = &ice_netdev_ops; + netdev->request_ops_lock = true; netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; netdev->xdp_metadata_ops = &ice_xdp_md_ops; ice_set_ethtool_ops(netdev); @@ -5533,16 +5534,17 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) /* Remap vectors and rings, after successful re-init interrupts */ ice_for_each_vsi(pf, v) { - if (!pf->vsi[v]) + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi) continue; - ret = ice_vsi_alloc_q_vectors(pf->vsi[v]); + ret = ice_vsi_alloc_q_vectors(vsi); if (ret) goto err_reinit; - ice_vsi_map_rings_to_vectors(pf->vsi[v]); - rtnl_lock(); - ice_vsi_set_napi_queues(pf->vsi[v]); - rtnl_unlock(); + + ice_vsi_map_rings_to_vectors(vsi); + ice_vsi_set_napi_queues(vsi); } ret = ice_req_irq_msix_misc(pf); @@ -5555,13 +5557,15 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) return 0; err_reinit: - while (v--) - if (pf->vsi[v]) { - rtnl_lock(); - ice_vsi_clear_napi_queues(pf->vsi[v]); - rtnl_unlock(); - ice_vsi_free_q_vectors(pf->vsi[v]); - } + while (v--) { + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi) + continue; + + ice_vsi_clear_napi_queues(vsi); + ice_vsi_free_q_vectors(vsi); + } return ret; } @@ -5623,14 +5627,17 @@ static int ice_suspend(struct device *dev) * to CPU0. 
*/ ice_free_irq_msix_misc(pf); + ice_for_each_vsi(pf, v) { - if (!pf->vsi[v]) + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi) continue; - rtnl_lock(); - ice_vsi_clear_napi_queues(pf->vsi[v]); - rtnl_unlock(); - ice_vsi_free_q_vectors(pf->vsi[v]); + + ice_vsi_clear_napi_queues(vsi); + ice_vsi_free_q_vectors(vsi); } + ice_clear_interrupt_scheme(pf); pci_save_state(pdev); @@ -6744,7 +6751,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_init_moderation(q_vector); if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) - napi_enable(&q_vector->napi); + napi_enable_locked(&q_vector->napi); } } @@ -7243,7 +7250,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) - napi_disable(&q_vector->napi); + napi_disable_locked(&q_vector->napi); cancel_work_sync(&q_vector->tx.dim.work); cancel_work_sync(&q_vector->rx.dim.work); @@ -7543,7 +7550,7 @@ int ice_vsi_open(struct ice_vsi *vsi) if (err) goto err_set_qs; - ice_vsi_set_napi_queues(vsi); + ice_vsi_set_napi_queues_locked(vsi); } err = ice_up_complete(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 1a2c94375ca7..2c3db1b03055 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -58,6 +58,7 @@ static int ice_sf_cfg_netdev(struct ice_dynamic_port *dyn_port, eth_hw_addr_set(netdev, dyn_port->hw_addr); ether_addr_copy(netdev->perm_addr, dyn_port->hw_addr); netdev->netdev_ops = &ice_sf_netdev_ops; + netdev->request_ops_lock = true; SET_NETDEV_DEVLINK_PORT(netdev, devlink_port); err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 953e68ed0f9a..6d08a11a86a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -33,9 +33,9 @@ ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, return; if (enable) - napi_enable(&q_vector->napi); + napi_enable_locked(&q_vector->napi); else - napi_disable(&q_vector->napi); + napi_disable_locked(&q_vector->napi); } /** -- 2.52.0 Now ice is ready to get queue_mgmt_ops support. It already has an API to disable/reconfigure/enable one particular queue (for XSk). Reuse as much of that code as possible to implement the Rx queue management callbacks and vice versa -- ice_queue_mem_{alloc,free}() can be reused during ifup/ifdown to avoid code duplication. With this, ice passes the io_uring zcrx selftests, meaning the Rx part of netmem/MP support is done.
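For context, the stack drives these callbacks roughly in the following order when restarting a queue (e.g. on an io_uring zcrx binding). This is a simplified sketch of the core flow in net/core/netdev_rx_queue.c, assuming the interface is up and with error unwinding omitted; see that file for the authoritative sequence.

	new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
	old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);

	err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);	/* new PPs/FQs */
	err = qops->ndo_queue_stop(dev, old_mem, rxq_idx);	/* quiesce old */
	err = qops->ndo_queue_start(dev, new_mem, rxq_idx);	/* swap in new */
	qops->ndo_queue_mem_free(dev, old_mem);			/* drop old bufs */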
Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/ice/ice_lib.h | 5 + drivers/net/ethernet/intel/ice/ice_txrx.h | 2 + drivers/net/ethernet/intel/ice/ice_base.c | 192 ++++++++++++++------ drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_sf_eth.c | 2 +- drivers/net/ethernet/intel/ice/ice_txrx.c | 26 ++- 6 files changed, 163 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 347b63e497e7..2d3168458891 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -4,6 +4,8 @@ #ifndef _ICE_LIB_H_ #define _ICE_LIB_H_ +#include + #include "ice.h" #include "ice_vlan.h" @@ -132,4 +134,7 @@ void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f); void ice_init_feature_support(struct ice_pf *pf); bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi); void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel); + +extern const struct netdev_queue_mgmt_ops ice_queue_mgmt_ops; + #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c51b1e60f717..e3c682723107 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -440,6 +440,8 @@ u16 ice_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); void ice_clean_tx_ring(struct ice_tx_ring *tx_ring); +void ice_queue_mem_free(struct net_device *dev, void *per_queue_mem); +void ice_zero_rx_ring(struct ice_rx_ring *rx_ring); void ice_clean_rx_ring(struct ice_rx_ring *rx_ring); int ice_setup_tx_ring(struct ice_tx_ring *tx_ring); int ice_setup_rx_ring(struct ice_rx_ring *rx_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 7097324c38f3..4479bb5e66ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -651,6 +651,42 @@ static int ice_rxq_pp_create(struct ice_rx_ring *rq) return err; } +static int ice_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, + int idx) +{ + const struct ice_netdev_priv *priv = netdev_priv(dev); + const struct ice_rx_ring *real = priv->vsi->rx_rings[idx]; + struct ice_rx_ring *new = per_queue_mem; + int ret; + + new->count = real->count; + new->netdev = real->netdev; + new->q_index = real->q_index; + new->q_vector = real->q_vector; + new->vsi = real->vsi; + + ret = ice_rxq_pp_create(new); + if (ret) + return ret; + + if (!netif_running(dev)) + return 0; + + ret = __xdp_rxq_info_reg(&new->xdp_rxq, new->netdev, new->q_index, + new->q_vector->napi.napi_id, new->rx_buf_len); + if (ret) + goto err_destroy_fq; + + xdp_rxq_info_attach_page_pool(&new->xdp_rxq, new->pp); + + return 0; + +err_destroy_fq: + ice_rxq_pp_destroy(new); + + return ret; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -665,23 +701,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) int err; if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - return err; - } - ice_rx_xsk_pool(ring); err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); if (err) return err; if (ring->xsk_pool) { - 
xdp_rxq_info_unreg(&ring->xdp_rxq); - rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, @@ -700,20 +725,10 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { - err = ice_rxq_pp_create(ring); + err = ice_queue_mem_alloc(ring->netdev, ring, + ring->q_index); if (err) return err; - - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - goto err_destroy_fq; - } - xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, - ring->pp); } } @@ -722,7 +737,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (err) { dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", ring->q_index, err); - goto err_destroy_fq; + goto err_clean_rq; } if (ring->xsk_pool) { @@ -753,12 +768,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) err = ice_alloc_rx_bufs(ring, num_bufs); if (err) - goto err_destroy_fq; + goto err_clean_rq; return 0; -err_destroy_fq: - ice_rxq_pp_destroy(ring); +err_clean_rq: + ice_clean_rx_ring(ring); return err; } @@ -1425,27 +1440,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } -/** - * ice_qp_clean_rings - Cleans all the rings of a given index - * @vsi: VSI that contains rings of interest - * @q_idx: ring index in array - */ -static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) -{ - ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (vsi->xdp_rings) - ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - ice_clean_rx_ring(vsi->rx_rings[q_idx]); -} - -/** - * ice_qp_dis - Disables a queue pair - * @vsi: VSI of interest - * @q_idx: ring index in array - * - * Returns 0 on success, negative on failure. - */ -int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) +static int __ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) { struct ice_txq_meta txq_meta = { }; struct ice_q_vector *q_vector; @@ -1484,23 +1479,35 @@ int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) } ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); - ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); + ice_clean_tx_ring(vsi->tx_rings[q_idx]); + if (vsi->xdp_rings) + ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + return fail; } /** - * ice_qp_ena - Enables a queue pair + * ice_qp_dis - Disables a queue pair * @vsi: VSI of interest * @q_idx: ring index in array * * Returns 0 on success, negative on failure. */ -int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) +int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) +{ + int ret; + + ret = __ice_qp_dis(vsi, q_idx); + ice_clean_rx_ring(vsi->rx_rings[q_idx]); + + return ret; +} + +static int __ice_qp_ena(struct ice_vsi *vsi, u16 q_idx, int fail) { struct ice_q_vector *q_vector; - int fail = 0; bool link_up; int err; @@ -1518,10 +1525,6 @@ int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) ice_tx_xsk_pool(vsi, q_idx); } - err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (!fail) - fail = err; - q_vector = vsi->rx_rings[q_idx]->q_vector; ice_qvec_cfg_msix(vsi, q_vector, q_idx); @@ -1542,3 +1545,80 @@ int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) return fail; } + +/** + * ice_qp_ena - Enables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. 
+ */ +int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) +{ + return __ice_qp_ena(vsi, q_idx, ice_vsi_cfg_single_rxq(vsi, q_idx)); +} + +static int ice_queue_start(struct net_device *dev, void *per_queue_mem, + int idx) +{ + const struct ice_netdev_priv *priv = netdev_priv(dev); + struct ice_rx_ring *real = priv->vsi->rx_rings[idx]; + struct ice_rx_ring *new = per_queue_mem; + struct napi_struct *napi; + int ret; + + real->pp = new->pp; + real->rx_fqes = new->rx_fqes; + real->hdr_fqes = new->hdr_fqes; + real->hdr_pp = new->hdr_pp; + + real->hdr_truesize = new->hdr_truesize; + real->truesize = new->truesize; + real->rx_hdr_len = new->rx_hdr_len; + real->rx_buf_len = new->rx_buf_len; + + memcpy(&real->xdp_rxq, &new->xdp_rxq, sizeof(new->xdp_rxq)); + + ret = ice_setup_rx_ctx(real); + if (ret) + return ret; + + napi = &real->q_vector->napi; + + page_pool_enable_direct_recycling(real->pp, napi); + if (real->hdr_pp) + page_pool_enable_direct_recycling(real->hdr_pp, napi); + + ret = ice_alloc_rx_bufs(real, ICE_DESC_UNUSED(real)); + + return __ice_qp_ena(priv->vsi, idx, ret); +} + +static int ice_queue_stop(struct net_device *dev, void *per_queue_mem, + int idx) +{ + const struct ice_netdev_priv *priv = netdev_priv(dev); + struct ice_rx_ring *real = priv->vsi->rx_rings[idx]; + int ret; + + ret = __ice_qp_dis(priv->vsi, idx); + if (ret) + return ret; + + page_pool_disable_direct_recycling(real->pp); + if (real->hdr_pp) + page_pool_disable_direct_recycling(real->hdr_pp); + + ice_zero_rx_ring(real); + memcpy(per_queue_mem, real, sizeof(*real)); + + return 0; +} + +const struct netdev_queue_mgmt_ops ice_queue_mgmt_ops = { + .ndo_queue_mem_alloc = ice_queue_mem_alloc, + .ndo_queue_mem_free = ice_queue_mem_free, + .ndo_queue_mem_size = sizeof(struct ice_rx_ring), + .ndo_queue_start = ice_queue_start, + .ndo_queue_stop = ice_queue_stop, +}; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 881b41d37821..25e9091ca309 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3523,7 +3523,7 @@ static void ice_set_ops(struct ice_vsi *vsi) } netdev->netdev_ops = &ice_netdev_ops; - netdev->request_ops_lock = true; + netdev->queue_mgmt_ops = &ice_queue_mgmt_ops; netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; netdev->xdp_metadata_ops = &ice_xdp_md_ops; ice_set_ethtool_ops(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 2c3db1b03055..41e1606a8222 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -58,7 +58,7 @@ static int ice_sf_cfg_netdev(struct ice_dynamic_port *dyn_port, eth_hw_addr_set(netdev, dyn_port->hw_addr); ether_addr_copy(netdev->perm_addr, dyn_port->hw_addr); netdev->netdev_ops = &ice_sf_netdev_ops; - netdev->request_ops_lock = true; + netdev->queue_mgmt_ops = &ice_queue_mgmt_ops; SET_NETDEV_DEVLINK_PORT(netdev, devlink_port); err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 47d44796635a..b6f56cb81f93 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -530,17 +530,13 @@ void ice_rxq_pp_destroy(struct ice_rx_ring *rq) rq->hdr_pp = NULL; } -/** - * ice_clean_rx_ring - Free Rx buffers - * @rx_ring: ring to be cleaned - */ -void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) +void ice_queue_mem_free(struct net_device *dev, void 
*per_queue_mem) { - u32 size; + struct ice_rx_ring *rx_ring = per_queue_mem; if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); - goto rx_skip_free; + return; } /* ring already cleared, nothing to do */ @@ -567,8 +563,12 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) } ice_rxq_pp_destroy(rx_ring); +} + +void ice_zero_rx_ring(struct ice_rx_ring *rx_ring) +{ + size_t size; -rx_skip_free: /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); @@ -579,6 +579,16 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) rx_ring->next_to_use = 0; } +/** + * ice_clean_rx_ring - Free Rx buffers + * @rx_ring: ring to be cleaned + */ +void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) +{ + ice_queue_mem_free(rx_ring->netdev, rx_ring); + ice_zero_rx_ring(rx_ring); +} + /** * ice_free_rx_ring - Free Rx resources * @rx_ring: ring to clean the resources from -- 2.52.0 Advertise netmem Tx support in ice. The only change needed is to set ICE_TX_BUF_FRAG conditionally, only when skb_frag_is_net_iov() is false. Otherwise, the Tx buffer type will be ICE_TX_BUF_EMPTY and the driver will skip the DMA unmapping operation. Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_sf_eth.c | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 25e9091ca309..66601b1b7fec 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3524,6 +3524,7 @@ static void ice_set_ops(struct ice_vsi *vsi) netdev->netdev_ops = &ice_netdev_ops; netdev->queue_mgmt_ops = &ice_queue_mgmt_ops; + netdev->netmem_tx = true; netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; netdev->xdp_metadata_ops = &ice_xdp_md_ops; ice_set_ethtool_ops(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 41e1606a8222..51ad13c9d7f9 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -59,6 +59,7 @@ static int ice_sf_cfg_netdev(struct ice_dynamic_port *dyn_port, ether_addr_copy(netdev->perm_addr, dyn_port->hw_addr); netdev->netdev_ops = &ice_sf_netdev_ops; netdev->queue_mgmt_ops = &ice_queue_mgmt_ops; + netdev->netmem_tx = true; SET_NETDEV_DEVLINK_PORT(netdev, devlink_port); err = register_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b6f56cb81f93..e8e1acbd5a7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -113,11 +113,17 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { - if (tx_buf->type != ICE_TX_BUF_XDP_TX && dma_unmap_len(tx_buf, len)) + switch (tx_buf->type) { + case ICE_TX_BUF_DUMMY: + case ICE_TX_BUF_FRAG: + case ICE_TX_BUF_SKB: + case ICE_TX_BUF_XDP_XMIT: dma_unmap_page(ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + break; + } switch (tx_buf->type) { case ICE_TX_BUF_DUMMY: @@ -337,12 +343,14 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) } /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buf, 
len)) { + if (tx_buf->type != ICE_TX_BUF_EMPTY) { dma_unmap_page(tx_ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + tx_buf->type = ICE_TX_BUF_EMPTY; } } ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf); @@ -1493,7 +1501,8 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, DMA_TO_DEVICE); tx_buf = &tx_ring->tx_buf[i]; - tx_buf->type = ICE_TX_BUF_FRAG; + if (!skb_frag_is_net_iov(frag)) + tx_buf->type = ICE_TX_BUF_FRAG; } /* record SW timestamp if HW timestamp is not available */ @@ -2368,7 +2377,7 @@ void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring) } /* unmap the data header */ - if (dma_unmap_len(tx_buf, len)) + if (tx_buf->type != ICE_TX_BUF_EMPTY) dma_unmap_single(tx_ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), -- 2.52.0
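In short, the resulting Tx contract for netmem frags, condensed from the hunks above (illustrative sketch, not a new hunk): net_iov frags are DMA-mapped by their memory provider, so the driver must not unmap them on completion, and leaving the buffer type as ICE_TX_BUF_EMPTY makes both cleanup paths skip dma_unmap_page().

	tx_buf = &tx_ring->tx_buf[i];
	if (!skb_frag_is_net_iov(frag))
		tx_buf->type = ICE_TX_BUF_FRAG;	/* driver-mapped, unmap later */
	/* else: stays ICE_TX_BUF_EMPTY -- the completion paths skip
	 * dma_unmap_page() and any page refcounting for it
	 */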