Each port of a given ICSSG instance has its own set of Tx and Rx queues. Add functions to create and destroy these queues, which will later be used when performing ndo_bpf operations to set up XSK Tx/Rx queues for a given port. In the Rx queue destroy sequence, add a teardown wait to ensure that all descriptors, including the TDCM (teardown completion marker), have been serviced and freed, avoiding descriptor leaks. Signed-off-by: Meghana Malladi --- drivers/net/ethernet/ti/icssg/icssg_common.c | 10 +- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 177 ++++++++++++------- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 127 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 57e5f1c88f50..580a968594fc 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -719,8 +719,10 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) return ret; } - if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown ? */ + if (cppi5_desc_is_tdcm(desc_dma)) { + complete(&emac->tdown_complete); return 0; + } desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); @@ -804,7 +806,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) return ret; } -static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) +void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; @@ -822,6 +824,7 @@ static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } +EXPORT_SYMBOL_GPL(prueth_rx_cleanup); static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) { @@ -1025,7 +1028,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev } EXPORT_SYMBOL_GPL(icssg_ndo_start_xmit); -static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) +void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_tx_chn *tx_chn = data; struct cppi5_host_desc_t *desc_tx; @@ -1051,6 +1054,7 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) prueth_xmit_free(tx_chn, desc_tx); } +EXPORT_SYMBOL_GPL(prueth_tx_cleanup); irqreturn_t prueth_rx_irq(int irq, void *dev_id) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index dadce6009791..5e9926699805 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -727,6 +727,114 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, return 0; } +static void prueth_destroy_txq(struct prueth_emac *emac) +{ + int ret, i; + + atomic_set(&emac->tdown_cnt, emac->tx_ch_num); + /* ensure new tdown_cnt value is visible */ + smp_mb__after_atomic(); + /* tear down and disable UDMA channels */ + reinit_completion(&emac->tdown_complete); + for (i = 0; i < emac->tx_ch_num; i++) + k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); + + ret = wait_for_completion_timeout(&emac->tdown_complete, + msecs_to_jiffies(1000)); + if (!ret) + netdev_err(emac->ndev, "tx teardown timeout\n"); + + for (i = 0; i < emac->tx_ch_num; i++) { + napi_disable(&emac->tx_chns[i].napi_tx); + hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); + k3_udma_glue_reset_tx_chn(emac->tx_chns[i].tx_chn, + &emac->tx_chns[i], + prueth_tx_cleanup); +
k3_udma_glue_disable_tx_chn(emac->tx_chns[i].tx_chn); + } +} + +static void prueth_destroy_rxq(struct prueth_emac *emac) +{ + int i, ret; + + /* tear down and disable UDMA channels */ + reinit_completion(&emac->tdown_complete); + k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); + + /* When RX DMA Channel Teardown is initiated, it will result in an + * interrupt and a Teardown Completion Marker (TDCM) is queued into + * the RX Completion queue. Acknowledging the interrupt involves + * popping the TDCM descriptor from the RX Completion queue via the + * RX NAPI Handler. To avoid timing out when waiting for the TDCM to + * be popped, schedule the RX NAPI handler to run immediately. + */ + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { + if (napi_schedule_prep(&emac->napi_rx)) + __napi_schedule(&emac->napi_rx); + } + + ret = wait_for_completion_timeout(&emac->tdown_complete, + msecs_to_jiffies(1000)); + if (!ret) + netdev_err(emac->ndev, "rx teardown timeout\n"); + + for (i = 0; i < PRUETH_MAX_RX_FLOWS; i++) { + napi_disable(&emac->napi_rx); + hrtimer_cancel(&emac->rx_hrtimer); + k3_udma_glue_reset_rx_chn(emac->rx_chns.rx_chn, i, + &emac->rx_chns, + prueth_rx_cleanup); + } + + prueth_destroy_xdp_rxqs(emac); + k3_udma_glue_disable_rx_chn(emac->rx_chns.rx_chn); +} + +static int prueth_create_txq(struct prueth_emac *emac) +{ + int ret, i; + + for (i = 0; i < emac->tx_ch_num; i++) { + ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); + if (ret) + goto reset_tx_chan; + napi_enable(&emac->tx_chns[i].napi_tx); + } + return 0; + +reset_tx_chan: + /* Since interface is not yet up, there is wouldn't be + * any SKB for completion. So set false to free_skb + */ + prueth_reset_tx_chan(emac, i, false); + return ret; +} + +static int prueth_create_rxq(struct prueth_emac *emac) +{ + int ret; + + ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); + if (ret) + return ret; + + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + if (ret) + goto reset_rx_chn; + + ret = prueth_create_xdp_rxqs(emac); + if (ret) + goto reset_rx_chn; + + napi_enable(&emac->napi_rx); + return 0; + +reset_rx_chn: + prueth_reset_rx_chan(&emac->rx_chns, PRUETH_MAX_RX_FLOWS, false); + return ret; +} + /** * emac_ndo_open - EMAC device open * @ndev: network adapter device @@ -738,7 +846,7 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, static int emac_ndo_open(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); - int ret, i, num_data_chn = emac->tx_ch_num; + int ret, num_data_chn = emac->tx_ch_num; struct icssg_flow_cfg __iomem *flow_cfg; struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); @@ -811,28 +919,13 @@ static int emac_ndo_open(struct net_device *ndev) goto stop; /* Prepare RX */ - ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); + ret = prueth_create_rxq(emac); if (ret) goto free_tx_ts_irq; - ret = prueth_create_xdp_rxqs(emac); - if (ret) - goto reset_rx_chn; - - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + ret = prueth_create_txq(emac); if (ret) - goto destroy_xdp_rxqs; - - for (i = 0; i < emac->tx_ch_num; i++) { - ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); - if (ret) - goto reset_tx_chan; - } - - /* Enable NAPI in Tx and Rx direction */ - for (i = 0; i < emac->tx_ch_num; i++) - napi_enable(&emac->tx_chns[i].napi_tx); - napi_enable(&emac->napi_rx); + goto destroy_rxq; /* start PHY */ phy_start(ndev->phydev); @@ -843,15 +936,8 @@ static int emac_ndo_open(struct 
net_device *ndev) return 0; -reset_tx_chan: - /* Since interface is not yet up, there is wouldn't be - * any SKB for completion. So set false to free_skb - */ - prueth_reset_tx_chan(emac, i, false); -destroy_xdp_rxqs: - prueth_destroy_xdp_rxqs(emac); -reset_rx_chn: - prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false); +destroy_rxq: + prueth_destroy_rxq(emac); free_tx_ts_irq: free_irq(emac->tx_ts_irq, emac); stop: @@ -881,9 +967,6 @@ static int emac_ndo_stop(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); struct prueth *prueth = emac->prueth; - int rx_flow = PRUETH_RX_FLOW_DATA; - int max_rx_flows; - int ret, i; /* inform the upper layers. */ netif_tx_stop_all_queues(ndev); @@ -897,32 +980,8 @@ static int emac_ndo_stop(struct net_device *ndev) else __dev_mc_unsync(ndev, icssg_prueth_del_mcast); - atomic_set(&emac->tdown_cnt, emac->tx_ch_num); - /* ensure new tdown_cnt value is visible */ - smp_mb__after_atomic(); - /* tear down and disable UDMA channels */ - reinit_completion(&emac->tdown_complete); - for (i = 0; i < emac->tx_ch_num; i++) - k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); - - ret = wait_for_completion_timeout(&emac->tdown_complete, - msecs_to_jiffies(1000)); - if (!ret) - netdev_err(ndev, "tx teardown timeout\n"); - - prueth_reset_tx_chan(emac, emac->tx_ch_num, true); - for (i = 0; i < emac->tx_ch_num; i++) { - napi_disable(&emac->tx_chns[i].napi_tx); - hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); - } - - max_rx_flows = PRUETH_MAX_RX_FLOWS; - k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); - - prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); - prueth_destroy_xdp_rxqs(emac); - napi_disable(&emac->napi_rx); - hrtimer_cancel(&emac->rx_hrtimer); + prueth_destroy_txq(emac); + prueth_destroy_rxq(emac); cancel_work_sync(&emac->rx_mode_work); @@ -935,10 +994,10 @@ static int emac_ndo_stop(struct net_device *ndev) free_irq(emac->tx_ts_irq, emac); - free_irq(emac->rx_chns.irq[rx_flow], emac); + free_irq(emac->rx_chns.irq[PRUETH_RX_FLOW_DATA], emac); prueth_ndev_del_tx_napi(emac, emac->tx_ch_num); - prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows); + prueth_cleanup_rx_chns(emac, &emac->rx_chns, PRUETH_MAX_RX_FLOWS); prueth_cleanup_tx_chns(emac); prueth->emacs_initialized--; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index ca8a22a4a5da..e0dadbfca45a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -497,5 +497,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, struct page *page, unsigned int q_idx); +void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); +void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); #endif /* __NET_TI_ICSSG_PRUETH_H */ -- 2.43.0 Implement XSK NDOs (setup, wakeup) and create XSK Rx and Tx queues. xsk_qid stores the queue id for a given port which has been registered for zero copy AF_XDP and used to acquire UMEM pointer if registered. Based on the xsk_qid and the xsk_pool (umem) the driver is either in copy or zero copy mode. In case of copy mode the xsk_qid value will be invalid and will be set to valid queue id when enabling zero copy. To enable zero copy, the Rx queues are destroyed, i.e., descriptors pushed to fq and cq are freed to remap them to xdp buffers from the umem. 
Signed-off-by: Meghana Malladi --- Changes from v2(v3-v2): - Revert destroy/create for Rx/Tx from XDP_SETUP_PROG ndo_bpf() call as suggested by Jakub Kicinski drivers/net/ethernet/ti/icssg/icssg_common.c | 2 +- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 138 +++++++++++++++++++ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 10 ++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 580a968594fc..cbe898af3b4c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -754,7 +754,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) } pa = page_address(page); - if (emac->xdp_prog) { + if (prueth_xdp_is_enabled(emac)) { xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 5e9926699805..2c18b561a46c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -47,6 +47,9 @@ NETIF_F_HW_HSR_TAG_INS | \ NETIF_F_HW_HSR_TAG_RM) +#define PRUETH_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC |\ + DMA_ATTR_WEAK_ORDERING) + /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) @@ -727,6 +730,20 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, return 0; } +static void prueth_set_xsk_pool(struct prueth_emac *emac, u16 queue_id) +{ + struct prueth_tx_chn *tx_chn = &emac->tx_chns[queue_id]; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + + if (emac->xsk_qid != queue_id) { + rx_chn->xsk_pool = NULL; + tx_chn->xsk_pool = NULL; + } else { + rx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); + tx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); + } +} + static void prueth_destroy_txq(struct prueth_emac *emac) { int ret, i; @@ -867,6 +884,7 @@ static int emac_ndo_open(struct net_device *ndev) return ret; } + emac->xsk_qid = -EINVAL; init_completion(&emac->cmd_complete); ret = prueth_init_tx_chns(emac); if (ret) { @@ -1192,6 +1210,109 @@ static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) return 0; } +static int prueth_xsk_pool_enable(struct prueth_emac *emac, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + u32 frame_size; + int ret; + + if (queue_id >= PRUETH_MAX_RX_FLOWS || + queue_id >= emac->tx_ch_num) { + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); + return -EINVAL; + } + + frame_size = xsk_pool_get_rx_frame_size(pool); + if (frame_size < PRUETH_MAX_PKT_SIZE) + return -EOPNOTSUPP; + + ret = xsk_pool_dma_map(pool, rx_chn->dma_dev, PRUETH_RX_DMA_ATTR); + if (ret) { + netdev_err(emac->ndev, "Failed to map XSK pool: %d\n", ret); + return ret; + } + + if (netif_running(emac->ndev)) { + /* stop packets from wire for graceful teardown */ + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); + if (ret) + return ret; + prueth_destroy_rxq(emac); + } + + emac->xsk_qid = queue_id; + prueth_set_xsk_pool(emac, queue_id); + + if (netif_running(emac->ndev)) { + ret = prueth_create_rxq(emac); + if (ret) { + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); + return ret; + } + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); + if (ret) { + prueth_destroy_rxq(emac); + return ret; + } + ret = prueth_xsk_wakeup(emac->ndev, 
queue_id, XDP_WAKEUP_RX); + if (ret) + return ret; + } + + return 0; +} + +static int prueth_xsk_pool_disable(struct prueth_emac *emac, u16 queue_id) +{ + struct xsk_buff_pool *pool; + int ret; + + if (queue_id >= PRUETH_MAX_RX_FLOWS || + queue_id >= emac->tx_ch_num) { + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); + return -EINVAL; + } + + if (emac->xsk_qid != queue_id) { + netdev_err(emac->ndev, "XSK queue ID %d not registered\n", queue_id); + return -EINVAL; + } + + pool = xsk_get_pool_from_qid(emac->ndev, queue_id); + if (!pool) { + netdev_err(emac->ndev, "No XSK pool registered for queue %d\n", queue_id); + return -EINVAL; + } + + if (netif_running(emac->ndev)) { + /* stop packets from wire for graceful teardown */ + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); + if (ret) + return ret; + prueth_destroy_rxq(emac); + } + + xsk_pool_dma_unmap(pool, PRUETH_RX_DMA_ATTR); + emac->xsk_qid = -EINVAL; + prueth_set_xsk_pool(emac, queue_id); + + if (netif_running(emac->ndev)) { + ret = prueth_create_rxq(emac); + if (ret) { + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); + return ret; + } + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); + if (ret) { + prueth_destroy_rxq(emac); + return ret; + } + } + + return 0; +} + /** * emac_ndo_bpf - implements ndo_bpf for icssg_prueth * @ndev: network adapter device @@ -1206,11 +1327,27 @@ static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: return emac_xdp_setup(emac, bpf); + case XDP_SETUP_XSK_POOL: + return bpf->xsk.pool ? + prueth_xsk_pool_enable(emac, bpf->xsk.pool, bpf->xsk.queue_id) : + prueth_xsk_pool_disable(emac, bpf->xsk.queue_id); default: return -EINVAL; } } +int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) +{ + struct prueth_emac *emac = netdev_priv(ndev); + + if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { + netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); + return -EINVAL; + } + + return 0; +} + static const struct net_device_ops emac_netdev_ops = { .ndo_open = emac_ndo_open, .ndo_stop = emac_ndo_stop, @@ -1227,6 +1364,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid, .ndo_bpf = emac_ndo_bpf, .ndo_xdp_xmit = emac_xdp_xmit, + .ndo_xsk_wakeup = prueth_xsk_wakeup, }; static int prueth_netdev_init(struct prueth *prueth, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index e0dadbfca45a..50f40eaa103a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include "icssg_config.h" #include "icss_iep.h" @@ -126,6 +128,7 @@ struct prueth_tx_chn { char name[32]; struct hrtimer tx_hrtimer; unsigned long tx_pace_timeout_ns; + struct xsk_buff_pool *xsk_pool; }; struct prueth_rx_chn { @@ -138,6 +141,7 @@ struct prueth_rx_chn { char name[32]; struct page_pool *pg_pool; struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; }; enum prueth_swdata_type { @@ -241,6 +245,7 @@ struct prueth_emac { struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID]; struct bpf_prog *xdp_prog; struct xdp_attachment_info xdpi; + int xsk_qid; }; /* The buf includes headroom compatible with both skb and xdpf */ @@ -499,5 +504,10 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, unsigned int q_idx); void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); void 
prueth_tx_cleanup(void *data, dma_addr_t desc_dma); +int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); +static inline bool prueth_xdp_is_enabled(struct prueth_emac *emac) +{ + return !!READ_ONCE(emac->xdp_prog); +} #endif /* __NET_TI_ICSSG_PRUETH_H */ -- 2.43.0 Use xsk_pool inside tx_chn to check if a given Tx queue id is registered for xsk zero copy, which gets populated during xsk enable If xsk_pool is set, get frames from the pool in NAPI context and submit them to the Tx channel. Tx completion is also handled in the NAPI context. Use PRUETH_SWDATA_XSK to recycle xsk buffers back to the umem pool. Add XDP_WAKEUP_TX support to enable xsk_wakeup for Tx. Signed-off-by: Meghana Malladi --- drivers/net/ethernet/ti/icssg/icssg_common.c | 112 ++++++++++++++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 13 +++ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index cbe898af3b4c..b526f246ecb9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -93,15 +93,91 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num) } EXPORT_SYMBOL_GPL(prueth_ndev_del_tx_napi); +static int emac_xsk_xmit_zc(struct prueth_emac *emac, + unsigned int q_idx) +{ + struct prueth_tx_chn *tx_chn = &emac->tx_chns[q_idx]; + struct xsk_buff_pool *pool = tx_chn->xsk_pool; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *host_desc; + dma_addr_t dma_desc, dma_buf; + struct prueth_swdata *swdata; + struct xdp_desc xdp_desc; + int num_tx = 0, pkt_len; + int descs_avail, ret; + u32 *epib; + int i; + + descs_avail = k3_cppi_desc_pool_avail(tx_chn->desc_pool); + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS + * will be available for normal TX path and queue is stopped there if + * necessary + */ + if (descs_avail <= MAX_SKB_FRAGS) + return 0; + + descs_avail -= MAX_SKB_FRAGS; + + for (i = 0; i < descs_avail; i++) { + if (!xsk_tx_peek_desc(pool, &xdp_desc)) + break; + + dma_buf = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + pkt_len = xdp_desc.len; + xsk_buff_raw_dma_sync_for_device(pool, dma_buf, pkt_len); + + host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (unlikely(!host_desc)) + break; + + cppi5_hdesc_init(host_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(host_desc, 0); + epib = host_desc->epib; + epib[0] = 0; + epib[1] = 0; + cppi5_hdesc_set_pktlen(host_desc, pkt_len); + cppi5_desc_set_tags_ids(&host_desc->hdr, 0, + (emac->port_id | (q_idx << 8))); + + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &dma_buf); + cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, + pkt_len); + + swdata = cppi5_hdesc_get_swdata(host_desc); + swdata->type = PRUETH_SWDATA_XSK; + + dma_desc = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, + host_desc); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, + host_desc, dma_desc); + + if (ret) { + ndev->stats.tx_errors++; + k3_cppi_desc_pool_free(tx_chn->desc_pool, host_desc); + break; + } + + num_tx++; + } + + xsk_tx_release(tx_chn->xsk_pool); + return num_tx; +} + void prueth_xmit_free(struct prueth_tx_chn *tx_chn, struct cppi5_host_desc_t *desc) { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; + struct prueth_swdata *swdata; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; + swdata = 
cppi5_hdesc_get_swdata(first_desc); + if (swdata->type == PRUETH_SWDATA_XSK) + goto free_pool; cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -126,6 +202,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } +free_pool: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -139,7 +216,9 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, struct prueth_swdata *swdata; struct prueth_tx_chn *tx_chn; unsigned int total_bytes = 0; + int xsk_frames_done = 0; struct xdp_frame *xdpf; + unsigned int pkt_len; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -176,6 +255,11 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, total_bytes += xdpf->len; xdp_return_frame(xdpf); break; + case PRUETH_SWDATA_XSK: + pkt_len = cppi5_hdesc_get_pktlen(desc_tx); + dev_sw_netstats_tx_add(ndev, 1, pkt_len); + xsk_frames_done++; + break; default: prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; @@ -204,6 +288,18 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, __netif_tx_unlock(netif_txq); } + if (tx_chn->xsk_pool) { + if (xsk_frames_done) + xsk_tx_completed(tx_chn->xsk_pool, xsk_frames_done); + + if (xsk_uses_need_wakeup(tx_chn->xsk_pool)) + xsk_set_tx_need_wakeup(tx_chn->xsk_pool); + + netif_txq = netdev_get_tx_queue(ndev, chn); + txq_trans_cond_update(netif_txq); + emac_xsk_xmit_zc(emac, chn); + } + return num_tx; } @@ -212,7 +308,10 @@ static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer) struct prueth_tx_chn *tx_chns = container_of(timer, struct prueth_tx_chn, tx_hrtimer); - enable_irq(tx_chns->irq); + if (tx_chns->irq_disabled) { + tx_chns->irq_disabled = false; + enable_irq(tx_chns->irq); + } return HRTIMER_NORESTART; } @@ -235,7 +334,10 @@ static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) ns_to_ktime(tx_chn->tx_pace_timeout_ns), HRTIMER_MODE_REL_PINNED); } else { - enable_irq(tx_chn->irq); + if (tx_chn->irq_disabled) { + tx_chn->irq_disabled = false; + enable_irq(tx_chn->irq); + } } } @@ -246,6 +348,7 @@ static irqreturn_t prueth_tx_irq(int irq, void *dev_id) { struct prueth_tx_chn *tx_chn = dev_id; + tx_chn->irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&tx_chn->napi_tx); @@ -1032,6 +1135,7 @@ void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_tx_chn *tx_chn = data; struct cppi5_host_desc_t *desc_tx; + struct xsk_buff_pool *xsk_pool; struct prueth_swdata *swdata; struct xdp_frame *xdpf; struct sk_buff *skb; @@ -1048,6 +1152,10 @@ void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) xdpf = swdata->data.xdpf; xdp_return_frame(xdpf); break; + case PRUETH_SWDATA_XSK: + xsk_pool = tx_chn->xsk_pool; + xsk_tx_completed(xsk_pool, 1); + break; default: break; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 2c18b561a46c..78778f0e8f4a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1339,12 +1339,25 @@ static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) { struct prueth_emac *emac = netdev_priv(ndev); + struct prueth_tx_chn *tx_chn = &emac->tx_chns[qid]; if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); return -EINVAL; 
} + if (!tx_chn->xsk_pool) { + netdev_err(ndev, "XSK pool not registered for queue %d\n", qid); + return -EINVAL; + } + + if (flags & XDP_WAKEUP_TX) { + if (!napi_if_scheduled_mark_missed(&tx_chn->napi_tx)) { + if (likely(napi_schedule_prep(&tx_chn->napi_tx))) + __napi_schedule(&tx_chn->napi_tx); + } + } + return 0; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 50f40eaa103a..90b3e9c9e148 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -129,6 +129,7 @@ struct prueth_tx_chn { struct hrtimer tx_hrtimer; unsigned long tx_pace_timeout_ns; struct xsk_buff_pool *xsk_pool; + bool irq_disabled; }; struct prueth_rx_chn { @@ -150,6 +151,7 @@ enum prueth_swdata_type { PRUETH_SWDATA_PAGE, PRUETH_SWDATA_CMD, PRUETH_SWDATA_XDPF, + PRUETH_SWDATA_XSK, }; struct prueth_swdata { -- 2.43.0 emac_run_xdp function runs xdp program, at a given hook point in the Rx path of the driver in NAPI context and returns XDP return codes. In zero copy mode the driver receives packets using UMEM frames instead of pages (native XDP). Decouple the usage of page in this function. Signed-off-by: Meghana Malladi --- drivers/net/ethernet/ti/icssg/icssg_common.c | 26 ++++++++++++-------- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 3 ++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 9 +++++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b526f246ecb9..bc6dc7bf3f6a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -647,15 +647,15 @@ void emac_rx_timestamp(struct prueth_emac *emac, * emac_xmit_xdp_frame - transmits an XDP frame * @emac: emac device * @xdpf: data to transmit - * @page: page from page pool if already DMA mapped * @q_idx: queue id + * @buff_type: Type of buffer to be transmitted * * Return: XDP state */ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, - struct page *page, - unsigned int q_idx) + unsigned int q_idx, + enum prueth_tx_buff_type buff_type) { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; @@ -663,6 +663,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; struct prueth_swdata *swdata; + struct page *page; u32 *epib; int ret; @@ -679,7 +680,12 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, return ICSSG_XDP_CONSUMED; /* drop */ } - if (page) { /* already DMA mapped by page_pool */ + if (buff_type == PRUETH_TX_BUFF_TYPE_XDP_TX) { /* already DMA mapped by page_pool */ + page = virt_to_head_page(xdpf->data); + if (unlikely(!page)) { + netdev_err(ndev, "xdp tx: failed to get page from xdpf\n"); + goto drop_free_descs; + } buf_dma = page_pool_get_dma_addr(page); buf_dma += xdpf->headroom + sizeof(struct xdp_frame); } else { /* Map the linear buffer */ @@ -734,13 +740,11 @@ EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame); * emac_run_xdp - run an XDP program * @emac: emac device * @xdp: XDP buffer containing the frame - * @page: page with RX data if already DMA mapped * @len: Rx descriptor packet length * * Return: XDP state */ -static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, - struct page *page, u32 *len) +static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *len) { struct net_device *ndev = emac->ndev; struct netdev_queue *netif_txq; @@ -767,7 +771,8 
@@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, q_idx = cpu % emac->tx_ch_num; netif_txq = netdev_get_tx_queue(ndev, q_idx); __netif_tx_lock(netif_txq, cpu); - result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + result = emac_xmit_xdp_frame(emac, xdpf, q_idx, + PRUETH_TX_BUFF_TYPE_XDP_TX); __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; @@ -792,7 +797,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: ndev->stats.rx_dropped++; - page_pool_recycle_direct(emac->rx_chns.pg_pool, page); + page_pool_recycle_direct(emac->rx_chns.pg_pool, + virt_to_head_page(xdp->data)); return ICSSG_XDP_CONSUMED; } } @@ -861,7 +867,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); - *xdp_state = emac_run_xdp(emac, &xdp, page, &pkt_len); + *xdp_state = emac_run_xdp(emac, &xdp, &pkt_len); if (*xdp_state != ICSSG_XDP_PASS) goto requeue; headroom = xdp.data - xdp.data_hard_start; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 78778f0e8f4a..296034ab6c67 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1177,7 +1177,8 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; - err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); + err = emac_xmit_xdp_frame(emac, xdpf, q_idx, + PRUETH_TX_BUFF_TYPE_XDP_NDO); if (err != ICSSG_XDP_TX) { ndev->stats.tx_dropped++; break; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 90b3e9c9e148..74a0e5ed2a47 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -154,6 +154,11 @@ enum prueth_swdata_type { PRUETH_SWDATA_XSK, }; +enum prueth_tx_buff_type { + PRUETH_TX_BUFF_TYPE_XDP_TX, + PRUETH_TX_BUFF_TYPE_XDP_NDO, +}; + struct prueth_swdata { enum prueth_swdata_type type; union prueth_data { @@ -502,8 +507,8 @@ void prueth_put_cores(struct prueth *prueth, int slice); u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns); u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, - struct page *page, - unsigned int q_idx); + unsigned int q_idx, + enum prueth_tx_buff_type buff_type); void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); -- 2.43.0 Use xsk_pool inside rx_chn to check if a given Rx queue id is registered for xsk zero copy, which gets populated during xsk enable. Update prueth_create_xdp_rxqs to register and support two different memory models (xsk and page) for a given Rx queue, if registered for zero copy. If xsk_pool is registered, allocate buffers from UMEM and map them to the hardware Rx descriptors. In NAPI context, run the XDP program for each packet and process the xsk buffer according to the XDP result codes. Also allocate a new set of buffers from UMEM for the next batch of NAPI Rx processing. Add XDP_WAKEUP_RX support to enable xsk wakeup for Rx.
Move prueth_create_page_pool to prueth_init_rx_chns to avoid freeing and re-allocating the system memory every time there is a transition from zero copy to copy and prevents any type of memory fragmentation or leak. Signed-off-by: Meghana Malladi --- Changes from v2(v3-v2): - Create/destroy page_pool only when bringing up/down the interfaces to avoid freeing the rx memory during zerocopy<->copy transitions as suggested by Jakub Kicinski drivers/net/ethernet/ti/icssg/icssg_common.c | 324 +++++++++++++++---- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 60 +++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 314 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index bc6dc7bf3f6a..95474eb61d3c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -465,6 +465,29 @@ int prueth_init_tx_chns(struct prueth_emac *emac) } EXPORT_SYMBOL_GPL(prueth_init_tx_chns); +static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, + struct device *dma_dev, + int size) +{ + struct page_pool_params pp_params = { 0 }; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = size; + pp_params.nid = dev_to_node(emac->prueth->dev); + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = dma_dev; + pp_params.napi = &emac->napi_rx; + pp_params.max_len = PAGE_SIZE; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + netdev_err(emac->ndev, "cannot create rx page pool\n"); + + return pool; +} + int prueth_init_rx_chns(struct prueth_emac *emac, struct prueth_rx_chn *rx_chn, char *name, u32 max_rflows, @@ -474,6 +497,7 @@ int prueth_init_rx_chns(struct prueth_emac *emac, struct device *dev = emac->prueth->dev; struct net_device *ndev = emac->ndev; u32 fdqring_id, hdesc_size; + struct page_pool *pool; int i, ret = 0, slice; int flow_id_base; @@ -516,6 +540,14 @@ int prueth_init_rx_chns(struct prueth_emac *emac, goto fail; } + pool = prueth_create_page_pool(emac, rx_chn->dma_dev, rx_chn->descs_num); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto fail; + } + + rx_chn->pg_pool = pool; + flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); if (emac->is_sr1 && !strcmp(name, "rxmgm")) { emac->rx_mgm_flow_id_base = flow_id_base; @@ -797,12 +829,189 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *len fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: ndev->stats.rx_dropped++; - page_pool_recycle_direct(emac->rx_chns.pg_pool, - virt_to_head_page(xdp->data)); return ICSSG_XDP_CONSUMED; } } +static int prueth_dma_rx_push_mapped_zc(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + struct xdp_buff *xdp) +{ + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct prueth_swdata *swdata; + dma_addr_t desc_dma; + dma_addr_t buf_dma; + int buf_len; + + buf_dma = xsk_buff_xdp_get_dma(xdp); + desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); + if (!desc_rx) { + netdev_err(ndev, "rx push: failed to allocate descriptor\n"); + return -ENOMEM; + } + desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); + + cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); + buf_len = xsk_pool_get_rx_frame_size(rx_chn->xsk_pool); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, buf_len, 
buf_dma, buf_len); + swdata = cppi5_hdesc_get_swdata(desc_rx); + swdata->type = PRUETH_SWDATA_XSK; + swdata->data.xdp = xdp; + + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, PRUETH_RX_FLOW_DATA, + desc_rx, desc_dma); + + return 0; +} + +static int prueth_rx_alloc_zc(struct prueth_emac *emac, int budget) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + struct xdp_buff *xdp; + int i, ret; + + for (i = 0; i < budget; i++) { + xdp = xsk_buff_alloc(rx_chn->xsk_pool); + if (!xdp) + break; + + ret = prueth_dma_rx_push_mapped_zc(emac, rx_chn, xdp); + if (ret) { + netdev_err(emac->ndev, "rx alloc: failed to map descriptors to xdp buff\n"); + xsk_buff_free(xdp); + break; + } + } + + return i; +} + +static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *psdata) +{ + unsigned int pkt_len = xdp->data_end - xdp->data; + struct net_device *ndev = emac->ndev; + struct sk_buff *skb; + + skb = napi_alloc_skb(&emac->napi_rx, xdp->data_end - xdp->data_hard_start); + if (unlikely(!skb)) { + ndev->stats.rx_dropped++; + return; + } + + skb_reserve(skb, PRUETH_HEADROOM); + skb_put(skb, pkt_len); + skb->dev = ndev; + + /* RX HW timestamp */ + if (emac->rx_ts_enabled) + emac_rx_timestamp(emac, skb, psdata); + + if (emac->prueth->is_switch_mode) + skb->offload_fwd_mark = emac->offload_fwd_mark; + skb->protocol = eth_type_trans(skb, ndev); + + skb_mark_for_recycle(skb); + napi_gro_receive(&emac->napi_rx, skb); + ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_packets++; +} + +static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, + int budget) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + u32 buf_dma_len, pkt_len, port_id = 0; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct prueth_swdata *swdata; + dma_addr_t desc_dma, buf_dma; + struct xdp_buff *xdp; + int xdp_status = 0; + int count = 0; + u32 *psdata; + int ret; + + while (count < budget) { + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); + if (ret) { + if (ret != -ENODATA) + netdev_err(ndev, "rx pop: failed: %d\n", ret); + break; + } + + if (cppi5_desc_is_tdcm(desc_dma)) { + complete(&emac->tdown_complete); + break; + } + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_rx); + if (swdata->type != PRUETH_SWDATA_XSK) { + netdev_err(ndev, "rx_pkt: invalid swdata->type %d\n", swdata->type); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + break; + } + + xdp = swdata->data.xdp; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); + /* firmware adds 4 CRC bytes, strip them */ + pkt_len -= 4; + cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); + psdata = cppi5_hdesc_get_psdata(desc_rx); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + count++; + xsk_buff_set_size(xdp, pkt_len); + xsk_buff_dma_sync_for_cpu(xdp); + + if (prueth_xdp_is_enabled(emac)) { + ret = emac_run_xdp(emac, xdp, &pkt_len); + switch (ret) { + case ICSSG_XDP_PASS: + /* prepare skb and send to n/w stack */ + emac_dispatch_skb_zc(emac, xdp, psdata); + xsk_buff_free(xdp); + break; + case ICSSG_XDP_CONSUMED: + xsk_buff_free(xdp); + break; + case ICSSG_XDP_TX: + case ICSSG_XDP_REDIR: + xdp_status |= ret; + break; + } + } else { + /* prepare skb and send to n/w stack */ + emac_dispatch_skb_zc(emac, xdp, psdata); + xsk_buff_free(xdp); + } + } + + if (xdp_status & ICSSG_XDP_REDIR) + xdp_do_flush(); + + /* 
Allocate xsk buffers from the pool for the "count" number of + * packets processed in order to be able to receive more packets. + */ + ret = prueth_rx_alloc_zc(emac, count); + + if (xsk_uses_need_wakeup(rx_chn->xsk_pool)) { + /* If the user space doesn't provide enough buffers then it must + * explicitly wake up the kernel when new buffers are available + */ + if (ret < count) + xsk_set_rx_need_wakeup(rx_chn->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_chn->xsk_pool); + } + + return count; +} + static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) { struct prueth_rx_chn *rx_chn = &emac->rx_chns; @@ -849,7 +1058,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the @@ -921,12 +1129,16 @@ void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) struct cppi5_host_desc_t *desc_rx; struct prueth_swdata *swdata; struct page_pool *pool; + struct xdp_buff *xdp; struct page *page; pool = rx_chn->pg_pool; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - if (swdata->type == PRUETH_SWDATA_PAGE) { + if (rx_chn->xsk_pool) { + xdp = swdata->data.xdp; + xsk_buff_free(xdp); + } else { page = swdata->data.page; page_pool_recycle_direct(pool, page); } @@ -1174,6 +1386,7 @@ irqreturn_t prueth_rx_irq(int irq, void *dev_id) { struct prueth_emac *emac = dev_id; + emac->rx_chns.irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&emac->napi_rx); @@ -1201,6 +1414,7 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA; int flow = emac->is_sr1 ? 
PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; int xdp_state_or = 0; int num_rx = 0; int cur_budget; @@ -1208,14 +1422,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) int ret; while (flow--) { - cur_budget = budget - num_rx; - - while (cur_budget--) { - ret = emac_rx_packet(emac, flow, &xdp_state); - xdp_state_or |= xdp_state; - if (ret) - break; - num_rx++; + if (rx_chn->xsk_pool) { + num_rx = emac_rx_packet_zc(emac, flow, budget); + } else { + cur_budget = budget - num_rx; + + while (cur_budget--) { + ret = emac_rx_packet(emac, flow, &xdp_state); + xdp_state_or |= xdp_state; + if (ret) + break; + num_rx++; + } } if (num_rx >= budget) @@ -1231,7 +1449,11 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) ns_to_ktime(emac->rx_pace_timeout_ns), HRTIMER_MODE_REL_PINNED); } else { - enable_irq(emac->rx_chns.irq[rx_flow]); + if (emac->rx_chns.irq_disabled) { + /* re-enable the RX IRQ */ + emac->rx_chns.irq_disabled = false; + enable_irq(emac->rx_chns.irq[rx_flow]); + } } } @@ -1239,62 +1461,48 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) } EXPORT_SYMBOL_GPL(icssg_napi_rx_poll); -static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, - struct device *dma_dev, - int size) -{ - struct page_pool_params pp_params = { 0 }; - struct page_pool *pool; - - pp_params.order = 0; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; - pp_params.pool_size = size; - pp_params.nid = dev_to_node(emac->prueth->dev); - pp_params.dma_dir = DMA_BIDIRECTIONAL; - pp_params.dev = dma_dev; - pp_params.napi = &emac->napi_rx; - pp_params.max_len = PAGE_SIZE; - - pool = page_pool_create(&pp_params); - if (IS_ERR(pool)) - netdev_err(emac->ndev, "cannot create rx page pool\n"); - - return pool; -} - int prueth_prepare_rx_chan(struct prueth_emac *emac, struct prueth_rx_chn *chn, int buf_size) { - struct page_pool *pool; struct page *page; + int desc_avail; int i, ret; - pool = prueth_create_page_pool(emac, chn->dma_dev, chn->descs_num); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - chn->pg_pool = pool; + desc_avail = k3_cppi_desc_pool_avail(chn->desc_pool); + if (desc_avail < chn->descs_num) + netdev_warn(emac->ndev, + "not enough RX descriptors available %d < %d\n", + desc_avail, chn->descs_num); - for (i = 0; i < chn->descs_num; i++) { - /* NOTE: we're not using memory efficiently here. - * 1 full page (4KB?) used here instead of - * PRUETH_MAX_PKT_SIZE (~1.5KB?) + if (chn->xsk_pool) { + /* get pages from xsk_pool and push to RX ring + * queue as much as possible */ - page = page_pool_dev_alloc_pages(pool); - if (!page) { - netdev_err(emac->ndev, "couldn't allocate rx page\n"); - ret = -ENOMEM; + ret = prueth_rx_alloc_zc(emac, desc_avail); + if (!ret) goto recycle_alloc_pg; - } + } else { + for (i = 0; i < desc_avail; i++) { + /* NOTE: we're not using memory efficiently here. + * 1 full page (4KB?) used here instead of + * PRUETH_MAX_PKT_SIZE (~1.5KB?) 
+ */ + page = page_pool_dev_alloc_pages(chn->pg_pool); + if (!page) { + netdev_err(emac->ndev, "couldn't allocate rx page\n"); + ret = -ENOMEM; + goto recycle_alloc_pg; + } - ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); - if (ret < 0) { - netdev_err(emac->ndev, - "cannot submit page for rx chan %s ret %d\n", - chn->name, ret); - page_pool_recycle_direct(pool, page); - goto recycle_alloc_pg; + ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); + if (ret < 0) { + netdev_err(emac->ndev, + "cannot submit page for rx chan %s ret %d\n", + chn->name, ret); + page_pool_recycle_direct(chn->pg_pool, page); + goto recycle_alloc_pg; + } } } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 296034ab6c67..92f2c33affda 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -395,7 +395,11 @@ static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer) container_of(timer, struct prueth_emac, rx_hrtimer); int rx_flow = PRUETH_RX_FLOW_DATA; - enable_irq(emac->rx_chns.irq[rx_flow]); + if (emac->rx_chns.irq_disabled) { + /* re-enable the RX IRQ */ + emac->rx_chns.irq_disabled = false; + enable_irq(emac->rx_chns.irq[rx_flow]); + } return HRTIMER_NORESTART; } @@ -569,31 +573,41 @@ const struct icss_iep_clockops prueth_iep_clockops = { .perout_enable = prueth_perout_enable, }; +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) +{ + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + + if (xdp_rxq_info_is_reg(rxq)) + xdp_rxq_info_unreg(rxq); +} + static int prueth_create_xdp_rxqs(struct prueth_emac *emac) { struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; struct page_pool *pool = emac->rx_chns.pg_pool; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; int ret; ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id); if (ret) return ret; - ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); - if (ret) - xdp_rxq_info_unreg(rxq); - - return ret; -} - -static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) -{ - struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + if (rx_chn->xsk_pool) { + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); + if (ret) + goto xdp_unreg; + xsk_pool_set_rxq_info(rx_chn->xsk_pool, rxq); + } else { + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + goto xdp_unreg; + } - if (!xdp_rxq_info_is_reg(rxq)) - return; + return 0; - xdp_rxq_info_unreg(rxq); +xdp_unreg: + prueth_destroy_xdp_rxqs(emac); + return ret; } static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr) @@ -1341,6 +1355,12 @@ int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) { struct prueth_emac *emac = netdev_priv(ndev); struct prueth_tx_chn *tx_chn = &emac->tx_chns[qid]; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + + if (emac->xsk_qid != qid) { + netdev_err(ndev, "XSK queue %d not registered\n", qid); + return -EINVAL; + } if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); @@ -1352,6 +1372,11 @@ int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) return -EINVAL; } + if (!rx_chn->xsk_pool) { + netdev_err(ndev, "XSK pool not registered for RX queue %d\n", qid); + return -EINVAL; + } + if (flags & XDP_WAKEUP_TX) { if (!napi_if_scheduled_mark_missed(&tx_chn->napi_tx)) { if (likely(napi_schedule_prep(&tx_chn->napi_tx))) @@ -1359,6 +1384,13 @@ int prueth_xsk_wakeup(struct 
net_device *ndev, u32 qid, u32 flags) } } + if (flags & XDP_WAKEUP_RX) { + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { + if (likely(napi_schedule_prep(&emac->napi_rx))) + __napi_schedule(&emac->napi_rx); + } + } + return 0; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 74a0e5ed2a47..540caf5de5fd 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -143,6 +143,7 @@ struct prueth_rx_chn { struct page_pool *pg_pool; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; + bool irq_disabled; }; enum prueth_swdata_type { @@ -166,6 +167,7 @@ struct prueth_swdata { struct page *page; u32 cmd; struct xdp_frame *xdpf; + struct xdp_buff *xdp; } data; }; -- 2.43.0 Enable the zero copy feature flag in xdp_set_features_flag() for a given ndev to get the AF-XDP zero copy support running for both Tx and Rx. Signed-off-by: Meghana Malladi --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 92f2c33affda..9894f1f30b58 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1545,7 +1545,8 @@ static int prueth_netdev_init(struct prueth *prueth, xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT); + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_XSK_ZEROCOPY); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, -- 2.43.0
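For reviewers who want to exercise the new zero-copy path end to end, below is a minimal userspace sketch (illustrative only, not part of this series). It assumes libxdp's <xdp/xsk.h>; the interface name "eth1", queue id 0 and the ring/frame sizes are placeholders. The bind performed by xsk_socket__create() reaches the driver through ndo_bpf(XDP_SETUP_XSK_POOL), i.e. prueth_xsk_pool_enable(), and subsequent poll()/sendto() wakeups land in prueth_xsk_wakeup().

/* xsk_zc_bind.c - bind an AF_XDP socket in zero-copy mode to ICSSG queue 0.
 * Illustrative sketch: names and sizes are placeholders, error handling is
 * minimal, and the fill-ring/RX/TX processing loop is elided.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>
#include <xdp/xsk.h>

#define NUM_FRAMES	4096
#define FRAME_SIZE	XSK_UMEM__DEFAULT_FRAME_SIZE

int main(void)
{
	struct xsk_ring_prod fq, tx;
	struct xsk_ring_cons cq, rx;
	struct xsk_socket *xsk;
	struct xsk_umem *umem;
	size_t umem_len = (size_t)NUM_FRAMES * FRAME_SIZE;
	void *bufs;

	/* Page-aligned UMEM area shared between userspace and the NIC */
	bufs = mmap(NULL, umem_len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bufs == MAP_FAILED)
		return 1;

	if (xsk_umem__create(&umem, bufs, umem_len, &fq, &cq, NULL))
		return 1;

	/* Request zero-copy explicitly; this only succeeds once the driver
	 * advertises NETDEV_XDP_ACT_XSK_ZEROCOPY and handles
	 * XDP_SETUP_XSK_POOL, as added by this series.
	 */
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = XDP_ZEROCOPY | XDP_USE_NEED_WAKEUP,
	};

	if (xsk_socket__create(&xsk, "eth1", 0 /* queue id -> emac->xsk_qid */,
			       umem, &rx, &tx, &cfg)) {
		perror("xsk_socket__create");
		return 1;
	}

	/* ... populate the fill ring and run the usual AF_XDP RX/TX loop,
	 * calling poll()/sendto() when the need_wakeup flags are set ...
	 */

	xsk_socket__delete(xsk);
	xsk_umem__delete(umem);
	munmap(bufs, umem_len);
	return 0;
}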