Add PCIe FDMA support for lan966x.

The PCIe FDMA path uses contiguous DMA buffers mapped through the
endpoint's ATU, with memcpy-based frame transfer instead of per-page
DMA mappings.

With PCIe FDMA, throughput increases from ~33 Mbps (register-based
I/O) to ~620 Mbps on an Intel x86 host with a lan966x PCIe card.

Signed-off-by: Daniel Machon
---
 drivers/net/ethernet/microchip/lan966x/Makefile    |   4 +
 .../ethernet/microchip/lan966x/lan966x_fdma_pci.c  | 329 +++++++++++++++++++++
 .../net/ethernet/microchip/lan966x/lan966x_main.c  |  11 +
 .../net/ethernet/microchip/lan966x/lan966x_main.h  |  11 +
 .../net/ethernet/microchip/lan966x/lan966x_regs.h  |   1 +
 5 files changed, 356 insertions(+)

diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index 4cdbe263502c..ac0beceb2a0d 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -18,6 +18,10 @@ lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \
 lan966x-switch-$(CONFIG_LAN966X_DCB) += lan966x_dcb.o
 lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o
 
+ifdef CONFIG_MCHP_LAN966X_PCI
+lan966x-switch-y += lan966x_fdma_pci.o
+endif
+
 # Provide include files
 ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap
 ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/fdma
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma_pci.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma_pci.c
new file mode 100644
index 000000000000..a92862b386ab
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma_pci.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "fdma_api.h"
+#include "lan966x_main.h"
+
+static int lan966x_fdma_pci_dataptr_cb(struct fdma *fdma, int dcb, int db,
+				       u64 *dataptr)
+{
+	u64 addr;
+
+	addr = fdma_dataptr_dma_addr_contiguous(fdma, dcb, db);
+
+	*dataptr = fdma_pci_atu_translate_addr(fdma->atu_region, addr);
+
+	return 0;
+}
+
+static int lan966x_fdma_pci_nextptr_cb(struct fdma *fdma, int dcb, u64 *nextptr)
+{
+	u64 addr;
+
+	fdma_nextptr_cb(fdma, dcb, &addr);
+
+	*nextptr = fdma_pci_atu_translate_addr(fdma->atu_region, addr);
+
+	return 0;
+}
+
+static int lan966x_fdma_pci_rx_alloc(struct lan966x_rx *rx)
+{
+	struct lan966x *lan966x = rx->lan966x;
+	struct fdma *fdma = &rx->fdma;
+	int err;
+
+	err = fdma_alloc_coherent_and_map(lan966x->dev, fdma, &lan966x->atu);
+	if (err)
+		return err;
+
+	fdma_dcbs_init(fdma,
+		       FDMA_DCB_INFO_DATAL(fdma->db_size),
+		       FDMA_DCB_STATUS_INTR);
+
+	lan966x_fdma_llp_configure(lan966x,
+				   fdma->atu_region->base_addr,
+				   fdma->channel_id);
+
+	return 0;
+}
+
+static int lan966x_fdma_pci_tx_alloc(struct lan966x_tx *tx)
+{
+	struct lan966x *lan966x = tx->lan966x;
+	struct fdma *fdma = &tx->fdma;
+	int err;
+
+	err = fdma_alloc_coherent_and_map(lan966x->dev, fdma, &lan966x->atu);
+	if (err)
+		return err;
+
+	fdma_dcbs_init(fdma,
+		       FDMA_DCB_INFO_DATAL(fdma->db_size),
+		       FDMA_DCB_STATUS_DONE);
+
+	lan966x_fdma_llp_configure(lan966x,
+				   fdma->atu_region->base_addr,
+				   fdma->channel_id);
+
+	return 0;
+}
+
+static int lan966x_fdma_pci_rx_check_frame(struct lan966x_rx *rx, u64 *src_port)
+{
+	struct lan966x *lan966x = rx->lan966x;
+	struct fdma *fdma = &rx->fdma;
+	void *virt_addr;
+
+	virt_addr = fdma_dataptr_virt_addr_contiguous(fdma,
+						      fdma->dcb_index,
+						      fdma->db_index);
+
+	lan966x_ifh_get_src_port(virt_addr, src_port);
+
+	if (WARN_ON(*src_port >= lan966x->num_phys_ports))
+		return FDMA_ERROR;
+
+	return FDMA_PASS;
+}
+
+static struct sk_buff *lan966x_fdma_pci_rx_get_frame(struct lan966x_rx *rx,
+						     u64 src_port)
+{
+	struct lan966x *lan966x = rx->lan966x;
+	struct fdma *fdma = &rx->fdma;
+	struct sk_buff *skb;
+	struct fdma_db *db;
+	u32 data_len;
+
+	/* Get the received frame and create an SKB for it. */
+	db = fdma_db_next_get(fdma);
+	data_len = FDMA_DCB_STATUS_BLOCKL(db->status);
+
+	skb = napi_alloc_skb(&lan966x->napi, data_len);
+	if (unlikely(!skb))
+		return NULL;
+
+	memcpy(skb->data,
+	       fdma_dataptr_virt_addr_contiguous(fdma,
+						 fdma->dcb_index,
+						 fdma->db_index),
+	       data_len);
+
+	skb_put(skb, data_len);
+
+	skb->dev = lan966x->ports[src_port]->dev;
+	skb_pull(skb, IFH_LEN_BYTES);
+
+	if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
+		skb_trim(skb, skb->len - ETH_FCS_LEN);
+
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	if (lan966x->bridge_mask & BIT(src_port)) {
+		skb->offload_fwd_mark = 1;
+
+		skb_reset_network_header(skb);
+		if (!lan966x_hw_offload(lan966x, src_port, skb))
+			skb->offload_fwd_mark = 0;
+	}
+
+	skb->dev->stats.rx_bytes += skb->len;
+	skb->dev->stats.rx_packets++;
+
+	return skb;
+}
+
+static int lan966x_fdma_pci_get_next_dcb(struct fdma *fdma)
+{
+	struct fdma_db *db;
+
+	for (int i = 0; i < fdma->n_dcbs; i++) {
+		db = fdma_db_get(fdma, i, 0);
+
+		if (!fdma_db_is_done(db))
+			continue;
+		if (fdma_is_last(fdma, &fdma->dcbs[i]))
+			continue;
+
+		return i;
+	}
+
+	return -1;
+}
+
+static int lan966x_fdma_pci_xmit(struct sk_buff *skb, __be32 *ifh,
+				 struct net_device *dev)
+{
+	struct lan966x_port *port = netdev_priv(dev);
+	struct lan966x *lan966x = port->lan966x;
+	struct lan966x_tx *tx = &lan966x->tx;
+	struct fdma *fdma = &tx->fdma;
+	int next_to_use;
+	void *virt_addr;
+
+	next_to_use = lan966x_fdma_pci_get_next_dcb(fdma);
+
+	if (next_to_use < 0) {
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (skb_put_padto(skb, ETH_ZLEN)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	skb_tx_timestamp(skb);
+
+	virt_addr = fdma_dataptr_virt_addr_contiguous(fdma, next_to_use, 0);
+	memcpy(virt_addr, ifh, IFH_LEN_BYTES);
+	memcpy((u8 *)virt_addr + IFH_LEN_BYTES, skb->data, skb->len);
+
+	fdma_dcb_add(fdma,
+		     next_to_use,
+		     0,
+		     FDMA_DCB_STATUS_SOF |
+		     FDMA_DCB_STATUS_EOF |
+		     FDMA_DCB_STATUS_BLOCKO(0) |
+		     FDMA_DCB_STATUS_BLOCKL(IFH_LEN_BYTES + skb->len + ETH_FCS_LEN));
+
+	/* Start the transmission. */
+	lan966x_fdma_tx_start(tx);
+
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+
+	dev_consume_skb_any(skb);
+
+	return NETDEV_TX_OK;
+}
+
+static int lan966x_fdma_pci_napi_poll(struct napi_struct *napi, int weight)
+{
+	struct lan966x *lan966x = container_of(napi, struct lan966x, napi);
+	struct lan966x_rx *rx = &lan966x->rx;
+	struct fdma *fdma = &rx->fdma;
+	int dcb_reload, old_dcb;
+	struct sk_buff *skb;
+	int counter = 0;
+	u64 src_port;
+
+	/* Wake any stopped TX queues if a TX DCB is available. */
+	if (lan966x_fdma_pci_get_next_dcb(&lan966x->tx.fdma) >= 0)
+		lan966x_fdma_wakeup_netdev(lan966x);
+
+	dcb_reload = fdma->dcb_index;
+
+	/* Get all received skbs. */
+	while (counter < weight) {
+		if (!fdma_has_frames(fdma))
+			break;
+		counter++;
+		switch (lan966x_fdma_pci_rx_check_frame(rx, &src_port)) {
+		case FDMA_PASS:
+			break;
+		case FDMA_ERROR:
+			fdma_dcb_advance(fdma);
+			goto allocate_new;
+		}
+		skb = lan966x_fdma_pci_rx_get_frame(rx, src_port);
+		fdma_dcb_advance(fdma);
+		if (!skb)
+			goto allocate_new;
+
+		napi_gro_receive(&lan966x->napi, skb);
+	}
+allocate_new:
+	while (dcb_reload != fdma->dcb_index) {
+		old_dcb = dcb_reload;
+		dcb_reload++;
+		dcb_reload &= fdma->n_dcbs - 1;
+
+		fdma_dcb_add(fdma,
+			     old_dcb,
+			     FDMA_DCB_INFO_DATAL(fdma->db_size),
+			     FDMA_DCB_STATUS_INTR);
+
+		lan966x_fdma_rx_reload(rx);
+	}
+
+	if (counter < weight && napi_complete_done(napi, counter))
+		lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
+
+	return counter;
+}
+
+static int lan966x_fdma_pci_init(struct lan966x *lan966x)
+{
+	struct fdma *rx_fdma = &lan966x->rx.fdma;
+	struct fdma *tx_fdma = &lan966x->tx.fdma;
+	int err;
+
+	if (!lan966x->fdma)
+		return 0;
+
+	fdma_pci_atu_init(&lan966x->atu, lan966x->regs[TARGET_PCIE_DBI]);
+
+	lan966x->rx.lan966x = lan966x;
+	lan966x->rx.max_mtu = lan966x_fdma_get_max_frame(lan966x);
+	rx_fdma->channel_id = FDMA_XTR_CHANNEL;
+	rx_fdma->n_dcbs = FDMA_DCB_MAX;
+	rx_fdma->n_dbs = FDMA_RX_DCB_MAX_DBS;
+	rx_fdma->priv = lan966x;
+	rx_fdma->db_size = FDMA_PCI_DB_SIZE(lan966x->rx.max_mtu);
+	rx_fdma->size = fdma_get_size_contiguous(rx_fdma);
+	rx_fdma->ops.nextptr_cb = &lan966x_fdma_pci_nextptr_cb;
+	rx_fdma->ops.dataptr_cb = &lan966x_fdma_pci_dataptr_cb;
+
+	lan966x->tx.lan966x = lan966x;
+	tx_fdma->channel_id = FDMA_INJ_CHANNEL;
+	tx_fdma->n_dcbs = FDMA_DCB_MAX;
+	tx_fdma->n_dbs = FDMA_TX_DCB_MAX_DBS;
+	tx_fdma->priv = lan966x;
+	tx_fdma->db_size = FDMA_PCI_DB_SIZE(lan966x->rx.max_mtu);
+	tx_fdma->size = fdma_get_size_contiguous(tx_fdma);
+	tx_fdma->ops.nextptr_cb = &lan966x_fdma_pci_nextptr_cb;
+	tx_fdma->ops.dataptr_cb = &lan966x_fdma_pci_dataptr_cb;
+
+	err = lan966x_fdma_pci_rx_alloc(&lan966x->rx);
+	if (err)
+		return err;
+
+	err = lan966x_fdma_pci_tx_alloc(&lan966x->tx);
+	if (err) {
+		fdma_free_coherent_and_unmap(lan966x->dev, rx_fdma);
+		return err;
+	}
+
+	lan966x_fdma_rx_start(&lan966x->rx);
+
+	return 0;
+}
+
+static int lan966x_fdma_pci_resize(struct lan966x *lan966x)
+{
+	return -EOPNOTSUPP;
+}
+
+static void lan966x_fdma_pci_deinit(struct lan966x *lan966x)
+{
+	if (!lan966x->fdma)
+		return;
+
+	lan966x_fdma_rx_disable(&lan966x->rx);
+	lan966x_fdma_tx_disable(&lan966x->tx);
+
+	napi_synchronize(&lan966x->napi);
+	napi_disable(&lan966x->napi);
+
+	fdma_free_coherent_and_unmap(lan966x->dev, &lan966x->rx.fdma);
+	fdma_free_coherent_and_unmap(lan966x->dev, &lan966x->tx.fdma);
+}
+
+const struct lan966x_fdma_ops lan966x_fdma_pci_ops = {
+	.fdma_init = &lan966x_fdma_pci_init,
+	.fdma_deinit = &lan966x_fdma_pci_deinit,
+	.fdma_xmit = &lan966x_fdma_pci_xmit,
+	.fdma_poll = &lan966x_fdma_pci_napi_poll,
+	.fdma_resize = &lan966x_fdma_pci_resize,
+};
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 9f69634ebb0a..fc14738774ec 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -49,6 +50,9 @@ struct lan966x_main_io_resource {
 static const struct lan966x_main_io_resource lan966x_main_iomap[] = {
 	{ TARGET_CPU, 0xc0000, 0 }, /* 0xe00c0000 */
 	{ TARGET_FDMA, 0xc0400, 0 }, /* 0xe00c0400 */
+#if IS_ENABLED(CONFIG_MCHP_LAN966X_PCI)
+	{ TARGET_PCIE_DBI, 0x400000, 0 }, /* 0xe0400000 */
+#endif
 	{ TARGET_ORG, 0, 1 }, /* 0xe2000000 */
 	{ TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */
 	{ TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */
@@ -1089,6 +1093,13 @@ static int lan966x_reset_switch(struct lan966x *lan966x)
 
 static const struct lan966x_fdma_ops *lan966x_get_fdma_ops(struct device *dev)
 {
+#if IS_ENABLED(CONFIG_MCHP_LAN966X_PCI)
+	struct device *parent = dev->parent;
+
+	if (parent && parent->parent && dev_is_pci(parent->parent))
+		return &lan966x_fdma_pci_ops;
+#endif
+
 	return &lan966x_fdma_ops;
 }
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index ed2707079d3e..8fcc51133417 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -17,6 +17,9 @@
 #include
 #include
 
+#if IS_ENABLED(CONFIG_MCHP_LAN966X_PCI)
+#include
+#endif
 #include
 #include
 
@@ -288,6 +291,10 @@ struct lan966x {
 
 	void __iomem *regs[NUM_TARGETS];
 
+#if IS_ENABLED(CONFIG_MCHP_LAN966X_PCI)
+	struct fdma_pci_atu atu;
+#endif
+
 	int shared_queue_sz;
 
 	u8 base_mac[ETH_ALEN];
@@ -587,6 +594,10 @@ void lan966x_fdma_stop_netdev(struct lan966x *lan966x);
 int lan966x_fdma_get_max_frame(struct lan966x *lan966x);
 int lan966x_qsys_sw_status(struct lan966x *lan966x);
 
+#if IS_ENABLED(CONFIG_MCHP_LAN966X_PCI)
+extern const struct lan966x_fdma_ops lan966x_fdma_pci_ops;
+#endif
+
 int lan966x_lag_port_join(struct lan966x_port *port,
			   struct net_device *brport_dev,
			   struct net_device *bond,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 4b553927d2e0..f9448780bd4f 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -20,6 +20,7 @@ enum lan966x_target {
 	TARGET_FDMA = 21,
 	TARGET_GCB = 27,
 	TARGET_ORG = 36,
+	TARGET_PCIE_DBI = 40,
 	TARGET_PTP = 41,
 	TARGET_QS = 42,
 	TARGET_QSYS = 46,
-- 
2.34.1