In eDMA-backed mode (when using ntb_transport_edma), NTB transport can scale throughput across multiple queue pairs without being constrained by scarce BAR/memory window space used for data-plane buffers. This contrasts with the default ntb_transport, where, even with a single queue pair, at most 15 in-flight descriptors fit in a 1 MiB memory window (MW). Teach ntb_netdev to allocate multiple ntb_transport queue pairs and expose them as a multi-queue net_device. With this patch, up to N queue pairs are created, where N is chosen as follows: - By default, N is num_online_cpus(), to give each CPU its own queue. - If the ntb_num_queues module parameter is non-zero, it overrides the default and requests that many queues. - In both cases, the requested value is capped at a fixed upper bound (to avoid unbounded allocations) and by the number of queue pairs actually available from ntb_transport. If only one queue pair can be created (or ntb_num_queues=1 is set), the driver effectively falls back to the previous single-queue behavior. Signed-off-by: Koichiro Den --- drivers/net/ntb_netdev.c | 341 ++++++++++++++++++++++++++++----------- 1 file changed, 243 insertions(+), 98 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index fbeae05817e9..fc300db66ef7 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -53,6 +53,8 @@ #include #include #include +#include +#include #define NTB_NETDEV_VER "0.7" @@ -70,26 +72,84 @@ static unsigned int tx_start = 10; /* Number of descriptors still available before stop upper layer tx */ static unsigned int tx_stop = 5; +/* + * Upper bound on how many queue pairs we will try to create even if + * ntb_num_queues or num_online_cpus() is very large. This is an + * arbitrary safety cap to avoid unbounded allocations. + */ +#define NTB_NETDEV_MAX_QUEUES 64 + +/* + * ntb_num_queues == 0 (default) means: + * - use num_online_cpus() as the desired queue count, capped by + * NTB_NETDEV_MAX_QUEUES.
+ * ntb_num_queues > 0: + * - try to create exactly ntb_num_queues queue pairs (again capped + * by NTB_NETDEV_MAX_QUEUES), but fall back to the number of queue + * pairs actually available from ntb_transport. + */ +static unsigned int ntb_num_queues; +module_param(ntb_num_queues, uint, 0644); +MODULE_PARM_DESC(ntb_num_queues, + "Number of NTB netdev queue pairs to use (0 = per-CPU)"); + +struct ntb_netdev; + +struct ntb_netdev_queue { + struct ntb_netdev *ntdev; + struct ntb_transport_qp *qp; + struct timer_list tx_timer; + u16 qid; +}; + struct ntb_netdev { struct pci_dev *pdev; struct net_device *ndev; - struct ntb_transport_qp *qp; - struct timer_list tx_timer; + unsigned int num_queues; + struct ntb_netdev_queue *queues; }; #define NTB_TX_TIMEOUT_MS 1000 #define NTB_RXQ_SIZE 100 +static unsigned int ntb_netdev_default_queues(void) +{ + unsigned int n; + + if (ntb_num_queues) + n = ntb_num_queues; + else + n = num_online_cpus(); + + if (!n) + n = 1; + + if (n > NTB_NETDEV_MAX_QUEUES) + n = NTB_NETDEV_MAX_QUEUES; + + return n; +} + static void ntb_netdev_event_handler(void *data, int link_is_up) { - struct net_device *ndev = data; - struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *q = data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev = dev->ndev; + bool any_up = false; + unsigned int i; - netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up, - ntb_transport_link_query(dev->qp)); + netdev_dbg(ndev, "Event %x, Link %x, qp %u\n", link_is_up, + ntb_transport_link_query(q->qp), q->qid); if (link_is_up) { - if (ntb_transport_link_query(dev->qp)) + for (i = 0; i < dev->num_queues; i++) { + if (ntb_transport_link_query(dev->queues[i].qp)) { + any_up = true; + break; + } + } + + if (any_up) netif_carrier_on(ndev); } else { netif_carrier_off(ndev); @@ -99,7 +159,9 @@ static void ntb_netdev_event_handler(void *data, int link_is_up) static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { - 
struct net_device *ndev = qp_data; + struct ntb_netdev_queue *q = qp_data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev = dev->ndev; struct sk_buff *skb; int rc; @@ -135,7 +197,8 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, } enqueue_again: - rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN); + rc = ntb_transport_rx_enqueue(q->qp, skb, skb->data, + ndev->mtu + ETH_HLEN); if (rc) { dev_kfree_skb_any(skb); ndev->stats.rx_errors++; @@ -143,42 +206,37 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, } } -static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev, - struct ntb_transport_qp *qp, int size) +static int ntb_netdev_maybe_stop_tx(struct ntb_netdev_queue *q, int size) { - struct ntb_netdev *dev = netdev_priv(netdev); + struct net_device *ndev = q->ntdev->ndev; + + if (ntb_transport_tx_free_entry(q->qp) >= size) + return 0; + + netif_stop_subqueue(ndev, q->qid); - netif_stop_queue(netdev); /* Make sure to see the latest value of ntb_transport_tx_free_entry() * since the queue was last started. 
*/ smp_mb(); - if (likely(ntb_transport_tx_free_entry(qp) < size)) { - mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + if (likely(ntb_transport_tx_free_entry(q->qp) < size)) { + mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time)); return -EBUSY; } - netif_start_queue(netdev); - return 0; -} - -static int ntb_netdev_maybe_stop_tx(struct net_device *ndev, - struct ntb_transport_qp *qp, int size) -{ - if (netif_queue_stopped(ndev) || - (ntb_transport_tx_free_entry(qp) >= size)) - return 0; + netif_wake_subqueue(ndev, q->qid); - return __ntb_netdev_maybe_stop_tx(ndev, qp, size); + return 0; } static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { - struct net_device *ndev = qp_data; + struct ntb_netdev_queue *q = qp_data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev = dev->ndev; struct sk_buff *skb; - struct ntb_netdev *dev = netdev_priv(ndev); skb = data; if (!skb || !ndev) @@ -194,13 +252,12 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, dev_kfree_skb_any(skb); - if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) { + if (ntb_transport_tx_free_entry(qp) >= tx_start) { /* Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + netif_wake_subqueue(ndev, q->qid); } } @@ -208,16 +265,26 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + u16 qid = skb_get_queue_mapping(skb); + struct ntb_netdev_queue *q; int rc; - ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + if (unlikely(!dev->num_queues)) + goto err; + + if (unlikely(qid >= dev->num_queues)) + qid = qid % dev->num_queues; - rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); + q = &dev->queues[qid]; + + ntb_netdev_maybe_stop_tx(q, tx_stop); + + rc = 
ntb_transport_tx_enqueue(q->qp, skb, skb->data, skb->len); if (rc) goto err; /* check for next submit */ - ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + ntb_netdev_maybe_stop_tx(q, tx_stop); return NETDEV_TX_OK; @@ -229,80 +296,103 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, static void ntb_netdev_tx_timer(struct timer_list *t) { - struct ntb_netdev *dev = timer_container_of(dev, t, tx_timer); + struct ntb_netdev_queue *q = container_of(t, struct ntb_netdev_queue, tx_timer); + struct ntb_netdev *dev = q->ntdev; struct net_device *ndev = dev->ndev; - if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { - mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + if (ntb_transport_tx_free_entry(q->qp) < tx_stop) { + mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time)); } else { - /* Make sure anybody stopping the queue after this sees the new + /* + * Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + netif_wake_subqueue(ndev, q->qid); } } static int ntb_netdev_open(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; - int rc, i, len; - - /* Add some empty rx bufs */ - for (i = 0; i < NTB_RXQ_SIZE; i++) { - skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); - if (!skb) { - rc = -ENOMEM; - goto err; - } + int rc = 0, i, len; + unsigned int q; - rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data, - ndev->mtu + ETH_HLEN); - if (rc) { - dev_kfree_skb(skb); - goto err; + /* Add some empty rx bufs for each queue */ + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; + + for (i = 0; i < NTB_RXQ_SIZE; i++) { + skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); + if (!skb) { + rc = -ENOMEM; + goto err; + } + + rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data, + ndev->mtu + ETH_HLEN); + if (rc) { + 
dev_kfree_skb(skb); + goto err; + } } - } - timer_setup(&dev->tx_timer, ntb_netdev_tx_timer, 0); + timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0); + } netif_carrier_off(ndev); - ntb_transport_link_up(dev->qp); - netif_start_queue(ndev); + + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_up(dev->queues[q].qp); + + netif_tx_start_all_queues(ndev); return 0; err: - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; + + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + } return rc; } static int ntb_netdev_close(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; + unsigned int q; int len; - ntb_transport_link_down(dev->qp); + netif_tx_stop_all_queues(ndev); + + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + ntb_transport_link_down(queue->qp); - timer_delete_sync(&dev->tx_timer); + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + timer_delete_sync(&queue->tx_timer); + } return 0; } static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; - int len, rc; + unsigned int q, i; + int len, rc = 0; - if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN) + if (new_mtu > ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN) return -EINVAL; if (!netif_running(ndev)) { @@ -311,41 +401,54 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) } /* Bring down the link and dispose of posted rx entries */ - ntb_transport_link_down(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_down(dev->queues[q].qp); if (ndev->mtu < new_mtu) { - int i; - - for (i = 0; (skb = 
ntb_transport_rx_remove(dev->qp, &len)); i++) - dev_kfree_skb(skb); + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; - for (; i; i--) { - skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN); - if (!skb) { - rc = -ENOMEM; - goto err; - } - - rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data, - new_mtu + ETH_HLEN); - if (rc) { + for (i = 0; + (skb = ntb_transport_rx_remove(queue->qp, &len)); + i++) dev_kfree_skb(skb); - goto err; + + for (; i; i--) { + skb = netdev_alloc_skb(ndev, + new_mtu + ETH_HLEN); + if (!skb) { + rc = -ENOMEM; + goto err; + } + + rc = ntb_transport_rx_enqueue(queue->qp, skb, + skb->data, + new_mtu + + ETH_HLEN); + if (rc) { + dev_kfree_skb(skb); + goto err; + } } } } WRITE_ONCE(ndev->mtu, new_mtu); - ntb_transport_link_up(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_up(dev->queues[q].qp); return 0; err: - ntb_transport_link_down(dev->qp); + for (q = 0; q < dev->num_queues; q++) { + struct ntb_netdev_queue *queue = &dev->queues[q]; + + ntb_transport_link_down(queue->qp); - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + } netdev_err(ndev, "Error changing MTU, device inoperable\n"); return rc; @@ -404,6 +507,7 @@ static int ntb_netdev_probe(struct device *client_dev) struct net_device *ndev; struct pci_dev *pdev; struct ntb_netdev *dev; + unsigned int q, desired_queues; int rc; ntb = dev_ntb(client_dev->parent); @@ -411,7 +515,9 @@ static int ntb_netdev_probe(struct device *client_dev) if (!pdev) return -ENODEV; - ndev = alloc_etherdev(sizeof(*dev)); + desired_queues = ntb_netdev_default_queues(); + + ndev = alloc_etherdev_mq(sizeof(*dev), desired_queues); if (!ndev) return -ENOMEM; @@ -420,6 +526,15 @@ static int ntb_netdev_probe(struct device *client_dev) dev = netdev_priv(ndev); dev->ndev = ndev; dev->pdev = pdev; + dev->num_queues = 0; + + dev->queues = kcalloc(desired_queues, 
sizeof(*dev->queues), + GFP_KERNEL); + if (!dev->queues) { + rc = -ENOMEM; + goto err_free_netdev; + } + ndev->features = NETIF_F_HIGHDMA; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -436,26 +551,51 @@ static int ntb_netdev_probe(struct device *client_dev) ndev->min_mtu = 0; ndev->max_mtu = ETH_MAX_MTU; - dev->qp = ntb_transport_create_queue(ndev, client_dev, - &ntb_netdev_handlers); - if (!dev->qp) { + for (q = 0; q < desired_queues; q++) { + struct ntb_netdev_queue *queue = &dev->queues[q]; + + queue->ntdev = dev; + queue->qid = q; + queue->qp = ntb_transport_create_queue(queue, client_dev, + &ntb_netdev_handlers); + if (!queue->qp) + break; + + dev->num_queues++; + } + + if (!dev->num_queues) { rc = -EIO; - goto err; + goto err_free_queues; } - ndev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN; + rc = netif_set_real_num_tx_queues(ndev, dev->num_queues); + if (rc) + goto err_free_qps; + + rc = netif_set_real_num_rx_queues(ndev, dev->num_queues); + if (rc) + goto err_free_qps; + + ndev->mtu = ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN; rc = register_netdev(ndev); if (rc) - goto err1; + goto err_free_qps; dev_set_drvdata(client_dev, ndev); - dev_info(&pdev->dev, "%s created\n", ndev->name); + dev_info(&pdev->dev, "%s created with %u queue pairs\n", + ndev->name, dev->num_queues); return 0; -err1: - ntb_transport_free_queue(dev->qp); -err: +err_free_qps: + for (q = 0; q < dev->num_queues; q++) + ntb_transport_free_queue(dev->queues[q].qp); + +err_free_queues: + kfree(dev->queues); + +err_free_netdev: free_netdev(ndev); return rc; } @@ -464,9 +604,14 @@ static void ntb_netdev_remove(struct device *client_dev) { struct net_device *ndev = dev_get_drvdata(client_dev); struct ntb_netdev *dev = netdev_priv(ndev); + unsigned int q; + unregister_netdev(ndev); - ntb_transport_free_queue(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_free_queue(dev->queues[q].qp); + + kfree(dev->queues); free_netdev(ndev); } -- 2.51.0