From: Nimrod Oren

Wire mlx5_frag_buf pools init/cleanup hooks into mlx5_mdev_init()/uninit()
and the init unwind path. Keep temporary no-op stubs in alloc.c so
lifecycle ordering is in place before the coherent DMA sub-page allocator
implementation is added in follow-up patches.

Signed-off-by: Nimrod Oren
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c     | 11 +++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c      |  7 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h |  2 ++
 3 files changed, 20 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 202feab1558a..cebb3559d2c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,6 +71,17 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
 	return cpu_handle;
 }
 
+/* Implemented later in the series */
+void mlx5_frag_buf_pools_cleanup(struct mlx5_core_dev *dev)
+{
+}
+
+/* Implemented later in the series */
+int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
+{
+	return 0;
+}
+
 int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			     struct mlx5_frag_buf *buf, int node)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 74827e8ca125..b1b9ebfd3866 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1817,6 +1817,10 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 	priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
 						mlx5_debugfs_root);
 
+	err = mlx5_frag_buf_pools_init(dev);
+	if (err)
+		goto err_frag_buf_pools_init;
+
 	INIT_LIST_HEAD(&priv->traps);
 
 	err = mlx5_cmd_init(dev);
@@ -1878,6 +1882,8 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 err_timeout_init:
 	mlx5_cmd_cleanup(dev);
 err_cmd_init:
+	mlx5_frag_buf_pools_cleanup(dev);
+err_frag_buf_pools_init:
 	debugfs_remove(dev->priv.dbg.dbg_root);
 	mutex_destroy(&priv->pgdir_mutex);
 	mutex_destroy(&priv->alloc_mutex);
@@ -1902,6 +1908,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 	mlx5_health_cleanup(dev);
 	mlx5_tout_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
+	mlx5_frag_buf_pools_cleanup(dev);
 	debugfs_remove_recursive(dev->priv.dbg.dbg_root);
 	mutex_destroy(&priv->pgdir_mutex);
 	mutex_destroy(&priv->alloc_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 1507e881d962..87f01c4e8d65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -436,6 +436,8 @@ mlx5_sf_coredev_to_adev(struct mlx5_core_dev *mdev)
 
 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
 void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
+int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev);
+void mlx5_frag_buf_pools_cleanup(struct mlx5_core_dev *dev);
 int mlx5_init_one(struct mlx5_core_dev *dev);
 int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev);
 void mlx5_uninit_one(struct mlx5_core_dev *dev);
-- 
2.44.0

From: Nimrod Oren

Introduce mlx5 DMA pool and pool-page data structures, and add the
creation and teardown paths.

Each NUMA node owns a set of mlx5_dma_pool instances, each one with a
different block size. The block sizes are all powers of two, from
2^MLX5_ADAPTER_PAGE_SHIFT up to PAGE_SIZE.
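As an illustration (not part of the patch), a small userspace sketch of
the resulting per-node pool layout. MLX5_ADAPTER_PAGE_SHIFT is 12 in
include/linux/mlx5/device.h; the PAGE_SHIFT of 16 below models a
64K-page kernel, while on a 4K-page kernel the two shifts coincide and
a single pool is created per node:

  #include <stdio.h>

  #define MLX5_ADAPTER_PAGE_SHIFT 12
  #define PAGE_SHIFT 16 /* assumed: a 64K-page kernel */
  #define MIN_BLOCK_SHIFT MLX5_ADAPTER_PAGE_SHIFT
  #define POOLS_NUM (PAGE_SHIFT - MIN_BLOCK_SHIFT + 1)

  int main(void)
  {
  	/* One pool per power-of-two block size, smallest first:
  	 * 4K, 8K, 16K, 32K, 64K -> POOLS_NUM == 5.
  	 */
  	for (int i = 0; i < POOLS_NUM; i++)
  		printf("pool %d: %lu-byte blocks\n",
  		       i, 1UL << (MIN_BLOCK_SHIFT + i));
  	return 0;
  }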
Since mlx5_frag_bufs are used to back objects whose sizes are encoded
relative to MLX5_ADAPTER_PAGE_SHIFT, a smaller block_shift value cannot
be used. Requests larger than PAGE_SIZE continue to be handled as
page-sized fragments, as in the existing frag-buf allocation model.

Signed-off-by: Nimrod Oren
Signed-off-by: Tariq Toukan
---
 .../net/ethernet/mellanox/mlx5/core/alloc.c   | 116 +++++++++++++++++-
 include/linux/mlx5/driver.h                   |   7 +-
 2 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index cebb3559d2c9..fcc859c5f810 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -37,10 +37,15 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
 #include <linux/dma-mapping.h>
 #include "mlx5_core.h"
 
+#define MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT MLX5_ADAPTER_PAGE_SHIFT
+#define MLX5_FRAG_BUF_POOLS_NUM \
+	(PAGE_SHIFT - MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + 1)
+
 struct mlx5_db_pgdir {
 	struct list_head list;
 	unsigned long *bitmap;
@@ -48,6 +53,27 @@ struct mlx5_db_pgdir {
 	dma_addr_t db_dma;
 };
 
+struct mlx5_dma_pool {
+	/* Protects page_list and per-page allocation bitmaps. */
+	struct mutex lock;
+	struct list_head page_list;
+	struct mlx5_core_dev *dev;
+	int node;
+	u8 block_shift;
+};
+
+struct mlx5_dma_pool_page {
+	struct mlx5_dma_pool *pool;
+	struct list_head pool_link;
+	unsigned long *bitmap;
+	void *buf;
+	dma_addr_t dma;
+};
+
+struct mlx5_frag_buf_node_pools {
+	struct mlx5_dma_pool *pools[MLX5_FRAG_BUF_POOLS_NUM];
+};
+
 /* Handling for queue buffers -- we allocate a bunch of memory and
  * register it in a memory region at HCA virtual address 0.
  */
@@ -71,14 +97,100 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
 	return cpu_handle;
 }
 
-/* Implemented later in the series */
+static void mlx5_dma_pool_destroy(struct mlx5_dma_pool *pool)
+{
+	mutex_destroy(&pool->lock);
+	kfree(pool);
+}
+
+static struct mlx5_dma_pool *mlx5_dma_pool_create(struct mlx5_core_dev *dev,
+						  int node, u8 block_shift)
+{
+	struct mlx5_dma_pool *pool;
+
+	pool = kzalloc_obj(*pool);
+	if (!pool)
+		return NULL;
+
+	INIT_LIST_HEAD(&pool->page_list);
+	mutex_init(&pool->lock);
+	pool->dev = dev;
+	pool->node = node;
+	pool->block_shift = block_shift;
+	return pool;
+}
+
+static void
+mlx5_frag_buf_node_pools_destroy(struct mlx5_frag_buf_node_pools *node_pools)
+{
+	for (int i = 0; i < MLX5_FRAG_BUF_POOLS_NUM; i++)
+		if (node_pools->pools[i])
+			mlx5_dma_pool_destroy(node_pools->pools[i]);
+	kfree(node_pools);
+}
+
+static struct mlx5_frag_buf_node_pools *
+mlx5_frag_buf_node_pools_create(struct mlx5_core_dev *dev, int node)
+{
+	struct mlx5_frag_buf_node_pools *node_pools;
+
+	node_pools = kzalloc_obj(*node_pools);
+	if (!node_pools)
+		return NULL;
+
+	for (int i = 0; i < MLX5_FRAG_BUF_POOLS_NUM; i++) {
+		u8 block_shift = MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + i;
+
+		node_pools->pools[i] = mlx5_dma_pool_create(dev, node,
+							    block_shift);
+		if (!node_pools->pools[i]) {
+			mlx5_frag_buf_node_pools_destroy(node_pools);
+			return NULL;
+		}
+	}
+
+	return node_pools;
+}
+
 void mlx5_frag_buf_pools_cleanup(struct mlx5_core_dev *dev)
 {
+	struct mlx5_priv *priv = &dev->priv;
+	int node;
+
+	for_each_node_state(node, N_POSSIBLE) {
+		struct mlx5_frag_buf_node_pools *node_pools;
+
+		node_pools = priv->frag_buf_node_pools[node];
+		if (!node_pools)
+			continue;
+		mlx5_frag_buf_node_pools_destroy(node_pools);
+	}
+
+	kfree(priv->frag_buf_node_pools);
+	priv->frag_buf_node_pools = NULL;
 }
 
-/* Implemented later in the series */
 int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
 {
+	struct mlx5_priv *priv = &dev->priv;
+	int node;
+
+	priv->frag_buf_node_pools = kzalloc_objs(*priv->frag_buf_node_pools,
+						 nr_node_ids);
+	if (!priv->frag_buf_node_pools)
+		return -ENOMEM;
+
+	for_each_node_state(node, N_POSSIBLE) {
+		struct mlx5_frag_buf_node_pools *node_pools;
+
+		node_pools = mlx5_frag_buf_node_pools_create(dev, node);
+		if (!node_pools) {
+			mlx5_frag_buf_pools_cleanup(dev);
+			return -ENOMEM;
+		}
+		priv->frag_buf_node_pools[node] = node_pools;
+	}
+
 	return 0;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04b96c5abb57..71f7615ab553 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -558,6 +558,7 @@ enum mlx5_func_type {
 	MLX5_FUNC_TYPE_NUM,
 };
 
+struct mlx5_frag_buf_node_pools;
 struct mlx5_ft_pool;
 struct mlx5_priv {
 	/* IRQ table valid only for real pci devices PF or VF */
@@ -581,14 +582,16 @@ struct mlx5_priv {
 
 	struct mlx5_debugfs_entries dbg;
 
-	/* start: alloc staff */
+	/* start: alloc stuff */
 	/* protect buffer allocation according to numa node */
 	struct mutex alloc_mutex;
 	int numa_node;
 
 	struct mutex pgdir_mutex;
 	struct list_head pgdir_list;
-	/* end: alloc staff */
+
+	struct mlx5_frag_buf_node_pools **frag_buf_node_pools;
+	/* end: alloc stuff */
 
 	struct mlx5_adev **adev;
 	int adev_idx;
-- 
2.44.0

From: Nimrod Oren

Add mlx5_dma_pool alloc/free paths, and wire mlx5_frag_buf allocation
and free paths to use them.

mlx5_frag_buf_alloc_node() now selects an mlx5_dma_pool to allocate
fragments from, instead of directly allocating full coherent pages.
mlx5_frag_buf_free() frees from the respective pool.

mlx5_dma_pool_alloc() keeps allocation fast by maintaining pages with
available indexes at the head of the list, so the common allocation
path can take a free index immediately. New backing pages are allocated
only when no free index is available.

mlx5_dma_pool_free() returns released indexes to the pool and frees a
backing page once all of its indexes become free. This avoids keeping
fully free pages for the lifetime of the pool and reduces coherent DMA
memory footprint.
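To make the recycling discipline concrete, here is a minimal userspace
model (not the patch code; the block count and helper names are invented
for illustration): set bits mark free blocks, allocation takes the lowest
free index as find_first_bit() does, and a page becomes releasable once
every index has been returned:

  #include <stdbool.h>
  #include <stdio.h>

  #define BLOCKS_PER_PAGE 16	/* e.g. one 64K page of 4K blocks */

  /* All bits set == all blocks free, like bitmap_fill() at page birth. */
  static unsigned int free_mask = (1u << BLOCKS_PER_PAGE) - 1;

  /* Take the lowest free index, like find_first_bit() + __clear_bit(). */
  static int block_alloc(void)
  {
  	if (!free_mask)
  		return -1;	/* page full: caller must grab another page */
  	int idx = __builtin_ctz(free_mask);

  	free_mask &= ~(1u << idx);
  	return idx;
  }

  /* Return an index; reports whether the whole page is now releasable. */
  static bool block_free(int idx)
  {
  	free_mask |= 1u << idx;
  	return free_mask == (1u << BLOCKS_PER_PAGE) - 1;
  }

  int main(void)
  {
  	int a = block_alloc();	/* 0 */
  	int b = block_alloc();	/* 1 */

  	block_free(a);
  	printf("reused index: %d\n", block_alloc());	/* 0 again */
  	block_free(0);
  	printf("releasable: %d\n", block_free(b));	/* 1: page can go */
  	return 0;
  }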
Signed-off-by: Nimrod Oren
Signed-off-by: Tariq Toukan
---
 .../net/ethernet/mellanox/mlx5/core/alloc.c   | 170 ++++++++++++++----
 include/linux/mlx5/driver.h                   |   2 +
 2 files changed, 140 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index fcc859c5f810..f19644183828 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -120,6 +120,111 @@ static struct mlx5_dma_pool *mlx5_dma_pool_create(struct mlx5_core_dev *dev,
 	return pool;
 }
 
+static struct mlx5_dma_pool_page *
+mlx5_dma_pool_page_alloc(struct mlx5_dma_pool *pool)
+{
+	int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift);
+	struct mlx5_dma_pool_page *page;
+
+	page = kzalloc_obj(*page);
+	if (!page)
+		goto err_out;
+
+	page->pool = pool;
+	page->bitmap = bitmap_zalloc(blocks_per_page, GFP_KERNEL);
+	if (!page->bitmap)
+		goto err_free_page;
+
+	bitmap_fill(page->bitmap, blocks_per_page);
+	page->buf = mlx5_dma_zalloc_coherent_node(pool->dev, PAGE_SIZE,
+						  &page->dma, pool->node);
+	if (!page->buf)
+		goto err_free_bitmap;
+
+	return page;
+
+err_free_bitmap:
+	bitmap_free(page->bitmap);
+err_free_page:
+	kfree(page);
+err_out:
+	return NULL;
+}
+
+static void mlx5_dma_pool_page_free(struct mlx5_core_dev *dev,
+				    struct mlx5_dma_pool_page *page)
+{
+	dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, page->buf,
+			  page->dma);
+	bitmap_free(page->bitmap);
+	kfree(page);
+}
+
+static int mlx5_dma_pool_alloc_from_page(struct mlx5_dma_pool *pool,
+					 struct mlx5_dma_pool_page *page,
+					 unsigned long *idx_out)
+{
+	int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift);
+
+	*idx_out = find_first_bit(page->bitmap, blocks_per_page);
+	if (*idx_out >= blocks_per_page)
+		return -ENOMEM;
+
+	__clear_bit(*idx_out, page->bitmap);
+
+	if (bitmap_empty(page->bitmap, blocks_per_page))
+		list_move_tail(&page->pool_link, &pool->page_list);
+
+	return 0;
+}
+
+static struct mlx5_dma_pool_page *
+mlx5_dma_pool_alloc(struct mlx5_dma_pool *pool, unsigned long *idx_out)
+{
+	struct mlx5_dma_pool_page *page;
+
+	mutex_lock(&pool->lock);
+
+	page = list_first_entry_or_null(&pool->page_list,
+					struct mlx5_dma_pool_page, pool_link);
+	if (page && !mlx5_dma_pool_alloc_from_page(pool, page, idx_out))
+		goto unlock; /* successfully allocated from existing page */
+
+	page = mlx5_dma_pool_page_alloc(pool);
+	if (!page)
+		goto unlock;
+
+	list_add(&page->pool_link, &pool->page_list);
+	mlx5_dma_pool_alloc_from_page(pool, page, idx_out);
+
+unlock:
+	mutex_unlock(&pool->lock);
+	return page;
+}
+
+static void mlx5_dma_pool_free(struct mlx5_dma_pool *pool,
+			       struct mlx5_dma_pool_page *page,
+			       unsigned long idx)
+{
+	int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift);
+	bool was_full;
+
+	mutex_lock(&pool->lock);
+	was_full = bitmap_empty(page->bitmap, blocks_per_page);
+	__set_bit(idx, page->bitmap);
+
+	if (bitmap_full(page->bitmap, blocks_per_page)) {
+		list_del(&page->pool_link);
+		mlx5_dma_pool_page_free(pool->dev, page);
+	} else {
+		memset((u8 *)page->buf + (idx << pool->block_shift), 0,
+		       BIT(pool->block_shift));
+		if (was_full)
+			list_move(&page->pool_link, &pool->page_list);
+	}
+	mutex_unlock(&pool->lock);
+}
+
 static void
 mlx5_frag_buf_node_pools_destroy(struct mlx5_frag_buf_node_pools *node_pools)
 {
@@ -197,56 +302,57 @@ int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
 int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			     struct mlx5_frag_buf *buf, int node)
 {
-	int i;
+	struct mlx5_dma_pool *pool;
+	int pool_idx;
+
+	node = node == NUMA_NO_NODE ? first_online_node : node;
 
 	buf->size = size;
 	buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
-	buf->page_shift = PAGE_SHIFT;
-	buf->frags = kzalloc_objs(struct mlx5_buf_list, buf->npages);
+	buf->page_shift = clamp_t(int, order_base_2(size),
+				  MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT,
+				  PAGE_SHIFT);
+	buf->frags = kcalloc_node(buf->npages, sizeof(*buf->frags),
+				  GFP_KERNEL, node);
 	if (!buf->frags)
-		goto err_out;
+		return -ENOMEM;
 
-	for (i = 0; i < buf->npages; i++) {
+	pool_idx = buf->page_shift - MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT;
+	pool = dev->priv.frag_buf_node_pools[node]->pools[pool_idx];
+	for (int i = 0; i < buf->npages; i++) {
 		struct mlx5_buf_list *frag = &buf->frags[i];
-		int frag_sz = min_t(int, size, PAGE_SIZE);
+		struct mlx5_dma_pool_page *page;
+		unsigned long idx;
 
-		frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
-							  &frag->map, node);
-		if (!frag->buf)
-			goto err_free_buf;
-		if (frag->map & ((1 << buf->page_shift) - 1)) {
-			dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz,
-					  buf->frags[i].buf, buf->frags[i].map);
-			mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
-				       &frag->map, buf->page_shift);
-			goto err_free_buf;
+		page = mlx5_dma_pool_alloc(pool, &idx);
+		if (!page) {
+			mlx5_frag_buf_free(dev, buf);
+			return -ENOMEM;
 		}
-		size -= frag_sz;
+		frag->buf = (u8 *)page->buf + (idx << pool->block_shift);
+		frag->map = page->dma + (idx << pool->block_shift);
+		frag->frag_page = page;
 	}
 
 	return 0;
-
-err_free_buf:
-	while (i--)
-		dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf,
-				  buf->frags[i].map);
-	kfree(buf->frags);
-err_out:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
 
 void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
-	int size = buf->size;
-	int i;
+	for (int i = 0; i < buf->npages; i++) {
+		struct mlx5_buf_list *frag = &buf->frags[i];
+		struct mlx5_dma_pool_page *page;
+		struct mlx5_dma_pool *pool;
+		unsigned long idx;
 
-	for (i = 0; i < buf->npages; i++) {
-		int frag_sz = min_t(int, size, PAGE_SIZE);
+		if (!frag->buf)
+			continue;
 
-		dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf,
-				  buf->frags[i].map);
-		size -= frag_sz;
+		page = frag->frag_page;
+		pool = page->pool;
+		idx = (frag->map - page->dma) >> pool->block_shift;
+		mlx5_dma_pool_free(pool, page, idx);
 	}
 	kfree(buf->frags);
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 71f7615ab553..531ce66fc8ef 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -343,9 +343,11 @@ struct mlx5_cmd_mailbox {
 	struct mlx5_cmd_mailbox *next;
 };
 
+struct mlx5_dma_pool_page;
 struct mlx5_buf_list {
 	void *buf;
 	dma_addr_t map;
+	struct mlx5_dma_pool_page *frag_page;
 };
 
 struct mlx5_frag_buf {
-- 
2.44.0
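For reference, the block-size selection in mlx5_frag_buf_alloc_node()
reduces to the arithmetic below (a standalone sketch, not driver code;
the 64K PAGE_SHIFT and the sample sizes are illustrative assumptions):

  #include <stdio.h>

  #define MIN_BLOCK_SHIFT 12	/* MLX5_ADAPTER_PAGE_SHIFT */
  #define PAGE_SHIFT 16		/* assumed: a 64K-page kernel */

  /* ceil(log2(n)), like the kernel's order_base_2() for n > 1 */
  static int order_base_2(unsigned long n)
  {
  	int s = 0;

  	while ((1UL << s) < n)
  		s++;
  	return s;
  }

  int main(void)
  {
  	unsigned long sizes[] = { 512, 1536, 6000, 70000 };

  	for (int i = 0; i < 4; i++) {
  		int shift = order_base_2(sizes[i]);

  		if (shift < MIN_BLOCK_SHIFT)
  			shift = MIN_BLOCK_SHIFT;	/* HCA-visible minimum */
  		if (shift > PAGE_SHIFT)
  			shift = PAGE_SHIFT;	/* cap at one kernel page */
  		/* 512 and 1536 -> 4K blocks; 6000 -> one 8K block;
  		 * 70000 -> two 64K fragments, since npages is
  		 * DIV_ROUND_UP(size, PAGE_SIZE).
  		 */
  		printf("size %6lu -> %u-byte blocks\n",
  		       sizes[i], 1u << shift);
  	}
  	return 0;
  }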