The calls to dma_map_sg() were passing sg_nents() for the @nents parameter, then erroring out if more than one @nr_sgs were returned. Furthermore, there are no use-cases for iaa_crypto that allow multiple SG lists to be mapped for DMA at once. Moreover, as per Herbert's direction in [1] for the batching API from higher mm layers to interface with crypto using SG lists, batching within iaa_crypto will rely on there being exactly one SG list per "unit" of [de]compression in a batch, where the component SG lists are obtained by breaking down the @req->src and @req->dst. Given all of the above, this patch simplifies the design by expecting exactly one SG entry (@nents == 1) in req->src and req->dst, which aligns with both the current use cases and the batching use cases that will be developed in subsequent patches. This alleviates the latency penalty of calling sg_nents() per [de]compress op submitted to the hardware. Some unlikely() annotations are added to conditionals in the core [de]compress routines to further improve latency per op. 
[1]: https://lore.kernel.org/all/aJ7Fk6RpNc815Ivd@gondor.apana.org.au/T/#m99aea2ce3d284e6c5a3253061d97b08c4752a798 Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 54 +++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index dd7c4831e092..16b071058f2b 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1514,11 +1514,11 @@ static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, int ret = 0; int nr_sgs; - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + dma_unmap_sg(dev, req->dst, 1, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, 1, DMA_TO_DEVICE); - nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->src, 1, DMA_FROM_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "verify: couldn't map src sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); @@ -1530,13 +1530,13 @@ static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs, req->src, req->slen, sg_dma_len(req->src)); - nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->dst, 1, DMA_TO_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); ret = -EIO; - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, 1, DMA_FROM_DEVICE); goto out; } *dst_addr = sg_dma_address(req->dst); @@ -1704,14 +1704,14 @@ static void 
iaa_desc_complete(struct idxd_desc *idxd_desc, err = -EIO; } - dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE); - dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE); + dma_unmap_sg(dev, ctx->req->dst, 1, DMA_TO_DEVICE); + dma_unmap_sg(dev, ctx->req->src, 1, DMA_FROM_DEVICE); goto out; } err: - dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE); - dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE); + dma_unmap_sg(dev, ctx->req->dst, 1, DMA_FROM_DEVICE); + dma_unmap_sg(dev, ctx->req->src, 1, DMA_TO_DEVICE); out: if (ret != 0) dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); @@ -2014,8 +2014,8 @@ static int iaa_comp_acompress(struct acomp_req *req) dev = &wq->idxd->pdev->dev; - nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->src, 1, DMA_TO_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "couldn't map src sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); @@ -2024,8 +2024,8 @@ static int iaa_comp_acompress(struct acomp_req *req) } src_addr = sg_dma_address(req->src); - nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->dst, 1, DMA_FROM_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "couldn't map dst sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); @@ -2051,18 +2051,18 @@ static int iaa_comp_acompress(struct acomp_req *req) if (ret) dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->dst, 1, DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, 1, DMA_FROM_DEVICE); goto out; } - if (ret) + if 
(unlikely(ret)) dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->dst, 1, DMA_FROM_DEVICE); err_map_dst: - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, 1, DMA_TO_DEVICE); out: percpu_ref_put(&iaa_wq->ref); @@ -2095,8 +2095,8 @@ static int iaa_comp_adecompress(struct acomp_req *req) dev = &wq->idxd->pdev->dev; - nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->src, 1, DMA_TO_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "couldn't map src sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); @@ -2105,8 +2105,8 @@ static int iaa_comp_adecompress(struct acomp_req *req) } src_addr = sg_dma_address(req->src); - nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { + nr_sgs = dma_map_sg(dev, req->dst, 1, DMA_FROM_DEVICE); + if (unlikely(nr_sgs <= 0 || nr_sgs > 1)) { dev_dbg(dev, "couldn't map dst sg for iaa device %d," " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, iaa_wq->wq->id, ret); @@ -2120,12 +2120,12 @@ static int iaa_comp_adecompress(struct acomp_req *req) if (ret == -EINPROGRESS) return ret; - if (ret != 0) + if (unlikely(ret != 0)) dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->dst, 1, DMA_FROM_DEVICE); err_map_dst: - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, 1, DMA_TO_DEVICE); out: percpu_ref_put(&iaa_wq->ref); -- 2.27.0