This patch adds a new procedure, iaa_submit_desc_movdir64b(), that
directly calls movdir64b. The core iaa_crypto routines that submit
compress and decompress jobs now invoke iaa_submit_desc_movdir64b() in
non-irq driver modes, instead of idxd_submit_desc(). idxd_submit_desc()
is called only in irq mode.

This improves latency for the most commonly used iaa_crypto usage
(i.e., async non-irq) in zswap by eliminating redundant computation
that would otherwise be incurred in idxd_submit_desc().

For a single-threaded madvise-based workload with the Silesia.tar
dataset, these are the before/after batch compression latencies for a
compress batch of 8 pages:

 ==================================
            p50 (ns)    p99 (ns)
 ==================================
  before     5,568      6,056
  after      5,472      5,848
  Change       -96       -208
 ==================================

Signed-off-by: Kanchana P Sridhar
---
 drivers/crypto/intel/iaa/iaa_crypto_main.c | 30 ++++++++++++++--------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 697e98785335..dfc67109e81e 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -1788,6 +1788,24 @@ iaa_setup_decompress_hw_desc(struct idxd_desc *idxd_desc,
 	return desc;
 }
 
+/*
+ * Call this for non-irq, non-enqcmds job submissions.
+ */
+static __always_inline void iaa_submit_desc_movdir64b(struct idxd_wq *wq,
+						      struct idxd_desc *desc)
+{
+	void __iomem *portal = idxd_wq_portal_addr(wq);
+
+	/*
+	 * The wmb() flushes writes to coherent DMA data before
+	 * possibly triggering a DMA read. The wmb() is necessary
+	 * even on UP because the recipient is a device.
+	 */
+	wmb();
+
+	iosubmit_cmds512(portal, desc->hw, 1);
+}
+
 static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
 			struct idxd_wq *wq, dma_addr_t src_addr,
 			unsigned int slen,
@@ -1826,11 +1844,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
 			 ctx->mode, iaa_device->compression_modes[ctx->mode]);
 
 	if (likely(!ctx->use_irq)) {
-		ret = idxd_submit_desc(wq, idxd_desc);
-		if (ret) {
-			dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
-			goto out;
-		}
+		iaa_submit_desc_movdir64b(wq, idxd_desc);
 
 		/* Update stats */
 		update_total_comp_calls();
@@ -1918,11 +1932,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
 	desc = iaa_setup_decompress_hw_desc(idxd_desc, src_addr, slen,
 					    dst_addr, *dlen);
 	if (likely(!ctx->use_irq)) {
-		ret = idxd_submit_desc(wq, idxd_desc);
-		if (ret) {
-			dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
-			goto fallback_software_decomp;
-		}
+		iaa_submit_desc_movdir64b(wq, idxd_desc);
 
 		/* Update stats */
 		update_total_decomp_calls();
-- 
2.27.0