This patch finds the two largest source buffers in a given decompression
batch and submits them first to the IAA decompress engines. This improves
decompress batching latency because the hardware has a head start on
decompressing the highest-latency source buffers in the batch. Workload
performance is also significantly improved as a result of this
optimization.

Signed-off-by: Kanchana P Sridhar
---
 drivers/crypto/intel/iaa/iaa_crypto_main.c | 49 ++++++++++++++++++++--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index a447555f4eb9..8d83a1ea15d7 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -2315,12 +2315,46 @@ static __always_inline int iaa_comp_submit_acompress_batch(
 	return ret;
 }
 
+/*
+ * Find the two largest source buffers in @reqs for a decompress batch,
+ * based on @reqs[i]->slen. Save their indices as the first two elements in
+ * @submit_order, followed by the remaining indices in batch order.
+ */
+static void get_decompress_batch_submit_order(
+	struct iaa_req *reqs[],
+	int nr_pages,
+	int submit_order[])	/* out: batch indices in submission order */
+{
+	int i, j = 0, max_i = 0, next_max_i = 0;	/* j: next free slot in @submit_order */
+
+	for (i = 0; i < nr_pages; ++i) {
+		if (reqs[i]->slen >= reqs[max_i]->slen) {	/* on a tie the later index wins */
+			next_max_i = max_i;	/* previous largest becomes the runner-up */
+			max_i = i;
+		} else if ((next_max_i == max_i) ||	/* no distinct runner-up yet */
+			   (reqs[i]->slen > reqs[next_max_i]->slen)) {
+			next_max_i = i;
+		}
+	}
+
+	submit_order[j++] = max_i;	/* NOTE(review): stored even when nr_pages == 0 */
+
+	if (next_max_i != max_i)	/* equal only if the batch has fewer than two requests */
+		submit_order[j++] = next_max_i;
+
+	for (i = 0; i < nr_pages; ++i) {
+		if ((i != max_i) && (i != next_max_i))	/* the top two were already placed */
+			submit_order[j++] = i;
+	}
+}
+
 static __always_inline int iaa_comp_submit_adecompress_batch(
 	struct iaa_compression_ctx *ctx,
 	struct iaa_req *parent_req,
 	struct iaa_req **reqs,
 	int nr_reqs)
 {
+	int submit_order[IAA_CRYPTO_MAX_BATCH_SIZE];
 	struct scatterlist *sg;
 	int i, err, ret = 0;
 
@@ -2334,12 +2368,19 @@ static __always_inline int iaa_comp_submit_adecompress_batch(
 		reqs[i]->dlen = PAGE_SIZE;
 	}
 
+	/*
+	 * Construct the submit order by finding the indices of the two largest
+	 * compressed data buffers in the batch, so that they are submitted
+	 * first. This improves latency of the batch.
+	 */
+	get_decompress_batch_submit_order(reqs, nr_reqs, submit_order);
+
 	/*
 	 * Prepare and submit the batch of iaa_reqs to IAA. IAA will process
 	 * these decompress jobs in parallel.
 	 */
 	for (i = 0; i < nr_reqs; ++i) {
-		err = iaa_comp_adecompress(ctx, reqs[i]);
+		err = iaa_comp_adecompress(ctx, reqs[submit_order[i]]);
 
 		/*
 		 * In case of idxd desc allocation/submission errors, the
@@ -2347,12 +2388,12 @@ static __always_inline int iaa_comp_submit_adecompress_batch(
 		 * @err to 0 or an error value.
*/ if (likely(err == -EINPROGRESS)) { - reqs[i]->dst->length = -EAGAIN; + reqs[submit_order[i]]->dst->length = -EAGAIN; } else if (unlikely(err)) { - reqs[i]->dst->length = err; + reqs[submit_order[i]]->dst->length = err; ret = -EINVAL; } else { - reqs[i]->dst->length = reqs[i]->dlen; + reqs[submit_order[i]]->dst->length = reqs[submit_order[i]]->dlen; } } -- 2.27.0