From: Chuck Lever The recvmsg path pairs tls_decrypt_async_wait() with __skb_queue_purge(&ctx->async_hold). Bundling the two into tls_decrypt_async_drain() gives later patches a single call for async teardown. The purge is kept separate from tls_decrypt_async_wait() because other callers (the -EBUSY fallback in tls_do_decryption and the tls_strp_msg_hold error path) need to synchronize without discarding held skbs that are still awaiting delivery. Reviewed-by: Hannes Reinecke Reviewed-by: Alistair Francis Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a656ce2357589672bcef24343fef0aa83606cf41..09ccfe82af1a6c38978327e941de34818b5da7a8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -249,6 +249,18 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) return ctx->async_wait.err; } +/* Collect all pending async AEAD completions and release the + * skbs held for them. Returns the crypto error if any + * operation failed, zero otherwise. + */ +static int tls_decrypt_async_drain(struct tls_sw_context_rx *ctx) +{ + int ret = tls_decrypt_async_wait(ctx); + + __skb_queue_purge(&ctx->async_hold); + return ret; +} + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -2222,9 +2234,8 @@ int tls_sw_recvmsg(struct sock *sk, if (async) { int ret; - /* Wait for all previously submitted records to be decrypted */ - ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); + /* Drain all pending async decryptions and their held skbs */ + ret = tls_decrypt_async_drain(ctx); if (ret) { if (err >= 0 || err == -EINPROGRESS) -- 2.53.0 From: Chuck Lever recvmsg, read_sock, and splice_read each open-code a tls_err_abort() call after tls_rx_one_record() fails. Move the abort into tls_rx_one_record() so each receive path shares a single decrypt-and-abort sequence. 
Suggested-by: Sabrina Dubroca Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 09ccfe82af1a6c38978327e941de34818b5da7a8..bdbdaf40b3384298c80082c3acabcdb9a2becfc8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1821,8 +1821,10 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, err = tls_decrypt_device(sk, msg, tls_ctx, darg); if (!err) err = tls_decrypt_sw(sk, tls_ctx, msg, darg); - if (err < 0) + if (err < 0) { + tls_err_abort(sk, -EBADMSG); return err; + } rxm = strp_msg(darg->skb); rxm->offset += prot->prepend_size; @@ -2133,10 +2135,8 @@ int tls_sw_recvmsg(struct sock *sk, darg.async = false; err = tls_rx_one_record(sk, msg, &darg); - if (err < 0) { - tls_err_abort(sk, -EBADMSG); + if (err < 0) goto recv_end; - } async |= darg.async; @@ -2295,10 +2295,8 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, memset(&darg.inargs, 0, sizeof(darg.inargs)); err = tls_rx_one_record(sk, NULL, &darg); - if (err < 0) { - tls_err_abort(sk, -EBADMSG); + if (err < 0) goto splice_read_end; - } tls_rx_rec_done(ctx); skb = darg.skb; @@ -2381,10 +2379,8 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, memset(&darg.inargs, 0, sizeof(darg.inargs)); err = tls_rx_one_record(sk, NULL, &darg); - if (err < 0) { - tls_err_abort(sk, -EBADMSG); + if (err < 0) goto read_sock_end; - } released = tls_read_flush_backlog(sk, prot, INT_MAX, 0, decrypted, -- 2.53.0 From: Chuck Lever Per ISO/IEC 9899:2011 section 6.2.4p2, a pointer value becomes indeterminate when the object it points to reaches the end of its lifetime; Annex J.2 classifies the use of such a value as undefined behavior. In tls_sw_read_sock(), consume_skb(skb) in the fully-consumed path frees the skb, but the "do { } while (skb)" loop condition then evaluates that freed pointer. 
Although the value is never dereferenced -- the loop either continues and overwrites skb, or exits -- any future change that adds a dereference between consume_skb() and the loop condition would produce a silent use-after-free. Fixes: 662fbcec32f4 ("net/tls: implement ->read_sock()") Reviewed-by: Hannes Reinecke Reviewed-by: Alistair Francis Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bdbdaf40b3384298c80082c3acabcdb9a2becfc8..07f4a3d1a6f854acc7762608cc7741b3de95c195 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2364,7 +2364,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, goto read_sock_end; decrypted = 0; - do { + for (;;) { if (!skb_queue_empty(&ctx->rx_list)) { skb = __skb_dequeue(&ctx->rx_list); rxm = strp_msg(skb); @@ -2413,10 +2413,11 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, goto read_sock_requeue; } else { consume_skb(skb); + skb = NULL; if (!desc->count) - skb = NULL; + break; } - } while (skb); + } read_sock_end: tls_rx_reader_release(sk, ctx); -- 2.53.0 From: Chuck Lever tls_strp_msg_done() conflates releasing the current record with checking for the next one via tls_strp_check_rcv(). Batch processing requires releasing a record without immediately triggering that check, so the release step is separated into tls_strp_msg_release(). tls_strp_msg_done() is preserved as a wrapper for existing callers. 
Reviewed-by: Hannes Reinecke Reviewed-by: Alistair Francis Signed-off-by: Chuck Lever --- net/tls/tls.h | 1 + net/tls/tls_strp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index e8f81a006520027110962655f7b69a5ca58d7fb6..a97f1acef31d3a4ad1442c20c6914a9f5198a1e0 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -193,6 +193,7 @@ int tls_strp_init(struct tls_strparser *strp, struct sock *sk); void tls_strp_data_ready(struct tls_strparser *strp); void tls_strp_check_rcv(struct tls_strparser *strp); +void tls_strp_msg_release(struct tls_strparser *strp); void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 98e12f0ff57e5158c7518385aa0579528520d63f..a7648ebde162b1bd944f53d44c89cd7da4cef082 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -581,7 +581,16 @@ static void tls_strp_work(struct work_struct *w) release_sock(strp->sk); } -void tls_strp_msg_done(struct tls_strparser *strp) +/** + * tls_strp_msg_release - release the current strparser message + * @strp: TLS stream parser instance + * + * Release the current record without triggering a check for the + * next record. Callers must invoke tls_strp_check_rcv() before + * releasing the socket lock, or queued data will stall until + * the next tls_strp_data_ready() event. + */ +void tls_strp_msg_release(struct tls_strparser *strp) { WARN_ON(!strp->stm.full_len); @@ -592,7 +601,11 @@ void tls_strp_msg_done(struct tls_strparser *strp) WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); +} +void tls_strp_msg_done(struct tls_strparser *strp) +{ + tls_strp_msg_release(strp); tls_strp_check_rcv(strp); } -- 2.53.0 From: Chuck Lever Each record release via tls_strp_msg_done() triggers tls_strp_check_rcv(), which calls tls_rx_msg_ready() and fires saved_data_ready(). 
During a multi-record receive, the first N-1 wakeups are pure overhead: the caller is already running and will pick up subsequent records on the next loop iteration. The same waste occurs on the recvmsg and splice_read paths. Replace tls_strp_msg_done() with tls_strp_msg_release() in all three receive paths (read_sock, recvmsg, splice_read), deferring the tls_strp_check_rcv() call to each path's exit point. Factor tls_rx_msg_ready() out of tls_strp_read_sock() and tls_strp_copyin() so that parsing a record no longer fires the callback directly, and introduce tls_strp_check_rcv_quiet() for use in tls_rx_rec_wait(), which parses queued data without notifying. With no remaining callers, tls_strp_msg_done() is removed, and its wrapper tls_rx_rec_done() is renamed to tls_rx_rec_release(). Acked-by: Alistair Francis Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever --- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 27 +++++++++++++++++++-------- net/tls/tls_sw.c | 21 ++++++++++++++------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index a97f1acef31d3a4ad1442c20c6914a9f5198a1e0..0ab3b83c37243ab393367ac80836b9fd8fec7f82 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -193,8 +193,8 @@ int tls_strp_init(struct tls_strparser *strp, struct sock *sk); void tls_strp_data_ready(struct tls_strparser *strp); void tls_strp_check_rcv(struct tls_strparser *strp); +void tls_strp_check_rcv_quiet(struct tls_strparser *strp); void tls_strp_msg_release(struct tls_strparser *strp); -void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index a7648ebde162b1bd944f53d44c89cd7da4cef082..6cf274380da2785b58206cf0e70cf4fee033a223 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -368,7 +368,6 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, desc->count = 0; WRITE_ONCE(strp->msg_ready, 1); - 
tls_rx_msg_ready(strp); } return ret; @@ -539,11 +538,27 @@ static int tls_strp_read_sock(struct tls_strparser *strp) return tls_strp_read_copy(strp, false); WRITE_ONCE(strp->msg_ready, 1); - tls_rx_msg_ready(strp); return 0; } +/** + * tls_strp_check_rcv_quiet - parse without consumer notification + * @strp: TLS stream parser instance + * + * Parse queued data without firing the consumer notification. A subsequent + * tls_strp_check_rcv() is required before the socket lock is released; + * otherwise queued data stalls until the next tls_strp_data_ready() event. + */ +void tls_strp_check_rcv_quiet(struct tls_strparser *strp) +{ + if (unlikely(strp->stopped) || strp->msg_ready) + return; + + if (tls_strp_read_sock(strp) == -ENOMEM) + queue_work(tls_strp_wq, &strp->work); +} + void tls_strp_check_rcv(struct tls_strparser *strp) { if (unlikely(strp->stopped) || strp->msg_ready) @@ -551,6 +566,8 @@ void tls_strp_check_rcv(struct tls_strparser *strp) if (tls_strp_read_sock(strp) == -ENOMEM) queue_work(tls_strp_wq, &strp->work); + else if (strp->msg_ready) + tls_rx_msg_ready(strp); } /* Lower sock lock held */ @@ -603,12 +620,6 @@ void tls_strp_msg_release(struct tls_strparser *strp) memset(&strp->stm, 0, sizeof(strp->stm)); } -void tls_strp_msg_done(struct tls_strparser *strp) -{ - tls_strp_msg_release(strp); - tls_strp_check_rcv(strp); -} - void tls_strp_stop(struct tls_strparser *strp) { strp->stopped = 1; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 07f4a3d1a6f854acc7762608cc7741b3de95c195..381a723b6cacc669e333752af34f051f296d6f52 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return ret; if (!skb_queue_empty(&sk->sk_receive_queue)) { - tls_strp_check_rcv(&ctx->strp); + /* tls_strp_check_rcv() is called at each receive + * path's exit before the socket lock is released. 
+ */ + tls_strp_check_rcv_quiet(&ctx->strp); if (tls_strp_msg_ready(ctx)) break; } @@ -1867,9 +1870,9 @@ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, return 1; } -static void tls_rx_rec_done(struct tls_sw_context_rx *ctx) +static void tls_rx_rec_release(struct tls_sw_context_rx *ctx) { - tls_strp_msg_done(&ctx->strp); + tls_strp_msg_release(&ctx->strp); } /* This function traverses the rx_list in tls receive context to copies the @@ -2150,7 +2153,7 @@ int tls_sw_recvmsg(struct sock *sk, err = tls_record_content_type(msg, tls_msg(darg.skb), &control); if (err <= 0) { DEBUG_NET_WARN_ON_ONCE(darg.zc); - tls_rx_rec_done(ctx); + tls_rx_rec_release(ctx); put_on_rx_list_err: __skb_queue_tail(&ctx->rx_list, darg.skb); goto recv_end; @@ -2164,7 +2167,8 @@ int tls_sw_recvmsg(struct sock *sk, /* TLS 1.3 may have updated the length by more than overhead */ rxm = strp_msg(darg.skb); chunk = rxm->full_len; - tls_rx_rec_done(ctx); + tls_rx_rec_release(ctx); + tls_strp_check_rcv_quiet(&ctx->strp); if (!darg.zc) { bool partially_consumed = chunk > len; @@ -2258,6 +2262,7 @@ int tls_sw_recvmsg(struct sock *sk, copied += decrypted; end: + tls_strp_check_rcv(&ctx->strp); tls_rx_reader_unlock(sk, ctx); if (psock) sk_psock_put(sk, psock); @@ -2298,7 +2303,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (err < 0) goto splice_read_end; - tls_rx_rec_done(ctx); + tls_rx_rec_release(ctx); skb = darg.skb; } @@ -2325,6 +2330,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, consume_skb(skb); splice_read_end: + tls_strp_check_rcv(&ctx->strp); tls_rx_reader_unlock(sk, ctx); return copied ? 
: err; @@ -2390,7 +2396,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, tlm = tls_msg(skb); decrypted += rxm->full_len; - tls_rx_rec_done(ctx); + tls_rx_rec_release(ctx); } /* read_sock does not support reading control messages */ @@ -2420,6 +2426,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } read_sock_end: + tls_strp_check_rcv(&ctx->strp); tls_rx_reader_release(sk, ctx); return copied ? : err; -- 2.53.0 From: Chuck Lever While lock_sock is held, incoming TCP segments land on sk->sk_backlog rather than sk->sk_receive_queue. tls_rx_rec_wait() inspects only sk_receive_queue, so backlog data remains invisible. For non-blocking callers (read_sock, and recvmsg or splice_read with MSG_DONTWAIT) this causes a spurious -EAGAIN. For blocking callers it forces an unnecessary sleep/wakeup cycle. Flush the backlog inside tls_rx_rec_wait() before checking sk_receive_queue so the strparser can parse newly-arrived segments immediately. Suggested-by: Sabrina Dubroca Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 381a723b6cacc669e333752af34f051f296d6f52..5b154afbd7ac2ddd51b46d8d6bef0a7a41f0a841 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1383,6 +1383,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (ret < 0) return ret; + sk_flush_backlog(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) { /* tls_strp_check_rcv() is called at each receive * path's exit before the socket lock is released. -- 2.53.0 From: Chuck Lever Pipelining multiple AEAD operations requires separating decryption from delivery so that several records can be submitted before any are passed to the read_actor callback. 
The main loop in tls_sw_read_sock() is split into two explicit phases: a submit phase that decrypts one record onto ctx->rx_list, and a deliver phase that drains rx_list and passes each cleartext skb to the read_actor callback. With a single record per submit phase, behavior is identical to the previous code. A subsequent patch will extend the submit phase to pipeline multiple AEAD operations. Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 70 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5b154afbd7ac2ddd51b46d8d6bef0a7a41f0a841..5ae7e0c026e4437fe442c3a77b0a6d9623816ce1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2346,8 +2346,8 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; - struct strp_msg *rxm = NULL; struct sk_buff *skb = NULL; + struct strp_msg *rxm; struct sk_psock *psock; size_t flushed_at = 0; bool released = true; @@ -2372,13 +2372,10 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, decrypted = 0; for (;;) { - if (!skb_queue_empty(&ctx->rx_list)) { - skb = __skb_dequeue(&ctx->rx_list); - rxm = strp_msg(skb); - tlm = tls_msg(skb); - } else { - struct tls_decrypt_arg darg; + struct tls_decrypt_arg darg; + /* Phase 1: Submit -- decrypt one record onto rx_list. 
*/ + if (skb_queue_empty(&ctx->rx_list)) { err = tls_rx_rec_wait(sk, NULL, true, released); if (err <= 0) goto read_sock_end; @@ -2392,38 +2389,43 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, released = tls_read_flush_backlog(sk, prot, INT_MAX, 0, decrypted, &flushed_at); - skb = darg.skb; + decrypted += strp_msg(darg.skb)->full_len; + tls_rx_rec_release(ctx); + __skb_queue_tail(&ctx->rx_list, darg.skb); + } + + /* Phase 2: Deliver -- drain rx_list to read_actor */ + while ((skb = __skb_dequeue(&ctx->rx_list)) != NULL) { rxm = strp_msg(skb); tlm = tls_msg(skb); - decrypted += rxm->full_len; - tls_rx_rec_release(ctx); - } - - /* read_sock does not support reading control messages */ - if (tlm->control != TLS_RECORD_TYPE_DATA) { - err = -EINVAL; - goto read_sock_requeue; - } - - used = read_actor(desc, skb, rxm->offset, rxm->full_len); - if (used <= 0) { - if (!copied) - err = used; - goto read_sock_requeue; - } - copied += used; - if (used < rxm->full_len) { - rxm->offset += used; - rxm->full_len -= used; - if (!desc->count) + /* read_sock does not support reading control messages */ + if (tlm->control != TLS_RECORD_TYPE_DATA) { + err = -EINVAL; goto read_sock_requeue; - } else { - consume_skb(skb); - skb = NULL; - if (!desc->count) - break; + } + + used = read_actor(desc, skb, rxm->offset, + rxm->full_len); + if (used <= 0) { + if (!copied) + err = used; + goto read_sock_requeue; + } + copied += used; + if (used < rxm->full_len) { + rxm->offset += used; + rxm->full_len -= used; + if (!desc->count) + goto read_sock_requeue; + } else { + consume_skb(skb); + skb = NULL; + } } + /* Drain all of rx_list before honoring !desc->count */ + if (!desc->count) + break; } read_sock_end: -- 2.53.0 From: Chuck Lever tls_sw_read_sock() decrypts one TLS record at a time, blocking until each AEAD operation completes before proceeding. 
Hardware async crypto engines depend on pipelining multiple operations to achieve full throughput, and the one-at-a-time model prevents that. Kernel consumers such as NVMe-TCP and NFSD (when using TLS) are therefore unable to benefit from hardware offload. When ctx->async_capable is true, the submit phase now loops up to TLS_READ_SOCK_BATCH (16) records. The first record waits via tls_rx_rec_wait(); subsequent iterations use tls_strp_msg_ready() and tls_strp_check_rcv() to collect records already queued on the socket without blocking. Each record is submitted with darg.async set, and all resulting skbs are appended to rx_list. After the submit loop, a single tls_decrypt_async_drain() collects all pending AEAD completions before the deliver phase passes cleartext records to the consumer. The batch bound of 16 limits concurrent memory consumption to 16 cleartext skbs plus their AEAD contexts. If async_capable is false, the loop exits after one record and the async wait is skipped, preserving prior behavior. Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever --- net/tls/tls_sw.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 16 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5ae7e0c026e4437fe442c3a77b0a6d9623816ce1..bc500ba7ce81eb33763c37a8b73473c42dc66044 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -261,6 +261,12 @@ static int tls_decrypt_async_drain(struct tls_sw_context_rx *ctx) return ret; } +/* Submit an AEAD decrypt request. On success with darg->async set, + * the caller must not touch aead_req; the completion handler frees + * it. Every error return clears darg->async and guarantees no + * in-flight AEAD operation remains -- callers rely on this to + * safely free aead_req and to skip async drain on error paths. 
+ */ static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -2340,6 +2346,13 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, goto splice_read_end; } +/* Bound on concurrent async AEAD submissions per read_sock + * call. Chosen to fill typical hardware crypto pipelines + * without excessive memory consumption (each in-flight record + * holds one cleartext skb plus its AEAD request context). + */ +#define TLS_READ_SOCK_BATCH 16 + int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t read_actor) { @@ -2351,6 +2364,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, struct sk_psock *psock; size_t flushed_at = 0; bool released = true; + bool async = false; struct tls_msg *tlm; ssize_t copied = 0; ssize_t decrypted; @@ -2373,25 +2387,61 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, decrypted = 0; for (;;) { struct tls_decrypt_arg darg; + int nr_async = 0; - /* Phase 1: Submit -- decrypt one record onto rx_list. */ + /* Phase 1: Submit -- decrypt records onto rx_list. 
*/ if (skb_queue_empty(&ctx->rx_list)) { - err = tls_rx_rec_wait(sk, NULL, true, released); - if (err <= 0) + while (nr_async < TLS_READ_SOCK_BATCH) { + if (nr_async == 0) { + err = tls_rx_rec_wait(sk, NULL, + true, + released); + if (err <= 0) + goto read_sock_end; + } else { + if (!tls_strp_msg_ready(ctx)) { + tls_strp_check_rcv_quiet(&ctx->strp); + if (!tls_strp_msg_ready(ctx)) + break; + } + if (!tls_strp_msg_load(&ctx->strp, + released)) + break; + } + + memset(&darg.inargs, 0, sizeof(darg.inargs)); + darg.async = ctx->async_capable; + + err = tls_rx_one_record(sk, NULL, &darg); + if (err < 0) + goto read_sock_end; + + async |= darg.async; + released = tls_read_flush_backlog(sk, prot, + INT_MAX, + 0, + decrypted, + &flushed_at); + decrypted += strp_msg(darg.skb)->full_len; + tls_rx_rec_release(ctx); + __skb_queue_tail(&ctx->rx_list, darg.skb); + nr_async++; + + if (!ctx->async_capable) + break; + } + } + + /* Async wait -- collect pending AEAD completions */ + if (async) { + int ret = tls_decrypt_async_drain(ctx); + + async = false; + if (ret) { + __skb_queue_purge(&ctx->rx_list); + err = ret; goto read_sock_end; - - memset(&darg.inargs, 0, sizeof(darg.inargs)); - - err = tls_rx_one_record(sk, NULL, &darg); - if (err < 0) - goto read_sock_end; - - released = tls_read_flush_backlog(sk, prot, INT_MAX, - 0, decrypted, - &flushed_at); - decrypted += strp_msg(darg.skb)->full_len; - tls_rx_rec_release(ctx); - __skb_queue_tail(&ctx->rx_list, darg.skb); + } } /* Phase 2: Deliver -- drain rx_list to read_actor */ @@ -2429,6 +2479,16 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } read_sock_end: + if (async) { + int ret = tls_decrypt_async_drain(ctx); + + __skb_queue_purge(&ctx->rx_list); + /* Preserve the error that triggered early exit; + * a crypto drain error is secondary. + */ + if (ret && !err) + err = ret; + } tls_strp_check_rcv(&ctx->strp); tls_rx_reader_release(sk, ctx); return copied ? : err; -- 2.53.0