When the BPF sk_msg verdict sets apply_bytes smaller than the current open record, tls_push_record() splits ctx->open_rec into the record being encrypted and a remainder record. The synchronous path reattaches the remainder to ctx->open_rec before continuing. If the selected AEAD provider completes asynchronously, tls_do_encryption() returns -EINPROGRESS after unhooking ctx->open_rec. tls_push_record() currently returns immediately in that case, before the split remainder is reattached. The remainder is no longer reachable through ctx->open_rec or ctx->tx_list, which can silently drop transmitted data and leak the unreachable tls_rec. Keep the split remainder rooted even when encryption of the first record is pending asynchronously, and continue the BPF verdict drain loop after an async record has been queued. If that loop then hits a later verdict error, wait for the pending async encryption before returning the error so zerocopy user pages cannot be released while cryptd still reads them. Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Christopher Lusk Assisted-by: Codex:gpt-5.5 Assisted-by: Claude:claude-opus-4-7 --- net/tls/tls_sw.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 964ebc268..5b20be5b4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -840,16 +840,19 @@ static int tls_push_record(struct sock *sk, int flags, rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { - if (rc != -EINPROGRESS) { - tls_err_abort(sk, -EBADMSG); - if (split) { - tls_ctx->pending_open_record_frags = true; - tls_merge_open_record(sk, rec, tmp, orig_end); - } + if (rc == -EINPROGRESS) + goto split_done; + + tls_err_abort(sk, -EBADMSG); + if (split) { + tls_ctx->pending_open_record_frags = true; + tls_merge_open_record(sk, rec, tmp, orig_end); } ctx->async_capable = 1; return rc; - } else if (split) { + } +split_done: + if (split) { msg_pl = &tmp->msg_plaintext; msg_en = &tmp->msg_encrypted; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); @@ -857,6 +860,11 @@ static int tls_push_record(struct sock *sk, int flags, ctx->open_rec = tmp; } + if (rc < 0) { + ctx->async_capable = 1; + return rc; + } + return tls_tx_records(sk, flags); } @@ -871,6 +879,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, struct sock *sk_redir; struct tls_rec *rec; bool enospc, policy, redir_ingress; + bool async = false; + int async_err = 0; int err = 0, send; u32 delta = 0; @@ -920,6 +930,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); + if (err == -EINPROGRESS) { + async = true; + err = 0; + } if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); @@ -988,8 +1002,18 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, goto more_data; } out_err: + if (async && err && err != -EINPROGRESS) { + async_err = tls_encrypt_async_wait(ctx); + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + /* tx_lock is held; the worker will reschedule if needed. */ + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, flags); + } + if (async_err) + err = async_err; + } sk_psock_put(sk, psock); - return err; + return err ?: (async ? -EINPROGRESS : 0); } static int tls_sw_push_pending_record(struct sock *sk, int flags) -- 2.54.0