Add a selftest that reproduces the use-after-free triggered when a TCP
socket is inserted into a sockmap *before* TLS RX is configured on it
(the reverse of the order that is already blocked by the kernel).

Vulnerable sequence:

  1. bpf_map_update_elem(sockmap, server_fd)
       -> sk->sk_data_ready = sk_psock_verdict_data_ready

  2. setsockopt(server_fd, SOL_TLS, TLS_RX, ...)
       -> tls_sw_strparser_arm() saves sk_psock_verdict_data_ready as
          rx_ctx->saved_data_ready, then sets
          sk->sk_data_ready = tls_data_ready

When data arrives:

  tls_data_ready
    -> tls_strp_data_ready
    -> tls_rx_msg_ready
    -> saved_data_ready()  [= sk_psock_verdict_data_ready]
    -> tcp_read_skb() drains sk_receive_queue via __skb_unlink()
       without calling tcp_eat_skb(), so copied_seq is never advanced.

tls_strp_msg_load() then finds tcp_inq() >= full_len (stale), calls
tcp_recv_skb() on an empty queue (in tls_strp_load_anchor_with_queue()),
hits WARN_ON_ONCE(!first), and returns with frag_list still pointing at
the now psock-owned (or already freed) skb. tls_decrypt_sg()
subsequently walks that stale frag_list: a use-after-free.

The new BPF program (prog_skb_verdict_pass, sk_skb/verdict) returns
SK_PASS, which is the specific verdict for which
sk_psock_verdict_recv() takes the skb without the tcp_eat_skb() call.

The test drives the full setup in the vulnerable order and then
attempts a send+recv. After a correct fix, the kernel either:

  (a) rejects the TLS setup on a socket that is already owned by a
      psock -- setsockopt(TCP_ULP) failing with EINVAL/EBUSY, or
      setsockopt(TLS_RX) failing with EINVAL/EBUSY/ENOTSUPP -- or
  (b) completes the data transfer without corruption or kernel
      warnings.

Signed-off-by: Xingwang Xiang --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 109 ++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_ktls.c | 21 ++++ 2 files changed, 130 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b87e7f39e..e09861e1e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -417,6 +417,113 @@ static void run_tests(int family, enum bpf_map_type map_type) close(map); } +/* + * Regression test for the KTLS + sockmap reverse-order frag_list UAF. + * + * Vulnerable sequence: + * 1. Insert receiver socket into sockmap (sets sk_data_ready = + * sk_psock_verdict_data_ready) + * 2. Configure TLS RX on the same socket: tls_sw_strparser_arm() saves + * sk_psock_verdict_data_ready as rx_ctx->saved_data_ready and replaces + * sk_data_ready with tls_data_ready. + * + * When data arrives, tls_rx_msg_ready() calls saved_data_ready(), which is + * sk_psock_verdict_data_ready(). That drains sk_receive_queue via + * tcp_read_skb() / __skb_unlink() without advancing copied_seq. + * tls_strp_msg_load() then finds an empty queue while tcp_inq() is still + * non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list + * pointer that tls_decrypt_sg() walks — a use-after-free. + * + * After the fix the kernel either: + * (a) rejects setsockopt(TLS_RX) with EBUSY/EINVAL when the socket is + * already owned by a psock, or + * (b) correctly handles the data path so recv() returns the right data. 
+ */ +static void test_sockmap_ktls_reverse_order_tls(int family, int sotype) +{ + struct tls12_crypto_info_aes_gcm_128 crypto_info = {}; + char send_buf[] = "hello ktls sockmap reverse order"; + char recv_buf[sizeof(send_buf)] = {}; + struct test_sockmap_ktls *skel; + int c = -1, p = -1, zero = 0; + int prog_fd, map_fd; + ssize_t n; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open_and_load")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass); + map_fd = bpf_map__fd(skel->maps.sock_map_verdict); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict")) + goto out; + + /* Configure TLS TX on the sender (normal order, no sockmap) */ + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client")) + goto out; + + crypto_info.info.version = TLS_1_2_VERSION; + crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128; + memset(crypto_info.key, 0x01, sizeof(crypto_info.key)); + memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt)); + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + /* Insert receiver into sockmap BEFORE TLS RX — the vulnerable ordering */ + err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem server")) + goto out; + + /* Attempt TLS RX setup AFTER sockmap insertion */ + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (err) { + /* Kernel correctly rejected TLS ULP on a psock-owned socket */ + ASSERT_TRUE(errno == EINVAL || errno == EBUSY, + "expected EINVAL or EBUSY for TCP_ULP on sockmap socket"); + goto out; + } + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info)); + if (err) { + /* Kernel correctly 
rejected TLS RX after sockmap insertion */ + ASSERT_TRUE(errno == EINVAL || errno == EBUSY || errno == ENOTSUPP, + "expected rejection of TLS_RX on sockmap socket"); + goto out; + } + + /* + * Setup was allowed — verify data transfer is correct. + * A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue + * and may UAF in tls_decrypt_sg when walking the stale frag_list. + */ + n = send(c, send_buf, sizeof(send_buf), 0); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send")) + goto out; + + n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv")) + goto out; + + ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity"); + +out: + if (c != -1) + close(c); + if (p != -1) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_ktls_test(int family, int sotype) { if (test__start_subtest("tls simple offload")) @@ -429,6 +536,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_no_buf(family, sotype, true); if (test__start_subtest("tls tx with pop")) test_sockmap_ktls_tx_pop(family, sotype); + if (test__start_subtest("tls rx after sockmap insert")) + test_sockmap_ktls_reverse_order_tls(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 83df4919c..facafeaf4 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -17,6 +17,13 @@ struct { __type(value, int); } sock_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sock_map_verdict SEC(".maps"); + SEC("sk_msg") int prog_sk_policy(struct sk_msg_md *msg) { @@ -38,3 +45,17 @@ int prog_sk_policy_redir(struct sk_msg_md *msg) bpf_msg_apply_bytes(msg, apply_bytes); return bpf_msg_redirect_map(msg, &sock_map, two, 0); } + +/* + * 
Verdict program for the reverse-order TLS/sockmap regression test. + * Returns SK_PASS so tcp_read_skb() drains the receive queue via + * sk_psock_verdict_recv() without calling tcp_eat_skb(), which is + * the precondition for the KTLS strparser frag_list UAF. + */ +SEC("sk_skb/verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- 2.54.0