Scott Mayhew discovered a security exploit in NFS over TLS in tls_alert_recv() due to its assumption it can read data from the msg iterator's kvec.. kTLS implementation splits TLS non-data record payload between the control message buffer (which includes the type such as TLS aler or TLS cipher change) and the rest of the payload (say TLS alert's level/description) which goes into the msg payload buffer. This patch proposes to rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed msg buffer and read in the control message such as a TLS alert. Scott found that msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Reported-by: Scott Mayhew Fixes: 5e052dda121e ("SUNRPC: Recognize control messages in server-side TCP socket code") Suggested-by: Trond Myklebust Suggested-by: Scott Mayhew Signed-off-by: Olga Kornievskaia --- net/sunrpc/svcsock.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 46c156b121db..e2c5e0e626f9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; + int ret; + + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ int ret; + struct socket *sock = svsk->sk_sock; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1018,7 +1045,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; -- 2.47.1 A security exploit was discovered in NFS over TLS in tls_alert_recv due to its assumption that there is valid data in the msghdr's iterator's kvec. Instead, this patch proposes the rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed control buffer and read in the control message such as a TLS alert. Scott found that a msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Fixes: dea034b963c8 ("SUNRPC: Capture CMSG metadata on client-side receive") Suggested-by: Trond Myklebust Suggested-by: Scott Mayhew Signed-off-by: Olga Kornievskaia --- net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04ff66758fc3..c5f7bbf5775f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -358,7 +358,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) static int xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, - struct cmsghdr *cmsg, int ret) + unsigned int *msg_flags, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); u8 level, description; @@ -371,7 +371,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, * record, even though there might be more frames * waiting to be decrypted. */ - msg->msg_flags &= ~MSG_EOR; + *msg_flags &= ~MSG_EOR; break; case TLS_RECORD_TYPE_ALERT: tls_alert_recv(sock->sk, msg, &level, &description); @@ -386,19 +386,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); - ret = sock_recvmsg(sock, msg, flags); - if (msg->msg_controllen != sizeof(u)) - ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } return ret; } @@ -408,7 +422,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = xs_sock_recv_cmsg(sock, msg, flags); + ret = sock_recvmsg(sock, msg, flags); + /* Handle TLS inband control message lazily */ + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags); + } return ret > 0 ? ret + seek : ret; } @@ -434,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return xs_sock_recv_cmsg(sock, msg, flags); + return xs_sock_recvmsg(sock, msg, flags, 0); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -- 2.47.1 Revert kvec msg iterator before trying to process a TLS alert when possible. In nvmet_tcp_try_recv_data(), it's assumed that no msg control message buffer is set prior to sock_recvmsg(). Hannes suggested that upon detecting that TLS control message is received log a message and error out. Left comments in the code for the future improvements. Fixes: a1c5dd8355b1 ("nvmet-tcp: control messages for recvmsg()") Suggested-by: Hannes Reinecke Reviewed-by: Hannes Reinecky Signed-off-by: Olga Kornievskaia --- drivers/nvme/target/tcp.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 688033b88d38..055e420d3f2e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1161,6 +1161,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(len < 0)) return len; if (queue->tls_pskid) { + iov_iter_revert(&msg.msg_iter, len); ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); if (ret < 0) return ret; @@ -1217,19 +1218,28 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmd; - int len, ret; + int len; while (msg_data_left(&cmd->recv_msg)) { + /* to detect that we received a TlS alert, we assumed that + * cmg->recv_msg's control buffer is not setup. kTLS will + * return an error when no control buffer is set and + * non-tls-data payload is received. + */ len = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, cmd->recv_msg.msg_flags); + if (cmd->recv_msg.msg_flags & MSG_CTRUNC) { + if (len == 0 || len == -EIO) { + pr_err("queue %d: unhandled control message\n", + queue->idx); + /* note that unconsumed TLS control message such + * as TLS alert is still on the socket. + */ + return -EAGAIN; + } + } if (len <= 0) return len; - if (queue->tls_pskid) { - ret = nvmet_tcp_tls_record_ok(cmd->queue, - &cmd->recv_msg, cmd->recv_cbuf); - if (ret < 0) - return ret; - } cmd->pdu_recv += len; cmd->rbytes_done += len; @@ -1267,6 +1277,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) if (unlikely(len < 0)) return len; if (queue->tls_pskid) { + iov_iter_revert(&msg.msg_iter, len); ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); if (ret < 0) return ret; @@ -1453,10 +1464,6 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, if (!c->r2t_pdu) goto out_free_data; - if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { - c->recv_msg.msg_control = c->recv_cbuf; - c->recv_msg.msg_controllen = sizeof(c->recv_cbuf); - } c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; list_add_tail(&c->entry, &queue->free_list); @@ -1736,6 +1743,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) return len; } + iov_iter_revert(&msg.msg_iter, len); ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); if (ret < 0) return ret; -- 2.47.1 Instead of trying to read the data from the msg iterator, callers to tls_alert_recv() need to pass in a kvec directly. Signed-off-by: Olga Kornievskaia --- drivers/nvme/target/tcp.c | 13 +++++-------- include/net/handshake.h | 2 +- net/handshake/alert.c | 6 +++--- net/sunrpc/svcsock.c | 17 ++++++++--------- net/sunrpc/xprtsock.c | 19 +++++++++---------- 5 files changed, 26 insertions(+), 31 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 055e420d3f2e..b63eda220c40 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1107,7 +1107,7 @@ static inline bool nvmet_tcp_pdu_valid(u8 type) } static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue, - struct msghdr *msg, char *cbuf) + struct kvec *iov, char *cbuf) { struct cmsghdr *cmsg = (struct cmsghdr *)cbuf; u8 ctype, level, description; @@ -1120,7 +1120,7 @@ static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue, case TLS_RECORD_TYPE_DATA: break; case TLS_RECORD_TYPE_ALERT: - tls_alert_recv(queue->sock->sk, msg, &level, &description); + tls_alert_recv(queue->sock->sk, iov, &level, &description); if (level == TLS_ALERT_LEVEL_FATAL) { pr_err("queue %d: TLS Alert desc %u\n", queue->idx, description); @@ -1161,8 +1161,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(len < 0)) return len; if (queue->tls_pskid) { - iov_iter_revert(&msg.msg_iter, len); - ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + ret = nvmet_tcp_tls_record_ok(queue, &iov, cbuf); if (ret < 0) return ret; } @@ -1277,8 +1276,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) if (unlikely(len < 0)) return len; if (queue->tls_pskid) { - iov_iter_revert(&msg.msg_iter, len); - ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + ret = nvmet_tcp_tls_record_ok(queue, &iov, cbuf); if (ret < 0) return ret; } @@ -1743,8 +1741,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) return len; } - iov_iter_revert(&msg.msg_iter, len); - ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + ret = nvmet_tcp_tls_record_ok(queue, &iov, cbuf); if (ret < 0) return ret; diff --git a/include/net/handshake.h b/include/net/handshake.h index 8ebd4f9ed26e..33ffc8e88923 100644 --- a/include/net/handshake.h +++ b/include/net/handshake.h @@ -43,7 +43,7 @@ bool tls_handshake_cancel(struct sock *sk); void tls_handshake_close(struct socket *sock); u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *msg); -void tls_alert_recv(const struct sock *sk, const struct msghdr *msg, +void tls_alert_recv(const struct sock *sk, const struct kvec *iov, u8 *level, u8 *description); #endif /* _NET_HANDSHAKE_H */ diff --git a/net/handshake/alert.c b/net/handshake/alert.c index 329d91984683..4662a406b64a 100644 --- a/net/handshake/alert.c +++ b/net/handshake/alert.c @@ -94,13 +94,13 @@ EXPORT_SYMBOL(tls_get_record_type); * @description: OUT - TLS AlertDescription value * */ -void tls_alert_recv(const struct sock *sk, const struct msghdr *msg, +void tls_alert_recv(const struct sock *sk, const struct kvec *iov, u8 *level, u8 *description) { - const struct kvec *iov; u8 *data; - iov = msg->msg_iter.kvec; + if (!iov) + return; data = iov->iov_base; *level = data[0]; *description = data[1]; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e2c5e0e626f9..8701abd7fff2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -228,7 +228,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) } static int -svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, +svc_tcp_sock_process_cmsg(struct socket *sock, struct kvec *iov, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); @@ -238,14 +238,10 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, case 0: break; case TLS_RECORD_TYPE_DATA: - /* TLS sets EOR at the end of each application data - * record, even though there might be more frames - * waiting to be decrypted. - */ - msg->msg_flags &= ~MSG_EOR; + pr_warn("received TLS DATA; expected TLS control message\n"); break; case TLS_RECORD_TYPE_ALERT: - tls_alert_recv(sock->sk, msg, &level, &description); + tls_alert_recv(sock->sk, iov, &level, &description); ret = (level == TLS_ALERT_LEVEL_FATAL) ? -ENOTCONN : -EAGAIN; break; @@ -280,8 +276,7 @@ svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (ret > 0 && tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { - iov_iter_revert(&msg.msg_iter, ret); - ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + ret = svc_tcp_sock_process_cmsg(sock, &alert_kvec, &u.cmsg, -EAGAIN); } return ret; } @@ -294,6 +289,10 @@ svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); if (msg->msg_flags & MSG_CTRUNC) { + /* TLS sets EOR at the end of each application data + * record, even though there might be more frames + * waiting to be decrypted. + */ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); if (ret == 0 || ret == -EIO) ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c5f7bbf5775f..005021773da1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -357,7 +357,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) } static int -xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, +xs_sock_process_cmsg(struct socket *sock, struct kvec *iov, unsigned int *msg_flags, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); @@ -367,14 +367,10 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, case 0: break; case TLS_RECORD_TYPE_DATA: - /* TLS sets EOR at the end of each application data - * record, even though there might be more frames - * waiting to be decrypted. - */ - *msg_flags &= ~MSG_EOR; + pr_warn("received TLS DATA; expected TLS control message\n"); break; case TLS_RECORD_TYPE_ALERT: - tls_alert_recv(sock->sk, msg, &level, &description); + tls_alert_recv(sock->sk, iov, &level, &description); ret = (level == TLS_ALERT_LEVEL_FATAL) ? -EACCES : -EAGAIN; break; @@ -409,9 +405,8 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) ret = sock_recvmsg(sock, &msg, flags); if (ret > 0 && tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { - iov_iter_revert(&msg.msg_iter, ret); - ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, - -EAGAIN); + ret = xs_sock_process_cmsg(sock, &alert_kvec, msg_flags, + &u.cmsg, -EAGAIN); } return ret; } @@ -425,6 +420,10 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) ret = sock_recvmsg(sock, msg, flags); /* Handle TLS inband control message lazily */ if (msg->msg_flags & MSG_CTRUNC) { + /* TLS sets EOR at the end of each application data + * ecord, even though there might be more frames + * waiting to be decrypted. + */ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); if (ret == 0 || ret == -EIO) ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags); -- 2.47.1