When __skb_try_recv_datagram() returns NULL in __unix_dgram_recvmsg(), we hold unix_state_lock() unconditionally. This is because SOCK_SEQPACKET sk needs to return EOF in case its peer has been close()d concurrently. This behaviour totally depends on the timing of the peer's close() and reading sk->sk_shutdown, and taking the lock does not play a role. Let's drop the lock from __unix_dgram_recvmsg() and use READ_ONCE(). Signed-off-by: Kuniyuki Iwashima --- net/unix/af_unix.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 564c970d97ff..1fa232ff4a2e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2528,12 +2528,10 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, &err, &timeo, last)); if (!skb) { /* implies iolock unlocked */ - unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && - (sk->sk_shutdown & RCV_SHUTDOWN)) + (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN)) err = 0; - unix_state_unlock(sk); goto out; } -- 2.50.0.727.gbf7dc18ff4-goog unix_stream_read_skb() checks SOCK_DEAD only when the dequeued skb is OOB skb. unix_stream_read_skb() is called for a SOCK_STREAM socket in SOCKMAP when data is sent to it. The function is invoked via sk_psock_verdict_data_ready(), which is set to sk->sk_data_ready(). During sendmsg(), we check if the receiver has SOCK_DEAD, so there is no point in checking it again later in ->read_skb(). Also, unix_read_skb() for SOCK_DGRAM does not have the test either. Let's remove the SOCK_DEAD test in unix_stream_read_skb(). Signed-off-by: Kuniyuki Iwashima --- net/unix/af_unix.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1fa232ff4a2e..be4c68876740 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2804,14 +2804,6 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (unlikely(skb == READ_ONCE(u->oob_skb))) { bool drop = false; - unix_state_lock(sk); - - if (sock_flag(sk, SOCK_DEAD)) { - unix_state_unlock(sk); - kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE); - return -ECONNRESET; - } - spin_lock(&sk->sk_receive_queue.lock); if (likely(skb == u->oob_skb)) { WRITE_ONCE(u->oob_skb, NULL); @@ -2819,8 +2811,6 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } spin_unlock(&sk->sk_receive_queue.lock); - unix_state_unlock(sk); - if (drop) { kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); return -EAGAIN; -- 2.50.0.727.gbf7dc18ff4-goog unix_stream_read_skb() calls skb_recv_datagram() with MSG_DONTWAIT, which is mostly equivalent to sock_error(sk) + skb_dequeue(). In the following patch, we will add a new field to cache the number of bytes in the receive queue. Then, we want to avoid introducing atomic ops in the fast path, so we will reuse the receive queue lock. As a preparation for the change, let's not use skb_recv_datagram() in unix_stream_read_skb(). Note that sock_error() is now moved out of the u->iolock mutex as the mutex does not synchronise the peer's close() at all. Signed-off-by: Kuniyuki Iwashima --- net/unix/af_unix.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index be4c68876740..fa2081713dad 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2787,6 +2787,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int err; @@ -2794,30 +2795,34 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; - mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); - mutex_unlock(&u->iolock); - if (!skb) + err = sock_error(sk); + if (err) return err; -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (unlikely(skb == READ_ONCE(u->oob_skb))) { - bool drop = false; + mutex_lock(&u->iolock); + spin_lock(&queue->lock); - spin_lock(&sk->sk_receive_queue.lock); - if (likely(skb == u->oob_skb)) { - WRITE_ONCE(u->oob_skb, NULL); - drop = true; - } - spin_unlock(&sk->sk_receive_queue.lock); + skb = __skb_dequeue(queue); + if (!skb) { + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return -EAGAIN; + } - if (drop) { - kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); - return -EAGAIN; - } +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb == u->oob_skb) { + WRITE_ONCE(u->oob_skb, NULL); + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + + kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); + return -EAGAIN; } #endif + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return recv_actor(sk, skb); } -- 2.50.0.727.gbf7dc18ff4-goog Compared to TCP, ioctl(SIOCINQ) for AF_UNIX SOCK_STREAM socket is more expensive, as unix_inq_len() requires iterating through the receive queue and accumulating skb->len. Let's cache the value for SOCK_STREAM to a new field during sendmsg() and recvmsg(). The field is protected by the receive queue lock. Note that ioctl(SIOCINQ) for SOCK_DGRAM returns the length of the first skb in the queue. SOCK_SEQPACKET still requires iterating through the queue because we do not touch functions shared with unix_dgram_ops. But, if really needed, we can support it by switching __skb_try_recv_datagram() to a custom version. Signed-off-by: Kuniyuki Iwashima --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1af1841b7601..603f8cd026e5 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -47,6 +47,7 @@ struct unix_sock { #define peer_wait peer_wq.wait wait_queue_entry_t peer_wake; struct scm_stat scm_stat; + int inq_len; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) struct sk_buff *oob_skb; #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fa2081713dad..aade29d65570 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2297,6 +2297,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, spin_lock(&other->sk_receive_queue.lock); WRITE_ONCE(ousk->oob_skb, skb); + WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); @@ -2319,6 +2320,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sock *other = NULL; + struct unix_sock *otheru; struct scm_cookie scm; bool fds_sent = false; int err, sent = 0; @@ -2342,14 +2344,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_namelen) { err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; - } else { - other = unix_peer(sk); - if (!other) { - err = -ENOTCONN; - goto out_err; - } } + other = unix_peer(sk); + if (!other) { + err = -ENOTCONN; + goto out_err; + } + + otheru = unix_sk(other); + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto out_pipe; @@ -2418,7 +2422,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); + + spin_lock(&other->sk_receive_queue.lock); + WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); + unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2705,6 +2714,7 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + WRITE_ONCE(u->inq_len, u->inq_len - 1); if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && !unix_skb_len(oob_skb->prev)) { @@ -2809,6 +2819,8 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) return -EAGAIN; } + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb == u->oob_skb) { WRITE_ONCE(u->oob_skb, NULL); @@ -2989,7 +3001,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unix_skb_len(skb)) break; - skb_unlink(skb, &sk->sk_receive_queue); + spin_lock(&sk->sk_receive_queue.lock); + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + __skb_unlink(skb, &sk->sk_receive_queue); + spin_unlock(&sk->sk_receive_queue.lock); + consume_skb(skb); if (scm.fp) @@ -3160,9 +3176,11 @@ long unix_inq_len(struct sock *sk) if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; + if (sk->sk_type == SOCK_STREAM) + return READ_ONCE(unix_sk(sk)->inq_len); + spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET) { skb_queue_walk(&sk->sk_receive_queue, skb) amount += unix_skb_len(skb); } else { -- 2.50.0.727.gbf7dc18ff4-goog In unix_stream_read_generic(), state->msg is fetched multiple times. Let's cache it in a local variable. Signed-off-by: Kuniyuki Iwashima --- net/unix/af_unix.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aade29d65570..074edbbfb315 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2841,20 +2841,21 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { - struct scm_cookie scm; + int noblock = state->flags & MSG_DONTWAIT; struct socket *sock = state->socket; + struct msghdr *msg = state->msg; struct sock *sk = sock->sk; - struct unix_sock *u = unix_sk(sk); - int copied = 0; + size_t size = state->size; int flags = state->flags; - int noblock = flags & MSG_DONTWAIT; bool check_creds = false; - int target; + struct scm_cookie scm; + unsigned int last_len; + struct unix_sock *u; + int copied = 0; int err = 0; long timeo; + int target; int skip; - size_t size = state->size; - unsigned int last_len; if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; @@ -2874,6 +2875,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, memset(&scm, 0, sizeof(scm)); + u = unix_sk(sk); + /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ @@ -2965,14 +2968,12 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, } /* Copy address just once */ - if (state->msg && state->msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, - state->msg->msg_name); - unix_copy_addr(state->msg, skb->sk); + if (msg && msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); - BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, - state->msg->msg_name, - &state->msg->msg_namelen); + unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name, + &msg->msg_namelen); sunaddr = NULL; } @@ -3034,8 +3035,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, } while (size); mutex_unlock(&u->iolock); - if (state->msg) - scm_recv_unix(sock, state->msg, &scm, flags); + if (msg) + scm_recv_unix(sock, msg, &scm, flags); else scm_destroy(&scm); out: -- 2.50.0.727.gbf7dc18ff4-goog We have an application that uses almost the same code for TCP and AF_UNIX (SOCK_STREAM). TCP can use TCP_INQ, but AF_UNIX doesn't have it and requires an extra syscall, ioctl(SIOCINQ) or getsockopt(SO_MEMINFO) as an alternative. Let's introduce the generic version of TCP_INQ. If SO_INQ is enabled, recvmsg() will put a cmsg of SCM_INQ that contains the exact value of ioctl(SIOCINQ). The cmsg is also included when msg->msg_get_inq is non-zero to make sockets io_uring-friendly. Note that SOCK_CUSTOM_SOCKOPT is flagged only for SOCK_STREAM to override setsockopt() for SOL_SOCKET. By having the flag in struct unix_sock, instead of struct sock, we can later add SO_INQ support for TCP and reuse tcp_sk(sk)->recvmsg_inq. Note also that supporting custom getsockopt() for SOL_SOCKET will need preparation for other SOCK_CUSTOM_SOCKOPT users (UDP, vsock, MPTCP). Signed-off-by: Kuniyuki Iwashima --- arch/alpha/include/uapi/asm/socket.h | 3 ++ arch/mips/include/uapi/asm/socket.h | 3 ++ arch/parisc/include/uapi/asm/socket.h | 3 ++ arch/sparc/include/uapi/asm/socket.h | 3 ++ include/net/af_unix.h | 1 + include/uapi/asm-generic/socket.h | 3 ++ net/unix/af_unix.c | 62 ++++++++++++++++++++++++++- 7 files changed, 76 insertions(+), 2 deletions(-) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 8f1f18adcdb5..5ef57f88df6b 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -152,6 +152,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 31ac655b7837..72fb1b006da9 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -163,6 +163,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 1f2d5b7a7f5d..c16ec36dfee6 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -144,6 +144,9 @@ #define SO_PASSRIGHTS 0x4051 +#define SO_INQ 0x4052 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index adcba7329386..71befa109e1c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -145,6 +145,9 @@ #define SO_PASSRIGHTS 0x005c +#define SO_INQ 0x005d +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 603f8cd026e5..34f53dde65ce 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -48,6 +48,7 @@ struct unix_sock { wait_queue_entry_t peer_wake; struct scm_stat scm_stat; int inq_len; + bool recvmsg_inq; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) struct sk_buff *oob_skb; #endif diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f333a0ac4ee4..53b5a8c002b1 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -147,6 +147,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 074edbbfb315..81ef1b7764f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -934,6 +934,52 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) #define unix_show_fdinfo NULL #endif +static bool unix_custom_sockopt(int optname) +{ + switch (optname) { + case SO_INQ: + return true; + default: + return false; + } +} + +static int unix_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct unix_sock *u = unix_sk(sock->sk); + struct sock *sk = sock->sk; + int val; + + if (level != SOL_SOCKET) + return -EOPNOTSUPP; + + if (!unix_custom_sockopt(optname)) + return sock_setsockopt(sock, level, optname, optval, optlen); + + if (optlen != sizeof(int)) + return -EINVAL; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + switch (optname) { + case SO_INQ: + if (sk->sk_type != SOCK_STREAM) + return -EINVAL; + + if (val > 1 || val < 0) + return -EINVAL; + + WRITE_ONCE(u->recvmsg_inq, val); + break; + default: + return -ENOPROTOOPT; + } + + return 0; +} + static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, @@ -950,6 +996,7 @@ static const struct proto_ops unix_stream_ops = { #endif .listen = unix_listen, .shutdown = unix_shutdown, + .setsockopt = unix_setsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, @@ -1116,6 +1163,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, switch (sock->type) { case SOCK_STREAM: + set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); sock->ops = &unix_stream_ops; break; /* @@ -1847,6 +1895,9 @@ static int unix_accept(struct socket *sock, struct socket *newsock, skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); + if (tsk->sk_type == SOCK_STREAM) + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + /* attach accepted sock to socket */ unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); @@ -3035,10 +3086,17 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, } while (size); mutex_unlock(&u->iolock); - if (msg) + if (msg) { scm_recv_unix(sock, msg, &scm, flags); - else + + if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) { + msg->msg_inq = READ_ONCE(u->inq_len); + put_cmsg(msg, SOL_SOCKET, SCM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); + } + } else { scm_destroy(&scm); + } out: return copied ? : err; } -- 2.50.0.727.gbf7dc18ff4-goog Let's add a simple test to check the basic functionality of SO_INQ. The test does the following: 1. Create socketpair in self->fd[] 2. Enable SO_INQ 3. Send data via self->fd[0] 4. Receive data from self->fd[1] 5. Compare the SCM_INQ cmsg with ioctl(SIOCINQ) Signed-off-by: Kuniyuki Iwashima --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/af_unix/Makefile | 2 +- tools/testing/selftests/net/af_unix/scm_inq.c | 125 ++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/af_unix/scm_inq.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c6dd2a335cf4..47c293c2962f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -34,6 +34,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello +scm_inq scm_pidfd scm_rights sk_bind_sendto_listen diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..a4b61c6d0290 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..9d22561e7b8f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include +#include +#include +#include + +#include "../../kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +struct scm_inq { + struct cmsghdr cmsghdr; + int inq; +}; + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + struct msghdr msg = {}; + struct iovec iov = {}; + struct scm_inq cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg; + msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(&cmsg, 0, sizeof(cmsg)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); + ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(cmsg.inq, inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN -- 2.50.0.727.gbf7dc18ff4-goog