Add a new getsockopt_iter callback to struct proto_ops that uses sockopt_t, a type-safe wrapper around iov_iter. This provides a clean interface for socket option operations that works with both user and kernel buffers. The sockopt_t type encapsulates an iov_iter and an optlen field. The optlen field, although not suggested by Linus, serves as both input (buffer size) and output (returned data size), allowing callbacks to return an arbitrary length value independent of the bytes written via copy_to_iter(); it is therefore kept separate from iov_iter.count. This is preparatory work for removing the SOL_SOCKET level restriction from io_uring getsockopt operations. Suggested-by: Linus Torvalds Signed-off-by: Breno Leitao --- include/linux/net.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index f58b38ab37f8a..94f6c86769afc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -23,9 +23,26 @@ #include #include #include +#include #include +/** + * struct sockopt - socket option value container + * @iter: iov_iter for reading/writing option data + * @optlen: buffer size on input; set by callback to indicate returned data size + * + * Type-safe wrapper for socket option data that works with both + * user and kernel buffers. + * + * The optlen field allows callbacks to return a specific length value + * independent of the bytes written via copy_to_iter(). 
+ */ +typedef struct sockopt { + struct iov_iter iter; + int optlen; +} sockopt_t; + struct poll_table_struct; struct pipe_inode_info; struct inode; @@ -192,6 +209,8 @@ struct proto_ops { unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); + int (*getsockopt_iter)(struct socket *sock, int level, + int optname, sockopt_t *opt); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); -- 2.47.3 Update do_sock_getsockopt() to use the new getsockopt_iter callback when available. Add do_sock_getsockopt_iter() helper that: 1. Reads optlen from user/kernel space 2. Initializes a sockopt_t with the appropriate iov_iter (kvec for kernel, ubuf for user buffers) and sets opt.optlen 3. Calls the protocol's getsockopt_iter callback 4. Writes opt.optlen back to user/kernel space The callback is responsible for setting opt.optlen to indicate the returned data size. 
Signed-off-by: Breno Leitao --- net/socket.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index 136b98c54fb37..2d830262b1be5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -2356,6 +2357,38 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +static int do_sock_getsockopt_iter(struct socket *sock, + const struct proto_ops *ops, int level, + int optname, sockptr_t optval, + sockptr_t optlen) +{ + struct kvec kvec; + sockopt_t opt; + int koptlen; + int err; + + if (copy_from_sockptr(&koptlen, optlen, sizeof(int))) + return -EFAULT; + + if (optval.is_kernel) { + kvec.iov_base = optval.kernel; + kvec.iov_len = koptlen; + iov_iter_kvec(&opt.iter, ITER_DEST, &kvec, 1, koptlen); + } else { + iov_iter_ubuf(&opt.iter, ITER_DEST, optval.user, koptlen); + } + opt.optlen = koptlen; + + err = ops->getsockopt_iter(sock, level, optname, &opt); + if (err) + return err; + + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int))) + return -EFAULT; + + return 0; +} + int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { @@ -2373,15 +2406,18 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { err = sk_getsockopt(sock->sk, level, optname, optval, optlen); - } else if (unlikely(!ops->getsockopt)) { - err = -EOPNOTSUPP; - } else { + } else if (ops->getsockopt_iter) { + err = do_sock_getsockopt_iter(sock, ops, level, optname, + optval, optlen); + } else if (ops->getsockopt) { if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, "Invalid argument type")) return -EOPNOTSUPP; err = ops->getsockopt(sock, level, optname, optval.user, optlen.user); + } else { + err = -EOPNOTSUPP; } if 
(!compat) -- 2.47.3 Convert netlink's getsockopt implementation to use the new getsockopt_iter callback with sockopt_t. Key changes: - Replace (char __user *optval, int __user *optlen) with sockopt_t *opt - Use opt->optlen for buffer length (input) and returned size (output) - Use copy_to_iter() instead of put_user()/copy_to_user() The optlen field allows callbacks to return a specific length value independent of the bytes written via copy_to_iter(). This enables io_uring to call netlink's getsockopt with kernel buffers. Signed-off-by: Breno Leitao --- net/netlink/af_netlink.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8e5151f0c6e46..8a195eb1ef761 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -1716,7 +1717,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } static int netlink_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -1726,8 +1727,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -1743,6 +1743,8 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, break; case NETLINK_LIST_MEMBERSHIPS: { int pos, idx, shift, err = 0; + u32 group_val; + size_t size; netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { @@ -1751,14 +1753,14 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, idx = pos / sizeof(unsigned long); shift = (pos % sizeof(unsigned long)) * 8; - if (put_user((u32)(nlk->groups[idx] >> shift), - (u32 __user *)(optval + pos))) { 
+ group_val = (u32)(nlk->groups[idx] >> shift); + size = copy_to_iter(&group_val, sizeof(group_val), &opt->iter); + if (size != sizeof(group_val)) { err = -EFAULT; break; } } - if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) - err = -EFAULT; + opt->optlen = ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)); netlink_unlock_table(); return err; } @@ -1784,10 +1786,10 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); val = test_bit(flag, &nlk->flags); - if (put_user(len, optlen) || - copy_to_user(optval, &val, len)) + if (copy_to_iter(&val, len, &opt->iter) != len) return -EFAULT; + opt->optlen = sizeof(int); return 0; } @@ -2813,7 +2815,7 @@ static const struct proto_ops netlink_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = netlink_setsockopt, - .getsockopt = netlink_getsockopt, + .getsockopt_iter = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, -- 2.47.3