Add custom filtering for IORING_OP_CONNECT. The target address family
is always exposed; for AF_INET / AF_INET6, the port and address are
exposed as well. port and v4_addr are kept in network byte order so
that filter authors can compare them against on-wire constants.

Population is skipped unless addr_len covers the fields being
populated, to avoid leaking stale io_async_msghdr data on short
connects.

Signed-off-by: Shouvik Kar
---
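As an illustration only (this note sits above the diffstat and is not
part of the applied patch): a minimal classic-BPF sketch of a filter
that admits IPv4 connects to port 443 and rejects everything else.
The filter name is hypothetical, the comparison assumes seccomp-style
native-endian context loads on a little-endian machine, and the
"nonzero return allows" convention comes from the rest of the series,
not from this patch. The port test shows the load + mask pattern the
header comment prescribes for sub-word fields.

#include <stddef.h>		/* offsetof */
#include <sys/socket.h>		/* AF_INET */
#include <linux/filter.h>	/* struct sock_filter, BPF_STMT, BPF_JUMP */
#include <linux/io_uring/bpf_filter.h>

static struct sock_filter connect_443_only[] = {
	/* A = bctx->connect.family (sa_family_t, zero-extended) */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 offsetof(struct io_uring_bpf_ctx, connect.family)),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AF_INET, 0, 4),
	/* A = 32-bit word covering connect.port (__be16) plus pad[2] */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 offsetof(struct io_uring_bpf_ctx, connect.port)),
	/* sub-word field: mask the loaded word down to the port */
	BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xffff),
	/* 0xbb01 == htons(443) as seen by a little-endian load */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xbb01, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 1),	/* allow (assumed convention) */
	BPF_STMT(BPF_RET | BPF_K, 0),	/* reject */
};
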
 include/uapi/linux/io_uring/bpf_filter.h | 16 +++++++++
 io_uring/net.c                           | 41 ++++++++++++++++++++++++
 io_uring/net.h                           |  7 ++++
 io_uring/opdef.c                         |  2 ++
 4 files changed, 66 insertions(+)

diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 1b461d792a7b..ce7d78ab13b3 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -27,6 +27,22 @@ struct io_uring_bpf_ctx {
 			__u64 mode;
 			__u64 resolve;
 		} open;
+		/*
+		 * For CONNECT: fields are populated only when addr_len covers
+		 * them; unpopulated fields are zero from the caller-side memset
+		 * in io_uring_populate_bpf_ctx(). port and v4_addr are network
+		 * byte order. Filters may only issue BPF_LD|BPF_W|BPF_ABS at
+		 * 4-byte aligned offsets; load + mask for sub-word fields.
+		 */
+		struct {
+			__u32 family;	/* sa_family_t zero-extended */
+			__be16 port;
+			__u8 pad[2];
+			union {
+				__be32 v4_addr;
+				__u8 v6_addr[16];
+			};
+		} connect;
 	};
 };
diff --git a/io_uring/net.c b/io_uring/net.c
index 30cd22c0b934..cceb5c1409ca 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1674,6 +1674,47 @@ void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
 	bctx->socket.protocol = sock->protocol;
 }
 
+void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
+{
+	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
+	struct io_async_msghdr *iomsg = req->async_data;
+	struct sockaddr_storage *ss = &iomsg->addr;
+
+	/*
+	 * move_addr_to_kernel() skips the copy for addr_len == 0, so
+	 * iomsg->addr may hold stale data from a prior CONNECT. Bail
+	 * unless addr_len covers the family discriminator.
+	 */
+	if (conn->addr_len < (int)sizeof(sa_family_t))
+		return;
+
+	bctx->connect.family = ss->ss_family;
+	switch (ss->ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *sin = (struct sockaddr_in *)ss;
+
+		if (conn->addr_len < (int)sizeof(*sin))
+			break;
+		bctx->connect.port = sin->sin_port;
+		bctx->connect.v4_addr = sin->sin_addr.s_addr;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
+
+		if (conn->addr_len < (int)sizeof(*sin6))
+			break;
+		bctx->connect.port = sin6->sin6_port;
+		memcpy(bctx->connect.v6_addr, &sin6->sin6_addr,
+		       sizeof(bctx->connect.v6_addr));
+		break;
+	}
+	default:
+		/* family is set; per-family fields stay zero - family-only filtering */
+		break;
+	}
+}
+
 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
diff --git a/io_uring/net.h b/io_uring/net.h
index d4d1ddce50e3..51fda715d3c0 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -46,6 +46,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags);
 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_socket(struct io_kiocb *req, unsigned int issue_flags);
 void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
+void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
 
 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_connect(struct io_kiocb *req, unsigned int issue_flags);
@@ -69,4 +70,10 @@ static inline void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx,
 					  struct io_kiocb *req)
 {
 }
+
+static inline void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx,
+					   struct io_kiocb *req)
+{
+}
+
 #endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index c3ef52b70811..8ea6bd274607 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -203,9 +203,11 @@ const struct io_issue_def io_issue_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
 #if defined(CONFIG_NET)
+		.filter_pdu_size	= sizeof_field(struct io_uring_bpf_ctx, connect),
 		.async_size		= sizeof(struct io_async_msghdr),
 		.prep			= io_connect_prep,
 		.issue			= io_connect,
+		.filter_populate	= io_connect_bpf_populate,
 #else
 		.prep			= io_eopnotsupp_prep,
 #endif
-- 
2.53.0