Zero check unused SQE fields addr3 and pad2 for timeout and timeout update requests. They're not needed now, but could be used sometime in the future. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov --- io_uring/timeout.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index cb61d4862fc6..e3815e3465dd 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -449,6 +449,8 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len || sqe->splice_fd_in) return -EINVAL; @@ -521,6 +523,8 @@ static int __io_timeout_prep(struct io_kiocb *req, unsigned flags; u32 off = READ_ONCE(sqe->off); + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) return -EINVAL; if (off && is_timeout_link) -- 2.53.0 There is some duplication for timespec checks that can be deduplicated with a new function, and it'll be extended in next patches. Signed-off-by: Pavel Begunkov --- io_uring/timeout.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index e3815e3465dd..f6520599e3e8 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -35,6 +35,18 @@ struct io_timeout_rem { bool ltimeout; }; +static int io_parse_user_time(struct timespec64 *ts_out, u64 arg) +{ + struct timespec64 ts; + + if (get_timespec64(&ts, u64_to_user_ptr(arg))) + return -EFAULT; + if (ts.tv_sec < 0 || ts.tv_nsec < 0) + return -EINVAL; + *ts_out = ts; + return 0; +} + static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, struct io_kiocb *link); @@ -446,6 +458,7 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem); + int ret; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; @@ -464,10 +477,9 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) tr->ltimeout = true; if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2)))) - return -EFAULT; - if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) - return -EINVAL; + ret = io_parse_user_time(&tr->ts, READ_ONCE(sqe->addr2)); + if (ret) + return ret; } else if (tr->flags) { /* timeout removal doesn't support flags */ return -EINVAL; @@ -522,6 +534,7 @@ static int __io_timeout_prep(struct io_kiocb *req, struct io_timeout_data *data; unsigned flags; u32 off = READ_ONCE(sqe->off); + int ret; if (sqe->addr3 || sqe->__pad2[0]) return -EINVAL; @@ -561,11 +574,9 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr)))) - return -EFAULT; - - if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) - return -EINVAL; + ret = io_parse_user_time(&data->ts, READ_ONCE(sqe->addr)); + if (ret) + return ret; data->mode = io_translate_timeout_mode(flags); -- 2.53.0 It'll be more convenient for next patches to keep ktime in requests instead of timespec64. Convert everything to ktime right after user argument parsing at request prep time. Signed-off-by: Pavel Begunkov --- io_uring/timeout.c | 31 +++++++++++++++---------------- io_uring/timeout.h | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index f6520599e3e8..4b67746ea3ca 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -30,12 +30,12 @@ struct io_timeout_rem { u64 addr; /* timeout update */ - struct timespec64 ts; + ktime_t time; u32 flags; bool ltimeout; }; -static int io_parse_user_time(struct timespec64 *ts_out, u64 arg) +static int io_parse_user_time(ktime_t *time, u64 arg) { struct timespec64 ts; @@ -43,7 +43,7 @@ static int io_parse_user_time(struct timespec64 *ts_out, u64 arg) return -EFAULT; if (ts.tv_sec < 0 || ts.tv_nsec < 0) return -EINVAL; - *ts_out = ts; + *time = timespec64_to_ktime(ts); return 0; } @@ -92,7 +92,7 @@ static void io_timeout_complete(struct io_tw_req tw_req, io_tw_token_t tw) /* re-arm timer */ raw_spin_lock_irq(&ctx->timeout_lock); list_add(&timeout->list, ctx->timeout_list.prev); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + hrtimer_start(&data->timer, data->time, data->mode); raw_spin_unlock_irq(&ctx->timeout_lock); return; } @@ -407,7 +407,7 @@ static clockid_t io_timeout_get_clock(struct io_timeout_data *data) } static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) + ktime_t ts, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { struct io_timeout_data *io; @@ -429,12 +429,12 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, if (hrtimer_try_to_cancel(&io->timer) == -1) return -EALREADY; hrtimer_setup(&io->timer, io_link_timeout_fn, io_timeout_get_clock(io), mode); - hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + hrtimer_start(&io->timer, ts, mode); return 0; } static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) + ktime_t time, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { struct io_cancel_data cd = { .ctx = ctx, .data = user_data, }; @@ -447,11 +447,11 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, timeout->off = 0; /* noseq */ data = req->async_data; - data->ts = *ts; + data->time = time; list_add_tail(&timeout->list, &ctx->timeout_list); hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), mode); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode); + hrtimer_start(&data->timer, data->time, mode); return 0; } @@ -477,7 +477,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) tr->ltimeout = true; if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) return -EINVAL; - ret = io_parse_user_time(&tr->ts, READ_ONCE(sqe->addr2)); + ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2)); if (ret) return ret; } else if (tr->flags) { @@ -514,9 +514,9 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) raw_spin_lock_irq(&ctx->timeout_lock); if (tr->ltimeout) - ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + ret = io_linked_timeout_update(ctx, tr->addr, tr->time, mode); else - ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + ret = io_timeout_update(ctx, tr->addr, tr->time, mode); raw_spin_unlock_irq(&ctx->timeout_lock); } @@ -574,7 +574,7 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - ret = io_parse_user_time(&data->ts, READ_ONCE(sqe->addr)); + ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr)); if (ret) return ret; @@ -652,7 +652,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) } add: list_add(&timeout->list, entry); - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + hrtimer_start(&data->timer, data->time, data->mode); raw_spin_unlock_irq(&ctx->timeout_lock); return IOU_ISSUE_SKIP_COMPLETE; } @@ -670,8 +670,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) if (timeout->head) { struct io_timeout_data *data = req->async_data; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), - data->mode); + hrtimer_start(&data->timer, data->time, data->mode); list_add_tail(&timeout->list, &ctx->ltimeout_list); } raw_spin_unlock_irq(&ctx->timeout_lock); diff --git a/io_uring/timeout.h b/io_uring/timeout.h index 2b7c9ad72992..1620f94dd45a 100644 --- a/io_uring/timeout.h +++ b/io_uring/timeout.h @@ -3,7 +3,7 @@ struct io_timeout_data { struct io_kiocb *req; struct hrtimer timer; - struct timespec64 ts; + ktime_t time; enum hrtimer_mode mode; u32 flags; }; -- 2.53.0 One the things the user has always keep in mind is that any user pointers they put into an SQE is not going to be read by the kernel until submission happens, and the user has to ensure the pointee stays alive until then. For example, snippet below will lead to UAF of the on stack variable ts. Instead of passing the timeout value as a pointer allow to store it immediately in the SQE. The user has to set a new flag called IORING_TIMEOUT_IMMEDIATE_ARG, in which case sqe->addr for timeout or sqe->addr2 for timeout update requests will be interpreted as a time value in nanosecods. void prep_timeout(struct io_uring_sqe *sqe) { struct __kernel_timespec ts = {...}; prep_timeout(sqe, &ts); } void submit() { sqe = get_sqe(); prep_timeout(sqe); io_uring_submit(); } Signed-off-by: Pavel Begunkov --- include/uapi/linux/io_uring.h | 5 +++++ io_uring/timeout.c | 20 +++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 17475c2045fb..17ac1b785440 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -343,6 +343,10 @@ enum io_uring_op { /* * sqe->timeout_flags + * + * IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout + * value in nanoseconds instead of + * pointing to a timespec. */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) @@ -351,6 +355,7 @@ enum io_uring_op { #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_MULTISHOT (1U << 6) +#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 4b67746ea3ca..8eddf8add7a2 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -35,10 +35,17 @@ struct io_timeout_rem { bool ltimeout; }; -static int io_parse_user_time(ktime_t *time, u64 arg) +static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) { struct timespec64 ts; + if (flags & IORING_TIMEOUT_IMMEDIATE_ARG) { + *time = ns_to_ktime(arg); + if (*time < 0) + return -EINVAL; + return 0; + } + if (get_timespec64(&ts, u64_to_user_ptr(arg))) return -EFAULT; if (ts.tv_sec < 0 || ts.tv_nsec < 0) @@ -475,9 +482,11 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) tr->ltimeout = true; - if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK | + IORING_TIMEOUT_ABS | + IORING_TIMEOUT_IMMEDIATE_ARG)) return -EINVAL; - ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2)); + ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2), tr->flags); if (ret) return ret; } else if (tr->flags) { @@ -545,7 +554,8 @@ static int __io_timeout_prep(struct io_kiocb *req, flags = READ_ONCE(sqe->timeout_flags); if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | IORING_TIMEOUT_ETIME_SUCCESS | - IORING_TIMEOUT_MULTISHOT)) + IORING_TIMEOUT_MULTISHOT | + IORING_TIMEOUT_IMMEDIATE_ARG)) return -EINVAL; /* more than one clock specified is invalid, obviously */ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) @@ -574,7 +584,7 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr)); + ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr), flags); if (ret) return ret; -- 2.53.0