Add a structure that keeps the arguments needed for the current round of
waiting: namely, the number of CQEs to wait for, in the form of a CQ tail
index, and the timeout.

Signed-off-by: Pavel Begunkov
---
 io_uring/io_uring.c | 22 ++++++++++++----------
 io_uring/io_uring.h | 14 +++++++++++---
 io_uring/napi.c     |  4 ++--
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4139cfc84221..29f34fbcbb01 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2508,8 +2508,8 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer)
 	struct io_ring_ctx *ctx = iowq->ctx;
 
 	/* no general timeout, or shorter (or equal), we are done */
-	if (iowq->timeout == KTIME_MAX ||
-	    ktime_compare(iowq->min_timeout, iowq->timeout) >= 0)
+	if (iowq->ls.timeout == KTIME_MAX ||
+	    ktime_compare(iowq->min_timeout, iowq->ls.timeout) >= 0)
 		goto out_wake;
 	/* work we may need to run, wake function will see if we need to wake */
 	if (io_has_work(ctx))
@@ -2535,7 +2535,7 @@
 	}
 
 	hrtimer_update_function(&iowq->t, io_cqring_timer_wakeup);
-	hrtimer_set_expires(timer, iowq->timeout);
+	hrtimer_set_expires(timer, iowq->ls.timeout);
 	return HRTIMER_RESTART;
 out_wake:
 	return io_cqring_timer_wakeup(timer);
@@ -2551,7 +2551,7 @@ static int io_cqring_schedule_timeout(struct io_wait_queue *iowq,
 		hrtimer_setup_on_stack(&iowq->t, io_cqring_min_timer_wakeup,
 				       clock_id, HRTIMER_MODE_ABS);
 	} else {
-		timeout = iowq->timeout;
+		timeout = iowq->ls.timeout;
 		hrtimer_setup_on_stack(&iowq->t, io_cqring_timer_wakeup,
 				       clock_id, HRTIMER_MODE_ABS);
 	}
@@ -2592,7 +2592,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	 */
 	if (ext_arg->iowait && current_pending_io())
 		current->in_iowait = 1;
-	if (iowq->timeout != KTIME_MAX || iowq->min_timeout)
+	if (iowq->ls.timeout != KTIME_MAX || iowq->min_timeout)
 		ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time);
 	else
 		schedule();
@@ -2650,18 +2650,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 	iowq.wqe.private = current;
 	INIT_LIST_HEAD(&iowq.wqe.entry);
 	iowq.ctx = ctx;
-	iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+	iowq.ls.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
 	iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail);
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	iowq.hit_timeout = 0;
 	iowq.min_timeout = ext_arg->min_time;
-	iowq.timeout = KTIME_MAX;
+	iowq.ls.timeout = KTIME_MAX;
 	start_time = io_get_time(ctx);
 
 	if (ext_arg->ts_set) {
-		iowq.timeout = timespec64_to_ktime(ext_arg->ts);
+		ktime_t timeout = timespec64_to_ktime(ext_arg->ts);
+
 		if (!(flags & IORING_ENTER_ABS_TIMER))
-			iowq.timeout = ktime_add(iowq.timeout, start_time);
+			timeout = ktime_add(timeout, start_time);
+		iowq.ls.timeout = timeout;
 	}
 
 	if (ext_arg->sig) {
@@ -2686,7 +2688,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 		/* if min timeout has been hit, don't reset wait count */
 		if (!iowq.hit_timeout)
-			nr_wait = (int) iowq.cq_tail -
+			nr_wait = (int) iowq.ls.cq_tail -
 					READ_ONCE(ctx->rings->cq.tail);
 		else
 			nr_wait = 1;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index a4474eec8a13..caff186bc377 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -101,15 +101,23 @@ struct io_defer_entry {
 	struct io_kiocb *req;
 };
 
+struct iou_loop_state {
+	/*
+	 * The CQE index to wait for. It only serves as a hint, and the
+	 * task can still be woken up earlier.
+	 */
+	__u32 cq_tail;
+	ktime_t timeout;
+};
+
 struct io_wait_queue {
+	struct iou_loop_state ls;
 	struct wait_queue_entry wqe;
 	struct io_ring_ctx *ctx;
-	unsigned cq_tail;
 	unsigned cq_min_tail;
 	unsigned nr_timeouts;
 	int hit_timeout;
 	ktime_t min_timeout;
-	ktime_t timeout;
 	struct hrtimer t;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -121,7 +129,7 @@ struct io_wait_queue {
 static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
-	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
+	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->ls.cq_tail;
 
 	/*
 	 * Wake up if we have enough events, or if a timeout occurred since we
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 4a10de03e426..b804f8fdd883 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -360,8 +360,8 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
 		return;
 
 	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
-	if (iowq->timeout != KTIME_MAX) {
-		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
+	if (iowq->ls.timeout != KTIME_MAX) {
+		ktime_t dt = ktime_sub(iowq->ls.timeout, io_get_time(ctx));
 
 		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
 	}
--
2.49.0
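
For illustration only, here is a standalone userspace sketch of how the
grouped loop state drives the wake decision, mirroring io_should_wake()'s
wraparound-safe tail comparison. This is not kernel code: ktime_t is mocked
as a plain s64, KTIME_MAX as INT64_MAX, and should_wake() is a hypothetical
stand-in for the real helper.

#include <stdint.h>
#include <stdio.h>

typedef int64_t ktime_t;	/* mock of the kernel's ktime_t */
#define KTIME_MAX INT64_MAX	/* mock; here it means "no timeout set" */

struct iou_loop_state {
	/* CQ tail index to wait for; only a hint, wakeups may come earlier */
	uint32_t cq_tail;
	ktime_t timeout;
};

/* mirrors io_should_wake(): the signed distance handles tail wraparound */
static int should_wake(uint32_t cur_cq_tail, const struct iou_loop_state *ls)
{
	int dist = (int)(cur_cq_tail - ls->cq_tail);

	return dist >= 0;
}

int main(void)
{
	struct iou_loop_state ls = {
		.cq_tail = 10 + 4,	/* CQ head was 10, wait for 4 CQEs */
		.timeout = KTIME_MAX,	/* no timeout requested */
	};

	printf("tail=13 -> wake=%d\n", should_wake(13, &ls));	/* 0: keep waiting */
	printf("tail=14 -> wake=%d\n", should_wake(14, &ls));	/* 1: enough CQEs */
	return 0;
}

The point of the struct is visible here: cq_tail and timeout together are
exactly the per-round wait arguments, so collecting them in iou_loop_state
keeps everything one waiting round needs in a single place.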