"wq" usually stands for wait queue and not for entries. Rename the wait_queue_entry field in struct io_wait_queue to "wqe" to avoid confusion. It's just a cosmetic change. Signed-off-by: Pavel Begunkov --- io_uring/io_uring.c | 14 +++++++------- io_uring/io_uring.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d11d0e9723a1..3eb4c9200bb2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2453,7 +2453,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { - struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wq); + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wqe); /* * Cannot safely flush overflowed CQEs from here, ensure we wake up @@ -2493,7 +2493,7 @@ static enum hrtimer_restart io_cqring_timer_wakeup(struct hrtimer *timer) WRITE_ONCE(iowq->hit_timeout, 1); iowq->min_timeout = 0; - wake_up_process(iowq->wq.private); + wake_up_process(iowq->wqe.private); return HRTIMER_NORESTART; } @@ -2646,9 +2646,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (__io_cqring_events_user(ctx) >= min_events) return 0; - init_waitqueue_func_entry(&iowq.wq, io_wake_function); - iowq.wq.private = current; - INIT_LIST_HEAD(&iowq.wq.entry); + init_waitqueue_func_entry(&iowq.wqe, io_wake_function); + iowq.wqe.private = current; + INIT_LIST_HEAD(&iowq.wqe.entry); iowq.ctx = ctx; iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail); @@ -2695,7 +2695,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, atomic_set(&ctx->cq_wait_nr, nr_wait); set_current_state(TASK_INTERRUPTIBLE); } else { - prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, + prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wqe, TASK_INTERRUPTIBLE); } @@ -2743,7 
+2743,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, } while (1); if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) - finish_wait(&ctx->cq_wait, &iowq.wq); + finish_wait(&ctx->cq_wait, &iowq.wqe); restore_saved_sigmask_unless(ret == -EINTR); return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 23c268ab1c8f..53bc3ef14f9e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -102,7 +102,7 @@ struct io_defer_entry { }; struct io_wait_queue { - struct wait_queue_entry wq; + struct wait_queue_entry wqe; struct io_ring_ctx *ctx; unsigned cq_tail; unsigned cq_min_tail; -- 2.49.0 The io_cqring_wait_schedule() caller doesn't differentiate between the helper returning 0 and 1, so simplify it and return 0 in both cases. Signed-off-by: Pavel Begunkov --- io_uring/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3eb4c9200bb2..b26c2a0a0295 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2607,11 +2607,11 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, ktime_t start_time) { if (unlikely(READ_ONCE(ctx->check_cq))) - return 1; + return 0; if (unlikely(io_local_work_pending(ctx))) - return 1; + return 0; if (unlikely(task_work_pending(current))) - return 1; + return 0; if (unlikely(task_sigpending(current))) return -EINTR; if (unlikely(io_should_wake(iowq))) -- 2.49.0 __io_run_local_work() will be used later by bpf code, export it. 
Signed-off-by: Pavel Begunkov --- io_uring/io_uring.c | 4 ++-- io_uring/io_uring.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index b26c2a0a0295..4139cfc84221 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1350,8 +1350,8 @@ static int __io_run_local_work_loop(struct llist_node **node, return ret; } -static int __io_run_local_work(struct io_ring_ctx *ctx, io_tw_token_t tw, - int min_events, int max_events) +int __io_run_local_work(struct io_ring_ctx *ctx, io_tw_token_t tw, + int min_events, int max_events) { struct llist_node *node; unsigned int loops = 0; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 53bc3ef14f9e..a4474eec8a13 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -140,6 +140,8 @@ int io_uring_fill_params(unsigned entries, struct io_uring_params *p); bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32); int io_run_task_work_sig(struct io_ring_ctx *ctx); int io_run_local_work(struct io_ring_ctx *ctx, int min_events, int max_events); +int __io_run_local_work(struct io_ring_ctx *ctx, io_tw_token_t tw, + int min_events, int max_events); void io_req_defer_failed(struct io_kiocb *req, s32 res); bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); -- 2.49.0 Add a structure that keeps arguments needed for the current round of waiting. Namely, the number of CQEs to wait for in a form of CQ tail index and timeout. 
Signed-off-by: Pavel Begunkov --- io_uring/io_uring.c | 22 ++++++++++++---------- io_uring/io_uring.h | 14 +++++++++++--- io_uring/napi.c | 4 ++-- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4139cfc84221..29f34fbcbb01 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2508,8 +2508,8 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer) struct io_ring_ctx *ctx = iowq->ctx; /* no general timeout, or shorter (or equal), we are done */ - if (iowq->timeout == KTIME_MAX || - ktime_compare(iowq->min_timeout, iowq->timeout) >= 0) + if (iowq->ls.timeout == KTIME_MAX || + ktime_compare(iowq->min_timeout, iowq->ls.timeout) >= 0) goto out_wake; /* work we may need to run, wake function will see if we need to wake */ if (io_has_work(ctx)) @@ -2535,7 +2535,7 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer) } hrtimer_update_function(&iowq->t, io_cqring_timer_wakeup); - hrtimer_set_expires(timer, iowq->timeout); + hrtimer_set_expires(timer, iowq->ls.timeout); return HRTIMER_RESTART; out_wake: return io_cqring_timer_wakeup(timer); @@ -2551,7 +2551,7 @@ static int io_cqring_schedule_timeout(struct io_wait_queue *iowq, hrtimer_setup_on_stack(&iowq->t, io_cqring_min_timer_wakeup, clock_id, HRTIMER_MODE_ABS); } else { - timeout = iowq->timeout; + timeout = iowq->ls.timeout; hrtimer_setup_on_stack(&iowq->t, io_cqring_timer_wakeup, clock_id, HRTIMER_MODE_ABS); } @@ -2592,7 +2592,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx, */ if (ext_arg->iowait && current_pending_io()) current->in_iowait = 1; - if (iowq->timeout != KTIME_MAX || iowq->min_timeout) + if (iowq->ls.timeout != KTIME_MAX || iowq->min_timeout) ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time); else schedule(); @@ -2650,18 +2650,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, iowq.wqe.private = current; 
INIT_LIST_HEAD(&iowq.wqe.entry); iowq.ctx = ctx; - iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; + iowq.ls.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail); iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); iowq.hit_timeout = 0; iowq.min_timeout = ext_arg->min_time; - iowq.timeout = KTIME_MAX; + iowq.ls.timeout = KTIME_MAX; start_time = io_get_time(ctx); if (ext_arg->ts_set) { - iowq.timeout = timespec64_to_ktime(ext_arg->ts); + ktime_t timeout = timespec64_to_ktime(ext_arg->ts); + if (!(flags & IORING_ENTER_ABS_TIMER)) - iowq.timeout = ktime_add(iowq.timeout, start_time); + timeout = ktime_add(timeout, start_time); + iowq.ls.timeout = timeout; } if (ext_arg->sig) { @@ -2686,7 +2688,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, /* if min timeout has been hit, don't reset wait count */ if (!iowq.hit_timeout) - nr_wait = (int) iowq.cq_tail - + nr_wait = (int) iowq.ls.cq_tail - READ_ONCE(ctx->rings->cq.tail); else nr_wait = 1; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index a4474eec8a13..caff186bc377 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -101,15 +101,23 @@ struct io_defer_entry { struct io_kiocb *req; }; +struct iou_loop_state { + /* + * The CQE index to wait for. Only serves as a hint and can still be + * woken up earlier. 
+ */ + __u32 cq_tail; + ktime_t timeout; +}; + struct io_wait_queue { + struct iou_loop_state ls; struct wait_queue_entry wqe; struct io_ring_ctx *ctx; - unsigned cq_tail; unsigned cq_min_tail; unsigned nr_timeouts; int hit_timeout; ktime_t min_timeout; - ktime_t timeout; struct hrtimer t; #ifdef CONFIG_NET_RX_BUSY_POLL @@ -121,7 +121,7 @@ struct io_wait_queue { static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; - int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail; + int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->ls.cq_tail; /* * Wake up if we have enough events, or if a timeout occurred since we diff --git a/io_uring/napi.c b/io_uring/napi.c index 4a10de03e426..b804f8fdd883 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -360,8 +360,8 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) return; iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); - if (iowq->timeout != KTIME_MAX) { - ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx)); + if (iowq->ls.timeout != KTIME_MAX) { + ktime_t dt = ktime_sub(iowq->ls.timeout, io_get_time(ctx)); iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt); } -- 2.49.0 Add some basic helpers and definitions for implementing bpf struct_ops. There are no callbacks yet, and registration will always fail. 
Signed-off-by: Pavel Begunkov --- include/linux/io_uring_types.h | 4 ++ io_uring/Kconfig | 5 ++ io_uring/Makefile | 1 + io_uring/bpf.c | 93 ++++++++++++++++++++++++++++++++++ io_uring/bpf.h | 21 ++++++++ io_uring/io_uring.c | 2 + 6 files changed, 126 insertions(+) create mode 100644 io_uring/bpf.c create mode 100644 io_uring/bpf.h diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index ef1af730193a..43432a06d177 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -8,6 +8,8 @@ #include #include +struct io_uring_ops; + enum { /* * A hint to not wake right away but delay until there are enough of @@ -276,6 +278,8 @@ struct io_ring_ctx { struct io_rings *rings; struct percpu_ref refs; + struct io_uring_ops *bpf_ops; + clockid_t clockid; enum tk_offsets clock_offset; diff --git a/io_uring/Kconfig b/io_uring/Kconfig index 4b949c42c0bf..b4dad9b74544 100644 --- a/io_uring/Kconfig +++ b/io_uring/Kconfig @@ -9,3 +9,8 @@ config IO_URING_ZCRX depends on PAGE_POOL depends on INET depends on NET_RX_BUSY_POLL + +config IO_URING_BPF + def_bool y + depends on IO_URING + depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF diff --git a/io_uring/Makefile b/io_uring/Makefile index bc4e4a3fa0a5..35eeeaf64489 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o obj-$(CONFIG_NET) += net.o cmd_net.o obj-$(CONFIG_PROC_FS) += fdinfo.o obj-$(CONFIG_IO_URING_MOCK_FILE) += mock_file.o +obj-$(CONFIG_IO_URING_BPF) += bpf.o diff --git a/io_uring/bpf.c b/io_uring/bpf.c new file mode 100644 index 000000000000..4cb5d25c9247 --- /dev/null +++ b/io_uring/bpf.c @@ -0,0 +1,93 @@ +#include + +#include "bpf.h" +#include "register.h" + +static struct io_uring_ops io_bpf_ops_stubs = { +}; + +static bool bpf_io_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (type != BPF_READ) + return false; + if (off 
< 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (off % size != 0) + return false; + + return btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_io_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, int off, + int size) +{ + return -EACCES; +} + +static const struct bpf_verifier_ops bpf_io_verifier_ops = { + .get_func_proto = bpf_base_func_proto, + .is_valid_access = bpf_io_is_valid_access, + .btf_struct_access = bpf_io_btf_struct_access, +}; + +static int bpf_io_init(struct btf *btf) +{ + return 0; +} + +static int bpf_io_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + return 0; +} + +static int bpf_io_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static int bpf_io_reg(void *kdata, struct bpf_link *link) +{ + return -EOPNOTSUPP; +} + +static void bpf_io_unreg(void *kdata, struct bpf_link *link) +{ +} + +void io_unregister_bpf(struct io_ring_ctx *ctx) +{ +} + +static struct bpf_struct_ops bpf_io_uring_ops = { + .verifier_ops = &bpf_io_verifier_ops, + .reg = bpf_io_reg, + .unreg = bpf_io_unreg, + .check_member = bpf_io_check_member, + .init_member = bpf_io_init_member, + .init = bpf_io_init, + .cfi_stubs = &io_bpf_ops_stubs, + .name = "io_uring_ops", + .owner = THIS_MODULE, +}; + +static int __init io_uring_bpf_init(void) +{ + int ret; + + ret = register_bpf_struct_ops(&bpf_io_uring_ops, io_uring_ops); + if (ret) { + pr_err("io_uring: Failed to register struct_ops (%d)\n", ret); + return ret; + } + + return 0; +} +__initcall(io_uring_bpf_init); diff --git a/io_uring/bpf.h b/io_uring/bpf.h new file mode 100644 index 000000000000..34a51a57103d --- /dev/null +++ b/io_uring/bpf.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef IOU_BPF_H +#define IOU_BPF_H + +#include +#include + +#include "io_uring.h" + +struct io_uring_ops { +}; + +#ifdef 
CONFIG_IO_URING_BPF +void io_unregister_bpf(struct io_ring_ctx *ctx); +#else +static inline void io_unregister_bpf(struct io_ring_ctx *ctx) +{ +} +#endif + +#endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 29f34fbcbb01..5b80987ebb2c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -99,6 +99,7 @@ #include "msg_ring.h" #include "memmap.h" #include "zcrx.h" +#include "bpf.h" #include "timeout.h" #include "poll.h" @@ -2830,6 +2831,7 @@ static __cold void io_req_caches_free(struct io_ring_ctx *ctx) static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_sq_thread_finish(ctx); + io_unregister_bpf(ctx); mutex_lock(&ctx->uring_lock); io_sqe_buffers_unregister(ctx); -- 2.49.0 Add a struct_ops callback called handle_events, which will be called off the CQ waiting loop every time there is an event that might be interesting to the program. The program takes the io_uring ctx and also a loop state, which it can use to set the number of events it wants to wait for as well as the timeout value. 
Signed-off-by: Pavel Begunkov --- io_uring/bpf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ io_uring/bpf.h | 26 ++++++++++++++++++ io_uring/io_uring.c | 15 ++++++++++- 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/io_uring/bpf.c b/io_uring/bpf.c index 4cb5d25c9247..24dd2fe9134f 100644 --- a/io_uring/bpf.c +++ b/io_uring/bpf.c @@ -1,9 +1,18 @@ #include +#include #include "bpf.h" #include "register.h" +static const struct btf_type *loop_state_type; + +static int io_bpf_ops__loop(struct io_ring_ctx *ctx, struct iou_loop_state *ls) +{ + return IOU_RES_STOP; +} + static struct io_uring_ops io_bpf_ops_stubs = { + .loop = io_bpf_ops__loop, }; static bool bpf_io_is_valid_access(int off, int size, @@ -25,6 +34,17 @@ static int bpf_io_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size) { + const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); + + if (t == loop_state_type) { + if (off >= offsetof(struct iou_loop_state, cq_tail) && + off + size <= offsetofend(struct iou_loop_state, cq_tail)) + return SCALAR_VALUE; + if (off >= offsetof(struct iou_loop_state, timeout) && + off + size <= offsetofend(struct iou_loop_state, timeout)) + return SCALAR_VALUE; + } + return -EACCES; } @@ -34,8 +54,25 @@ static const struct bpf_verifier_ops bpf_io_verifier_ops = { .btf_struct_access = bpf_io_btf_struct_access, }; +static const struct btf_type * +io_lookup_struct_type(struct btf *btf, const char *name) +{ + s32 type_id; + + type_id = btf_find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (type_id < 0) + return NULL; + return btf_type_by_id(btf, type_id); +} + static int bpf_io_init(struct btf *btf) { + loop_state_type = io_lookup_struct_type(btf, "iou_loop_state"); + if (!loop_state_type) { + pr_err("io_uring: Failed to locate iou_loop_state\n"); + return -EINVAL; + } + return 0; } @@ -91,3 +128,30 @@ static int __init io_uring_bpf_init(void) return 0; } __initcall(io_uring_bpf_init); + +int 
io_run_cqwait_ops(struct io_ring_ctx *ctx, struct iou_loop_state *ls) +{ + int ret; + + io_run_task_work(); + + guard(mutex)(&ctx->uring_lock); + if (unlikely(!ctx->bpf_ops)) + return 1; + + if (unlikely(task_sigpending(current))) + return -EINTR; + + ret = ctx->bpf_ops->loop(ctx, ls); + if (ret == IOU_RES_STOP) + return 0; + + + if (io_local_work_pending(ctx)) { + unsigned nr_wait = ls->cq_tail - READ_ONCE(ctx->rings->cq.tail); + struct io_tw_state ts = {}; + + __io_run_local_work(ctx, ts, nr_wait, nr_wait); + } + return 1; +} diff --git a/io_uring/bpf.h b/io_uring/bpf.h index 34a51a57103d..0b7246c4f05b 100644 --- a/io_uring/bpf.h +++ b/io_uring/bpf.h @@ -7,15 +7,41 @@ #include "io_uring.h" +enum { + IOU_RES_WAIT, + IOU_RES_STOP, +}; + struct io_uring_ops { + int (*loop)(struct io_ring_ctx *ctx, struct iou_loop_state *ls); + + __u32 ring_fd; + void *priv; }; +static inline bool io_bpf_attached(struct io_ring_ctx *ctx) +{ + return IS_ENABLED(CONFIG_IO_URING_BPF) && ctx->bpf_ops != NULL; +} + +static inline bool io_has_cqwait_ops(struct io_ring_ctx *ctx) +{ + return io_bpf_attached(ctx); +} + + #ifdef CONFIG_IO_URING_BPF void io_unregister_bpf(struct io_ring_ctx *ctx); +int io_run_cqwait_ops(struct io_ring_ctx *ctx, struct iou_loop_state *ls); #else static inline void io_unregister_bpf(struct io_ring_ctx *ctx) { } +static inline int io_run_cqwait_ops(struct io_ring_ctx *ctx, + struct iou_loop_state *ls) +{ + return IOU_RES_STOP; +} #endif #endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5b80987ebb2c..1d5e3dd6c608 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2633,6 +2633,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, ktime_t start_time; int ret; + min_events = min_t(int, min_events, ctx->cq_entries); if (!io_allowed_run_tw(ctx)) @@ -2644,8 +2645,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))) 
io_cqring_do_overflow_flush(ctx); - if (__io_cqring_events_user(ctx) >= min_events) + + if (io_has_cqwait_ops(ctx)) { + if (ext_arg->min_time) + return -EINVAL; + } else if (__io_cqring_events_user(ctx) >= min_events) { return 0; + } init_waitqueue_func_entry(&iowq.wqe, io_wake_function); iowq.wqe.private = current; @@ -2706,6 +2712,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, __set_current_state(TASK_RUNNING); atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); + if (io_has_cqwait_ops(ctx)) { + ret = io_run_cqwait_ops(ctx, &iowq.ls); + if (ret <= 0) + break; + continue; + } + /* * Run task_work after scheduling and before io_should_wake(). * If we got woken because of task_work being processed, run it -- 2.49.0 Add ring_fd to the struct_ops and implement [un]registration. Signed-off-by: Pavel Begunkov --- include/linux/io_uring_types.h | 2 + io_uring/bpf.c | 69 +++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 43432a06d177..3a71ed2d05ea 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -274,6 +274,8 @@ struct io_ring_ctx { unsigned int compat: 1; unsigned int iowq_limits_set : 1; + unsigned int bpf_installed: 1; + struct task_struct *submitter_task; struct io_rings *rings; struct percpu_ref refs; diff --git a/io_uring/bpf.c b/io_uring/bpf.c index 24dd2fe9134f..683e87f1a58b 100644 --- a/io_uring/bpf.c +++ b/io_uring/bpf.c @@ -4,6 +4,7 @@ #include "bpf.h" #include "register.h" +static DEFINE_MUTEX(io_bpf_ctrl_mutex); static const struct btf_type *loop_state_type; static int io_bpf_ops__loop(struct io_ring_ctx *ctx, struct iou_loop_state *ls) @@ -87,20 +88,86 @@ static int bpf_io_init_member(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata) { + u32 moff = __btf_member_bit_offset(t, member) / 8; + const struct io_uring_ops *uops = udata; + struct 
io_uring_ops *ops = kdata; + + switch (moff) { + case offsetof(struct io_uring_ops, ring_fd): + ops->ring_fd = uops->ring_fd; + return 1; + } + return 0; +} + +static int io_install_bpf(struct io_ring_ctx *ctx, struct io_uring_ops *ops) +{ + if (ctx->bpf_ops) + return -EBUSY; + ops->priv = ctx; + ctx->bpf_ops = ops; + ctx->bpf_installed = 1; return 0; } static int bpf_io_reg(void *kdata, struct bpf_link *link) { - return -EOPNOTSUPP; + struct io_uring_ops *ops = kdata; + struct io_ring_ctx *ctx; + struct file *file; + int ret = -EBUSY; + + file = io_uring_register_get_file(ops->ring_fd, false); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + + scoped_guard(mutex, &io_bpf_ctrl_mutex) { + guard(mutex)(&ctx->uring_lock); + ret = io_install_bpf(ctx, ops); + } + + fput(file); + return ret; +} + +static void io_eject_bpf(struct io_ring_ctx *ctx) +{ + struct io_uring_ops *ops = ctx->bpf_ops; + + if (!WARN_ON_ONCE(!ops)) + return; + if (WARN_ON_ONCE(ops->priv != ctx)) + return; + + ops->priv = NULL; + ctx->bpf_ops = NULL; } static void bpf_io_unreg(void *kdata, struct bpf_link *link) { + struct io_uring_ops *ops = kdata; + struct io_ring_ctx *ctx; + + guard(mutex)(&io_bpf_ctrl_mutex); + ctx = ops->priv; + if (ctx) { + guard(mutex)(&ctx->uring_lock); + if (WARN_ON_ONCE(ctx->bpf_ops != ops)) + return; + + io_eject_bpf(ctx); + } } void io_unregister_bpf(struct io_ring_ctx *ctx) { + if (!ctx->bpf_installed) + return; + guard(mutex)(&io_bpf_ctrl_mutex); + guard(mutex)(&ctx->uring_lock); + if (ctx->bpf_ops) + io_eject_bpf(ctx); } static struct bpf_struct_ops bpf_io_uring_ops = { -- 2.49.0 A handle_events program should be able to parse the CQ and submit new requests, add kfuncs to cover that. The only essential kfunc here is bpf_io_uring_submit_sqes, and the rest are likely to be removed in a non-RFC version in favour of a more general approach. 
Signed-off-by: Pavel Begunkov --- io_uring/bpf.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/io_uring/bpf.c b/io_uring/bpf.c index 683e87f1a58b..006cea78cc10 100644 --- a/io_uring/bpf.c +++ b/io_uring/bpf.c @@ -3,10 +3,55 @@ #include "bpf.h" #include "register.h" +#include "memmap.h" static DEFINE_MUTEX(io_bpf_ctrl_mutex); static const struct btf_type *loop_state_type; +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_io_uring_submit_sqes(struct io_ring_ctx *ctx, u32 nr) +{ + return io_submit_sqes(ctx, nr); +} + +__bpf_kfunc +__u8 *bpf_io_uring_get_region(struct io_ring_ctx *ctx, __u32 region_id, + const size_t rdwr_buf_size) +{ + struct io_mapped_region *r; + + switch (region_id) { + case 0: + r = &ctx->ring_region; + break; + case 1: + r = &ctx->sq_region; + break; + case 2: + r = &ctx->param_region; + break; + default: + return NULL; + } + + if (unlikely(rdwr_buf_size > io_region_size(r))) + return NULL; + return io_region_get_ptr(r); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(io_uring_kfunc_set) +BTF_ID_FLAGS(func, bpf_io_uring_submit_sqes, KF_SLEEPABLE | KF_TRUSTED_ARGS); +BTF_ID_FLAGS(func, bpf_io_uring_get_region, KF_RET_NULL | KF_TRUSTED_ARGS); +BTF_KFUNCS_END(io_uring_kfunc_set) + +static const struct btf_kfunc_id_set bpf_io_uring_kfunc_set = { + .owner = THIS_MODULE, + .set = &io_uring_kfunc_set, +}; + static int io_bpf_ops__loop(struct io_ring_ctx *ctx, struct iou_loop_state *ls) { return IOU_RES_STOP; @@ -68,12 +113,20 @@ io_lookup_struct_type(struct btf *btf, const char *name) static int bpf_io_init(struct btf *btf) { + int ret; + loop_state_type = io_lookup_struct_type(btf, "iou_loop_state"); if (!loop_state_type) { pr_err("io_uring: Failed to locate iou_loop_state\n"); return -EINVAL; } + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_io_uring_kfunc_set); + if (ret) { + pr_err("io_uring: Failed to register kfuncs (%d)\n", ret); + return ret; + } return 0; } -- 2.49.0 
Add a helper for creating a ring from parameters and add support for IORING_SETUP_NO_SQARRAY. Signed-off-by: Pavel Begunkov --- tools/include/io_uring/mini_liburing.h | 57 +++++++++++++++++++------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h index 9ccb16074eb5..a90b7fb85bbb 100644 --- a/tools/include/io_uring/mini_liburing.h +++ b/tools/include/io_uring/mini_liburing.h @@ -6,6 +6,7 @@ #include #include #include +#include struct io_sq_ring { unsigned int *head; @@ -55,6 +56,7 @@ struct io_uring { struct io_uring_sq sq; struct io_uring_cq cq; int ring_fd; + unsigned flags; }; #if defined(__x86_64) || defined(__i386__) @@ -72,7 +74,14 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, void *ptr; int ret; - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + if (p->flags & IORING_SETUP_NO_SQARRAY) { + sq->ring_sz = p->cq_off.cqes; + sq->ring_sz += p->cq_entries * sizeof(struct io_uring_cqe); + } else { + sq->ring_sz = p->sq_off.array; + sq->ring_sz += p->sq_entries * sizeof(unsigned int); + } + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); if (ptr == MAP_FAILED) @@ -83,7 +92,8 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, sq->kring_entries = ptr + p->sq_off.ring_entries; sq->kflags = ptr + p->sq_off.flags; sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) + sq->array = ptr + p->sq_off.array; size = p->sq_entries * sizeof(struct io_uring_sqe); sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, @@ -126,28 +136,39 @@ static inline int io_uring_enter(int fd, unsigned int to_submit, flags, sig, _NSIG / 8); } -static inline int io_uring_queue_init(unsigned int entries, +static inline int io_uring_queue_init_params(unsigned int entries, struct io_uring *ring, - unsigned int flags) + struct 
io_uring_params *p) { - struct io_uring_params p; int fd, ret; memset(ring, 0, sizeof(*ring)); - memset(&p, 0, sizeof(p)); - p.flags = flags; - fd = io_uring_setup(entries, &p); + fd = io_uring_setup(entries, p); if (fd < 0) return fd; - ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); - if (!ret) + ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); + if (!ret) { ring->ring_fd = fd; - else + ring->flags = p->flags; + } else { close(fd); + } return ret; } +static inline int io_uring_queue_init(unsigned int entries, + struct io_uring *ring, + unsigned int flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags; + + return io_uring_queue_init_params(entries, ring, &p); +} + /* Get a sqe */ static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) { @@ -199,10 +220,18 @@ static inline int io_uring_submit(struct io_uring *ring) ktail = *sq->ktail; to_submit = sq->sqe_tail - sq->sqe_head; - for (submitted = 0; submitted < to_submit; submitted++) { - read_barrier(); - sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + + if (!(ring->flags & IORING_SETUP_NO_SQARRAY)) { + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + } else { + ktail += to_submit; + sq->sqe_head += to_submit; + submitted = to_submit; } + if (!submitted) return 0; -- 2.49.0 Add a io_uring bpf selftest/example. runner.c sets up a ring and BPF and calls io_uring_enter syscall to run the BPF program. All the execution logic is in basic.bpf.c, which creates a request, waits for its completion and repeats it N=10 times, after which it terminates. The makefile is borrowed from sched_ext. Note, it doesn't need to be all in BPF and can be intermingled with userspace code. This needs a separate example. 
Signed-off-by: Pavel Begunkov --- tools/testing/selftests/Makefile | 3 +- tools/testing/selftests/io_uring/Makefile | 164 +++++++++++++++++++ tools/testing/selftests/io_uring/basic.bpf.c | 81 +++++++++ tools/testing/selftests/io_uring/common.h | 2 + tools/testing/selftests/io_uring/runner.c | 80 +++++++++ tools/testing/selftests/io_uring/types.bpf.h | 136 +++++++++++++++ 6 files changed, 465 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/io_uring/Makefile create mode 100644 tools/testing/selftests/io_uring/basic.bpf.c create mode 100644 tools/testing/selftests/io_uring/common.h create mode 100644 tools/testing/selftests/io_uring/runner.c create mode 100644 tools/testing/selftests/io_uring/types.bpf.h diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c46ebdb9b8ef..31dd369a7154 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -129,6 +129,7 @@ TARGETS += vfio TARGETS += x86 TARGETS += x86/bugs TARGETS += zram +TARGETS += io_uring #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or # "make quicktest=1 kselftest" from top level Makefile @@ -146,7 +147,7 @@ endif # User can optionally provide a TARGETS skiplist. By default we skip # targets using BPF since it has cutting edge build time dependencies # which require more effort to install. 
-SKIP_TARGETS ?= bpf sched_ext +SKIP_TARGETS ?= bpf sched_ext io_uring ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) diff --git a/tools/testing/selftests/io_uring/Makefile b/tools/testing/selftests/io_uring/Makefile new file mode 100644 index 000000000000..7dfba422e5a6 --- /dev/null +++ b/tools/testing/selftests/io_uring/Makefile @@ -0,0 +1,164 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../../build/Build.include +include ../../../scripts/Makefile.arch +include ../../../scripts/Makefile.include + +TEST_GEN_PROGS := runner + +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +include ../lib.mk + +CURDIR := $(abspath .) +REPOROOT := $(abspath ../../../..) +TOOLSDIR := $(REPOROOT)/tools +LIBDIR := $(TOOLSDIR)/lib +BPFDIR := $(LIBDIR)/bpf +TOOLSINCDIR := $(TOOLSDIR)/include +BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool +APIDIR := $(TOOLSINCDIR)/uapi +GENDIR := $(REPOROOT)/include/generated +GENHDR := $(GENDIR)/autoconf.h + +OUTPUT_DIR := $(OUTPUT)/build +OBJ_DIR := $(OUTPUT_DIR)/obj +INCLUDE_DIR := $(OUTPUT_DIR)/include +BPFOBJ_DIR := $(OBJ_DIR)/libbpf +IOUOBJ_DIR := $(OBJ_DIR)/io_uring +LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a +BPFOBJ := $(BPFOBJ_DIR)/libbpf.a + +DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool +HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool +HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/ +HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/ +HOST_DESTDIR := $(OUTPUT_DIR)/host/ + +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif + +BPFTOOL ?= $(DEFAULT_BPFTOOL) + +ifneq ($(wildcard $(GENHDR)),) + GENFLAGS := -DHAVE_GENHDR +endif + +CFLAGS += -g -O2 -rdynamic -pthread -Wall -Werror 
$(GENFLAGS) \ + -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(CURDIR)/include + +# Silence some warnings when compiled with clang +ifneq ($(LLVM),) +CFLAGS += -Wno-unused-command-line-argument +endif + +LDFLAGS = -lelf -lz -lpthread -lzstd + +IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - &1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - $@ +else + $(call msg,CP,,$@) + $(Q)cp "$(VMLINUX_H)" $@ +endif + +$(IOUOBJ_DIR)/%.bpf.o: %.bpf.c $(INCLUDE_DIR)/vmlinux.h | $(BPFOBJ) $(IOUOBJ_DIR) + $(call msg,CLNG-BPF,,$(notdir $@)) + $(Q)$(CLANG) $(BPF_CFLAGS) -target bpf -c $< -o $@ + +$(INCLUDE_DIR)/%.bpf.skel.h: $(IOUOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BPFTOOL) | $(INCLUDE_DIR) + $(eval sched=$(notdir $@)) + $(call msg,GEN-SKEL,,$(sched)) + $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< + $(Q)$(BPFTOOL) gen object $(<:.o=.linked2.o) $(<:.o=.linked1.o) + $(Q)$(BPFTOOL) gen object $(<:.o=.linked3.o) $(<:.o=.linked2.o) + $(Q)diff $(<:.o=.linked2.o) $(<:.o=.linked3.o) + $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $@ + $(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $(@:.skel.h=.subskel.h) + +override define CLEAN + rm -rf $(OUTPUT_DIR) + rm -f $(TEST_GEN_PROGS) +endef + +all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubst %.c,%.skel.h,$(prog))) + +$(IOUOBJ_DIR)/runner.o: runner.c $(all_test_bpfprogs) | $(IOUOBJ_DIR) $(BPFOBJ) + $(CC) $(CFLAGS) -c $< -o $@ + +$(OUTPUT)/runner: $(IOUOBJ_DIR)/runner.o $(BPFOBJ) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +.DEFAULT_GOAL := all + +.DELETE_ON_ERROR: + +.SECONDARY: diff --git a/tools/testing/selftests/io_uring/basic.bpf.c b/tools/testing/selftests/io_uring/basic.bpf.c new file mode 100644 index 000000000000..c7954146ae4d --- /dev/null +++
b/tools/testing/selftests/io_uring/basic.bpf.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include "types.bpf.h" +#include "common.h" + +extern int bpf_io_uring_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) __ksym; +extern __u8 *bpf_io_uring_get_region(struct io_ring_ctx *ctx, __u32 region_id, + const __u64 rdwr_buf_size) __ksym; + +static inline void io_bpf_wait_nr(struct io_ring_ctx *ring, + struct iou_loop_state *ls, int nr) +{ + ls->cq_tail = ring->rings->cq.head + nr; +} + +enum { + RINGS_REGION_ID = 0, + SQ_REGION_ID = 1, +}; + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; +int reqs_to_run; + +SEC("struct_ops.s/link_loop") +int BPF_PROG(link_loop, struct io_ring_ctx *ring, struct iou_loop_state *ls) +{ + struct ring_hdr *sq_hdr, *cq_hdr; + struct io_uring_cqe *cqe, *cqes; + struct io_uring_sqe *sqes, *sqe; + void *rings; + int ret; + + sqes = (void *)bpf_io_uring_get_region(ring, SQ_REGION_ID, + SQ_ENTRIES * sizeof(struct io_uring_sqe)); + rings = (void *)bpf_io_uring_get_region(ring, RINGS_REGION_ID, + 64 + CQ_ENTRIES * sizeof(struct io_uring_cqe)); + if (!rings || !sqes) { + bpf_printk("error: can't get regions"); + return IOU_LOOP_STOP; + } + + sq_hdr = rings; + cq_hdr = sq_hdr + 1; + cqes = rings + 64; + + if (cq_hdr->tail != cq_hdr->head) { + unsigned cq_mask = CQ_ENTRIES - 1; + + cqe = &cqes[cq_hdr->head++ & cq_mask]; + bpf_printk("found cqe: data %lu res %i", + (unsigned long)cqe->user_data, (int)cqe->res); + + int left = --reqs_to_run; + if (left <= 0) { + bpf_printk("finished"); + return IOU_LOOP_STOP; + } + } + + bpf_printk("queue nop request, data %lu\n", (unsigned long)reqs_to_run); + sqe = &sqes[sq_hdr->tail & (SQ_ENTRIES - 1)]; + sqe->user_data = reqs_to_run; + sq_hdr->tail++; + + ret = bpf_io_uring_submit_sqes(ring, 1); + if (ret != 1) { + bpf_printk("bpf submit failed %i", ret); + return IOU_LOOP_STOP; + } + + io_bpf_wait_nr(ring, ls, 1); + return IOU_LOOP_WAIT; +} + 
+SEC(".struct_ops") +struct io_uring_ops basic_ops = { + .loop = (void *)link_loop, +}; diff --git a/tools/testing/selftests/io_uring/common.h b/tools/testing/selftests/io_uring/common.h new file mode 100644 index 000000000000..b86914f756f2 --- /dev/null +++ b/tools/testing/selftests/io_uring/common.h @@ -0,0 +1,2 @@ +#define CQ_ENTRIES 8 +#define SQ_ENTRIES 8 diff --git a/tools/testing/selftests/io_uring/runner.c b/tools/testing/selftests/io_uring/runner.c new file mode 100644 index 000000000000..f4226d576220 --- /dev/null +++ b/tools/testing/selftests/io_uring/runner.c @@ -0,0 +1,80 @@ +#include +#include +#include +#include + +#include +#include "basic.bpf.skel.h" +#include "common.h" + +struct basic *skel; +struct bpf_link *basic_link; + +static void setup_ring(struct io_uring *ring) +{ + struct io_uring_params params; + int ret; + + memset(¶ms, 0, sizeof(params)); + params.cq_entries = CQ_ENTRIES; + params.flags = IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN | + IORING_SETUP_NO_SQARRAY | + IORING_SETUP_CQSIZE; + + ret = io_uring_queue_init_params(SQ_ENTRIES, ring, ¶ms); + if (ret) { + fprintf(stderr, "ring init failed\n"); + exit(1); + } +} + +static void setup_bpf_ops(struct io_uring *ring) +{ + int ret; + + skel = basic__open(); + if (!skel) { + fprintf(stderr, "can't generate skeleton\n"); + exit(1); + } + + skel->struct_ops.basic_ops->ring_fd = ring->ring_fd; + skel->bss->reqs_to_run = 10; + + ret = basic__load(skel); + if (ret) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + basic_link = bpf_map__attach_struct_ops(skel->maps.basic_ops); + if (!basic_link) { + fprintf(stderr, "failed to attach ops\n"); + exit(1); + } +} + +static void run_ring(struct io_uring *ring) +{ + int ret; + + ret = io_uring_enter(ring->ring_fd, 0, 0, IORING_ENTER_GETEVENTS, NULL); + if (ret) { + fprintf(stderr, "run failed\n"); + exit(1); + } +} + +int main() { + struct io_uring ring; + + setup_ring(&ring); + setup_bpf_ops(&ring); + + 
run_ring(&ring); + + bpf_link__destroy(basic_link); + basic__destroy(skel); + return 0; +} diff --git a/tools/testing/selftests/io_uring/types.bpf.h b/tools/testing/selftests/io_uring/types.bpf.h new file mode 100644 index 000000000000..f2345fa68c4a --- /dev/null +++ b/tools/testing/selftests/io_uring/types.bpf.h @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include +#include + +struct io_uring { + __u32 head; + __u32 tail; +}; + +struct io_rings { + struct io_uring sq, cq; + __u32 cq_overflow; +}; + +struct io_ring_ctx { + unsigned int flags; + struct io_rings *rings; +}; + +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* IOSQE_ flags */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + union { + __u64 off; /* offset into file */ + __u64 addr2; + struct { + __u32 cmd_op; + __u32 __pad1; + }; + }; + union { + __u64 addr; /* pointer to buffer or iovecs */ + __u64 splice_off_in; + struct { + __u32 level; + __u32 optname; + }; + }; + __u32 len; /* buffer size or number of iovecs */ + union { + __u32 fsync_flags; + __u16 poll_events; /* compatibility */ + __u32 poll32_events; /* word-reversed for BE */ + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + __u32 fadvise_advice; + __u32 splice_flags; + __u32 rename_flags; + __u32 unlink_flags; + __u32 hardlink_flags; + __u32 xattr_flags; + __u32 msg_ring_flags; + __u32 uring_cmd_flags; + __u32 waitid_flags; + __u32 futex_flags; + __u32 install_fd_flags; + __u32 nop_flags; + __u32 pipe_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ + /* pack this to avoid bogus arm OABI complaints */ + union { + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 
personality; + union { + __s32 splice_fd_in; + __u32 file_index; + __u32 zcrx_ifq_idx; + __u32 optlen; + struct { + __u16 addr_len; + __u16 __pad3[1]; + }; + }; + union { + struct { + __u64 addr3; + __u64 __pad2[1]; + }; + struct { + __u64 attr_ptr; /* pointer to attribute information */ + __u64 attr_type_mask; /* bit mask of attributes */ + }; + __u64 optval; + /* + * If the ring is initialized with IORING_SETUP_SQE128, then + * this field is used for 80 bytes of arbitrary command data + */ + __u8 cmd[0]; + }; +}; + +struct io_uring_cqe { + __u64 user_data; + __s32 res; + __u32 flags; +}; + + +struct iou_loop_state { + /* + * The CQE index to wait for. Only serves as a hint and can still be + * woken up earlier. + */ + __u32 cq_tail; + __s64 timeout; +}; + +struct io_uring_ops { + int (*loop)(struct io_ring_ctx *ctx, struct iou_loop_state *ls); + + __u32 ring_fd; + void *priv; +}; + +enum { + IOU_LOOP_WAIT, + IOU_LOOP_STOP, +}; + +struct ring_hdr { + __u32 head; + __u32 tail; +}; -- 2.49.0