This patch adds two headers, io_uring.h and io_uring/cmd.h, to bindings_helper.h for implementing the Rust io_uring abstraction. Signed-off-by: Sidong Yang --- rust/bindings/bindings_helper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 84d60635e8a9..96beaea73755 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -75,6 +75,8 @@ #include #include #include +#include +#include #include #if defined(CONFIG_DRM_PANIC_SCREEN_QR_CODE) -- 2.43.0 The pdu field in io_uring_cmd may contain stale data when a request object is recycled from the slab cache. Accessing uninitialized or garbage memory can lead to undefined behavior in users of the pdu. Ensure the pdu buffer is cleared during io_uring_cmd_prep() so that each command starts from a well-defined state. This avoids exposing uninitialized memory and prevents potential misinterpretation of data from previous requests. No functional change is intended other than guaranteeing that pdu is always zero-initialized before use. Signed-off-by: Sidong Yang --- io_uring/uring_cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 053bac89b6c0..2492525d4e43 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -203,6 +203,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!ac) return -ENOMEM; ioucmd->sqe = sqe; + memset(&ioucmd->pdu, 0, sizeof(ioucmd->pdu)); return 0; } -- 2.43.0 Implement the io-uring abstractions needed for miscdevices and other char devices that have an io-uring command interface. * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd`, which will be used as an argument for `MiscDevice::uring_cmd()`. The driver can get the `cmd_op` sent from userspace through it. It also has `flags`, which includes an option indicating that the command is reissued.
* `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe`, which can be obtained from `IoUringCmd::sqe()`; the driver can get `cmd_data` from userspace through it. `IoUringSqe` also has more data, such as the opcode, that could be used in a driver. Signed-off-by: Sidong Yang --- rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 2 files changed, 307 insertions(+) create mode 100644 rust/kernel/io_uring.rs diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs new file mode 100644 index 000000000000..61e88bdf4e42 --- /dev/null +++ b/rust/kernel/io_uring.rs @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI + +//! Abstractions for io-uring. +//! +//! This module provides types for implementing the io-uring interface for char devices. +//! +//! +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and +//! [`include/linux/io_uring.h`](srctree/include/linux/io_uring.h) + +use core::{mem::MaybeUninit, pin::Pin}; + +use crate::error::from_result; +use crate::transmute::{AsBytes, FromBytes}; +use crate::{fs::File, types::Opaque}; + +use crate::prelude::*; + +/// io-uring opcode +pub mod opcode { + /// opcode for uring cmd + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; +} + +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. +/// +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` +/// binding from the Linux kernel. It represents a command structure used +/// in io_uring operations within the kernel. +/// This type is used internally by the io_uring subsystem to manage +/// asynchronous I/O commands. +/// +/// This type should not be constructed or manipulated directly by +/// kernel module developers. +/// +/// # INVARIANT +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`.
+#[repr(transparent)] +pub struct IoUringCmd { + /// An opaque wrapper containing the actual `io_uring_cmd` data. + inner: Opaque, +} + +impl IoUringCmd { + /// Returns the `cmd_op` associated with the `io_uring_cmd`. + #[inline] + pub fn cmd_op(&self) -> u32 { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + unsafe { (*self.inner.get()).cmd_op } + } + + /// Returns the flags associated with the `io_uring_cmd`. + #[inline] + pub fn flags(&self) -> u32 { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + unsafe { (*self.inner.get()).flags } + } + + /// Reads the protocol data unit from the uring cmd as a `T` that implements `FromBytes`. + /// + /// Fails with [`EFAULT`] if the size of `T` is bigger than the pdu size. + #[inline] + pub fn read_pdu(&self) -> Result { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner = unsafe { &mut *self.inner.get() }; + + let len = size_of::(); + if len > inner.pdu.len() { + return Err(EFAULT); + } + + let mut out: MaybeUninit = MaybeUninit::uninit(); + let ptr = &raw mut inner.pdu as *const c_void; + + // SAFETY: + // * `ptr` is a valid pointer derived from `self.inner`, which is guaranteed by the type invariant. + // * `out` is a valid pointer to a `T` that implements `FromBytes`, and it was checked above + // that the size of `T` is no bigger than the pdu size. + unsafe { + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::(), len); + } + + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements + // `FromBytes`, any bit-pattern is a valid value for this type. + Ok(unsafe { out.assume_init() }) + } + + /// Writes the provided `value` to the `pdu` of this uring cmd. + /// + /// Fails with [`EFAULT`] if the size of `T` is bigger than the pdu size.
+ #[inline] + pub fn write_pdu(&mut self, value: &T) -> Result<()> { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner = unsafe { &mut *self.inner.get() }; + + let len = size_of::(); + if len > inner.pdu.len() { + return Err(EFAULT); + } + + let src = (value as *const T).cast::(); + let dst = &raw mut inner.pdu as *mut c_void; + + // SAFETY: + // * `src` points to valid memory, which is guaranteed because `T` implements `AsBytes`. + // * `dst` is valid: it comes from `self.inner`, which is guaranteed by the type invariant. + // * It's safe to copy because the size of `T` is no more than the length of the pdu. + unsafe { + core::ptr::copy_nonoverlapping(src, dst, len); + } + + Ok(()) + } + + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd`. + /// + /// # Safety + /// + /// The caller must guarantee that: + /// - `ptr` is non-null, properly aligned, and points to a valid + /// `bindings::io_uring_cmd`. + /// - The pointed-to memory remains initialized and valid for the entire + /// lifetime `'a` of the returned reference. + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying + /// object is **not moved** (pinning requirement). + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same + /// object for its entire lifetime: + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be + /// alive at the same time. + /// - There must be no concurrent reads/writes through raw pointers, FFI, or + /// other kernel paths to the same object during this lifetime. + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), + /// the caller must provide synchronization to uphold this exclusivity. + /// - This function relies on `IoUringCmd` being `repr(transparent)` over + /// `bindings::io_uring_cmd` so the cast preserves layout.
+ #[inline] + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { + // SAFETY: + // * The caller guarantees that the pointer is not dangling and stays + // valid for the duration of 'a. + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and + // has the same memory layout as `bindings::io_uring_cmd`. + // * The returned `Pin` ensures that the object cannot be moved, which + // is required because the kernel may hold pointers to this memory + // location and moving it would invalidate those pointers. + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } + } + + /// Returns the file that is referenced by this uring cmd. + #[inline] + pub fn file(&self) -> &File { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + let file = unsafe { (*self.inner.get()).file }; + + // SAFETY: + // * `file` points to a valid file. + // * The refcount is positive after the submission queue entry has been issued. + // * There is no active fdget_pos region on the file on this thread. + unsafe { File::from_raw_file(file) } + } + + /// Returns a reference to the [`IoUringSqe`] associated with this command. + #[inline] + pub fn sqe(&self) -> &IoUringSqe { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_cmd`, so dereferencing is safe. + let sqe = unsafe { (*self.inner.get()).sqe }; + // SAFETY: The call guarantees that `sqe` points to a valid `io_uring_sqe`. + unsafe { IoUringSqe::from_raw(sqe) } + } + + /// Completes this [`IoUringCmd`] request that was previously queued. + /// + /// # Safety + /// + /// - This function must be called **only** for a command whose `uring_cmd` + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. + /// + /// # Parameters + /// + /// - `ret`: Result to return to userspace. + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`.
+ /// - `issue_flags`: Flags associated with this request, typically the same + /// as those passed to the `uring_cmd` handler. + #[inline] + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result, res2: u64, issue_flags: u32) { + let ret = from_result(|| ret) as isize; + // SAFETY: The type invariant guarantees that `self.inner` points to a valid, live `io_uring_cmd`. NOTE(review): this fn is not `unsafe`, so the `-EIOCBQUEUED` precondition in its docs is not enforced - confirm. + unsafe { + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); + } + } +} + +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. +/// +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) +/// binding from the Linux kernel. It represents a Submission Queue Entry +/// used in io_uring operations within the kernel. +/// +/// # Type Safety +/// +/// The `#[repr(transparent)]` attribute ensures that this wrapper has +/// the same memory layout as the underlying `io_uring_sqe` structure, +/// allowing it to be safely transmuted between the two representations. +/// +/// # Fields +/// +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. +/// The `Opaque` type prevents direct access to the internal +/// structure fields, ensuring memory safety and encapsulation. +/// +/// # Usage +/// +/// This type represents a submission queue entry that describes an I/O +/// operation to be executed by the io_uring subsystem. It contains +/// information such as the operation type, file descriptor, buffer +/// pointers, and other operation-specific data. +/// +/// Users can obtain this type from the [`IoUringCmd::sqe()`] method, which +/// extracts the submission queue entry associated with a command. +/// +/// This type should not be constructed or manipulated directly by +/// kernel module developers. +/// +/// # INVARIANT +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`.
+#[repr(transparent)] +pub struct IoUringSqe { + inner: Opaque, +} + +impl IoUringSqe { + /// Reads and interprets the `cmd` field of a `bindings::io_uring_sqe` as a value of type `T`. + /// + /// # Safety & Invariants + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no + /// invalid bit patterns and can be safely reconstructed from raw bytes. + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). + /// Only the standard `io_uring_sqe` layout is handled here. + /// + /// # Errors + /// * Returns `EINVAL` if `self` does not hold an `opcode::URING_CMD`. + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. + /// + /// # Returns + /// * On success, returns a `T` deserialized from the `cmd`. + /// * On failure, returns an appropriate error as described above. + pub fn cmd_data(&self) -> Result { + // SAFETY: `self.inner` is guaranteed by the type invariant to point + // to a live `io_uring_sqe`, so dereferencing is safe. + let sqe = unsafe { &*self.inner.get() }; + + if u32::from(sqe.opcode) != opcode::URING_CMD { + return Err(EINVAL); + } + + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees + // that this union variant is initialized and valid. + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); + + if cmd_len < size_of::() { + return Err(EFAULT); + } + + let cmd_ptr = cmd.as_ptr() as *mut T; + + // SAFETY: `cmd_ptr` is valid because it comes from `self.inner`, which is guaranteed by the + // type invariant. It also points to an initialized `T` from userspace. + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; + + Ok(ret) + } + + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`.
+ /// + /// # Safety + /// + /// The caller must guarantee that: + /// - `ptr` is non-null, properly aligned, and points to a valid initialized + /// `bindings::io_uring_sqe`. + /// - The pointed-to memory remains valid (not freed or repurposed) for the + /// entire lifetime `'a` of the returned reference. + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is + /// alive, there must be **no mutable access** to the same object through any + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). + /// Multiple `&` is fine **only if all of them are read-only** for the entire + /// overlapping lifetime. + /// - This relies on `IoUringSqe` being `repr(transparent)` over + /// `bindings::io_uring_sqe`, so the cast preserves layout. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the + // same memory layout as `bindings::io_uring_sqe`. + unsafe { &*ptr.cast() } + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index ed53169e795c..d38cf7137401 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -91,6 +91,7 @@ pub mod fs; pub mod init; pub mod io; +pub mod io_uring; pub mod ioctl; pub mod jump_label; #[cfg(CONFIG_KUNIT)] -- 2.43.0 This patch introduces support for `uring_cmd` to the `miscdevice` framework. This is achieved by adding a new `uring_cmd` method to the `MiscDevice` trait and wiring it up to the corresponding `file_operations` entry. The `uring_cmd` function provides a mechanism for `io_uring` to issue commands to a device driver. The new `uring_cmd` method takes the device, an `IoUringCmd` object, and issue flags as arguments. The `IoUringCmd` object is a safe Rust abstraction around the raw `io_uring_cmd` struct. 
To enable `uring_cmd` for a specific misc device, the `HAS_URING_CMD` constant must be set to `true` in the `MiscDevice` implementation. Signed-off-by: Sidong Yang --- rust/kernel/miscdevice.rs | 53 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 6373fe183b27..fcef579218ba 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -11,9 +11,10 @@ use crate::{ bindings, device::Device, - error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, + error::{from_result, to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, fs::File, + io_uring::IoUringCmd, mm::virt::VmaNew, prelude::*, seq_file::SeqFile, @@ -180,6 +181,21 @@ fn show_fdinfo( ) { build_error!(VTABLE_DEFAULT_ERROR) } + + /// Handler for uring_cmd. + /// + /// This function is invoked when a userspace process submits a uring_cmd op + /// on an io-uring submission queue. The `device` is a borrowed instance defined + /// by `Ptr`. The `io_uring_cmd` can be used to get the arguments cmd_op, sqe and + /// cmd_data. The `issue_flags` are flags that include options for uring_cmd. + /// The options are listed in `kernel::io_uring::cmd_flags`. + fn uring_cmd( + _device: ::Borrowed<'_>, + _io_uring_cmd: Pin<&mut IoUringCmd>, + _issue_flags: u32, + ) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } } /// A vtable for the file operations of a Rust miscdevice. @@ -337,6 +353,36 @@ impl MiscdeviceVTable { T::show_fdinfo(device, m, file); } + /// # Safety + /// + /// The caller must ensure that: + /// - The pointer `ioucmd` is not null and points to a valid `bindings::io_uring_cmd`. + unsafe extern "C" fn uring_cmd( + ioucmd: *mut bindings::io_uring_cmd, + issue_flags: ffi::c_uint, + ) -> c_int { + // SAFETY: The `file` referenced by `ioucmd` is a valid pointer; it's assigned during + // uring cmd preparation, so dereferencing is safe.
+ let raw_file = unsafe { (*ioucmd).file }; + + // SAFETY: `private_data` is guaranteed to hold a valid pointer after + // this file has been opened, so dereferencing is safe. + let private = unsafe { (*raw_file).private_data }.cast(); + + // SAFETY: `ioucmd` is not null and points to a valid `bindings::io_uring_cmd`, + // and the memory pointed to by `ioucmd` is valid and will not be moved or + // freed for the lifetime of the returned value `ioucmd`. + let ioucmd = unsafe { IoUringCmd::from_raw(ioucmd) }; + + // SAFETY: This call is safe because `private` is returned by + // `into_foreign` in [`open`]. And it's guaranteed + // that `from_foreign` is called by [`release`] after the end of + // the lifetime of `device`. + let device = unsafe { ::borrow(private) }; + + from_result(|| T::uring_cmd(device, ioucmd, issue_flags)) + } + const VTABLE: bindings::file_operations = bindings::file_operations { open: Some(Self::open), release: Some(Self::release), @@ -359,6 +405,11 @@ impl MiscdeviceVTable { } else { None }, + uring_cmd: if T::HAS_URING_CMD { + Some(Self::uring_cmd) + } else { + None + }, // SAFETY: All zeros is a valid value for `bindings::file_operations`. ..unsafe { MaybeUninit::zeroed().assume_init() } }; -- 2.43.0 This patch extends the `rust_misc_device` sample to demonstrate how to use the `uring_cmd` interface for asynchronous device operations. The new implementation handles two `uring_cmd` operations: * `RUST_MISC_DEV_URING_CMD_SET_VALUE`: Sets a value in the device. * `RUST_MISC_DEV_URING_CMD_GET_VALUE`: Gets a value from the device. To use this new functionality, users can submit `IORING_OP_URING_CMD` operations to the `rust_misc_device` character device.
Signed-off-by: Sidong Yang --- samples/rust/rust_misc_device.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs index e7ab77448f75..1f25d2b1f4d8 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -101,6 +101,7 @@ c_str, device::Device, fs::File, + io_uring::IoUringCmd, ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, new_mutex, @@ -114,6 +115,9 @@ const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::('|' as u32, 0x81); const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::('|' as u32, 0x82); +const RUST_MISC_DEV_URING_CMD_SET_VALUE: u32 = _IOW::('|' as u32, 0x83); +const RUST_MISC_DEV_URING_CMD_GET_VALUE: u32 = _IOR::('|' as u32, 0x84); + module! { type: RustMiscDeviceModule, name: "rust_misc_device", @@ -192,6 +196,29 @@ fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result Ok(0) } + + fn uring_cmd( + me: Pin<&RustMiscDevice>, + io_uring_cmd: Pin<&mut IoUringCmd>, + _issue_flags: u32, + ) -> Result { + dev_info!(me.dev, "UringCmd Rust Misc Device Sample\n"); + + let cmd = io_uring_cmd.cmd_op(); + let addr: usize = io_uring_cmd.sqe().cmd_data()?; + let user_ptr = UserPtr::from_addr(addr); + let user_slice = UserSlice::new(user_ptr, 8); + + match cmd { + RUST_MISC_DEV_URING_CMD_SET_VALUE => me.set_value(user_slice.reader())?, + RUST_MISC_DEV_URING_CMD_GET_VALUE => me.get_value(user_slice.writer())?, + _ => { + dev_err!(me.dev, "-> uring_cmd not recognised: {}\n", cmd); + return Err(ENOTTY); + } + }; + Ok(0) + } } #[pinned_drop] -- 2.43.0