Signed-off-by: Alice Ryhl --- rust/kernel/sync/poll.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 2 deletions(-) diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs index 0ec985d560c8d3405c08dbd86e48b14c7c34484d..9fb004c71c78375bb16ded7f518aa282cdcc50f5 100644 --- a/rust/kernel/sync/poll.rs +++ b/rust/kernel/sync/poll.rs @@ -5,12 +5,16 @@ //! Utilities for working with `struct poll_table`. use crate::{ - bindings, + alloc::AllocError, + bindings, container_of, fs::File, prelude::*, + sync::atomic::{Acquire, Atomic, Relaxed, Release}, + sync::lock::{Backend, Lock}, sync::{CondVar, LockClassKey}, + types::Opaque, }; -use core::{marker::PhantomData, ops::Deref}; +use core::{marker::PhantomData, ops::Deref, ptr}; /// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class. #[macro_export] @@ -22,6 +26,17 @@ macro_rules! new_poll_condvar { }; } +/// Creates a [`UpgradePollCondVar`] initialiser with the given name and a newly-created lock +/// class. +#[macro_export] +macro_rules! new_upgrade_poll_condvar { + ($($name:literal)?) => { + $crate::sync::poll::UpgradePollCondVar::new( + $crate::optional_name!($($name)?), $crate::static_lock_class!() + ) + }; +} + /// Wraps the kernel's `poll_table`. /// /// # Invariants @@ -66,6 +81,7 @@ pub fn register_wait(&self, file: &File, cv: &PollCondVar) { /// /// [`CondVar`]: crate::sync::CondVar #[pin_data(PinnedDrop)] +#[repr(transparent)] pub struct PollCondVar { #[pin] inner: CondVar, @@ -78,6 +94,17 @@ pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit inner <- CondVar::new(name, key), }) } + + /// Use this `CondVar` as a `PollCondVar`. + /// + /// # Safety + /// + /// After the last use of the returned `&PollCondVar`, `__wake_up_pollfree` must be called on + /// the `wait_queue_head` at least one grace period before the `CondVar` is destroyed. 
+ unsafe fn from_non_poll(c: &CondVar) -> &PollCondVar { + // SAFETY: Layout is the same. Caller ensures that PollTables are cleared in time. + unsafe { &*ptr::from_ref(c).cast() } + } } // Make the `CondVar` methods callable on `PollCondVar`. @@ -104,3 +131,135 @@ fn drop(self: Pin<&mut Self>) { unsafe { bindings::synchronize_rcu() }; } } + +/// Wrapper around [`CondVar`] that can be upgraded to [`PollCondVar`]. +/// +/// By using this wrapper, you can avoid rcu for cases that don't use [`PollTable`], and in all +/// cases you can avoid `synchronize_rcu()`. +/// +/// # Invariants +/// +/// `active` either references `simple`, or a `kmalloc` allocation holding an +/// `UpgradePollCondVarInner`. In the latter case, the allocation remains valid until +/// `Self::drop()` plus one grace period. +#[pin_data(PinnedDrop)] +pub struct UpgradePollCondVar { + #[pin] + simple: CondVar, + active: Atomic<*mut CondVar>, +} + +#[pin_data] +#[repr(C)] +struct UpgradePollCondVarInner { + #[pin] + upgraded: CondVar, + #[pin] + rcu: Opaque<bindings::callback_head>, +} + +impl UpgradePollCondVar { + /// Constructs a new upgradable condvar initialiser. + pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit<Self> { + pin_init!(&this in Self { + simple <- CondVar::new(name, key), + // SAFETY: `this->simple` is in-bounds. + active: Atomic::new((unsafe { &raw const (*this.as_ptr()).simple }).cast_mut()), + }) + } + + /// Obtain a [`PollCondVar`], upgrading if necessary. + /// + /// You should use the same lock as what is passed to the `wait_*` methods. Otherwise wakeups + /// may be missed. 
+ pub fn poll<T, B: Backend>( + &self, + lock: &Lock<T, B>, + name: &'static CStr, + key: Pin<&'static LockClassKey>, + ) -> Result<&PollCondVar, AllocError> { + let mut ptr = self.active.load(Acquire).cast_const(); + if ptr::eq(ptr, &self.simple) { + self.upgrade(lock, name, key)?; + ptr = self.active.load(Acquire).cast_const(); + debug_assert_ne!(ptr, ptr::from_ref(&self.simple)); + } + // SAFETY: Signature ensures that last use of returned `&PollCondVar` is before drop(), and + // drop() calls `__wake_up_pollfree` followed by waiting a grace period before the + // `CondVar` is destroyed. + Ok(unsafe { PollCondVar::from_non_poll(&*ptr) }) + } + + fn upgrade<T, B: Backend>( + &self, + lock: &Lock<T, B>, + name: &'static CStr, + key: Pin<&'static LockClassKey>, + ) -> Result<(), AllocError> { + let upgraded = KBox::pin_init( + pin_init!(UpgradePollCondVarInner { + upgraded <- CondVar::new(name, key), + rcu: Opaque::uninit(), + }), + GFP_KERNEL, + ) + .map_err(|_| AllocError)?; + + // SAFETY: The value is treated as pinned. + let upgraded = KBox::into_raw(unsafe { Pin::into_inner_unchecked(upgraded) }); + + let res = self.active.cmpxchg( + ptr::from_ref(&self.simple).cast_mut(), + // SAFETY: This operation stays in-bounds of the above allocation. + unsafe { &raw mut (*upgraded).upgraded }, + Release, + ); + + if res.is_err() { + // SAFETY: The cmpxchg failed, so take back ownership of the box. + drop(unsafe { KBox::from_raw(upgraded) }); + return Ok(()); + } + + // If a normal waiter registers in parallel with us, then either: + // * We took the lock first. In that case, the waiter sees the above cmpxchg. + // * They took the lock first. In that case, we wake them up below. + drop(lock.lock()); + self.simple.notify_all(); + + Ok(()) + } +} + +// Make the `CondVar` methods callable on `UpgradePollCondVar`. 
+impl Deref for UpgradePollCondVar { + type Target = CondVar; + + fn deref(&self) -> &CondVar { + // SAFETY: By the type invariants, this is either `&self.simple` or references an + // allocation that lives until `UpgradePollCondVar::drop`. + unsafe { &*self.active.load(Acquire) } + } +} + +#[pinned_drop] +impl PinnedDrop for UpgradePollCondVar { + #[inline] + fn drop(self: Pin<&mut Self>) { + // ORDERING: All calls to upgrade happen-before Drop, so no synchronization is required. + let ptr = self.active.load(Relaxed); + if ptr::eq(ptr, &self.simple) { + return; + } + // SAFETY: When the pointer is not `&self.simple`, it is an `UpgradePollCondVarInner`. + let ptr = unsafe { container_of!(ptr, UpgradePollCondVarInner, upgraded) }; + // SAFETY: The pointer points at a valid `wait_queue_head`. + unsafe { bindings::__wake_up_pollfree((*ptr).upgraded.wait_queue_head.get()) }; + // This skips drop of `CondVar`, but that's ok because we reimplemented its drop here. + // + // SAFETY: `__wake_up_pollfree` ensures that all registered PollTable instances are gone in + // one grace period, and this is the destructor so no new PollTable instances can be + // registered. Thus, it's safe to rcu free the `UpgradePollCondVarInner`. + unsafe { bindings::kvfree_call_rcu((*ptr).rcu.get(), ptr.cast::<crate::ffi::c_void>()) }; + } +} -- 2.52.0.457.g6b5491de43-goog Most processes do not use Rust Binder with epoll, so avoid paying the synchronize_rcu() cost in drop for those that don't need it. For those that do, we also manage to replace synchronize_rcu() with kfree_rcu(), though we introduce an extra allocation. 
Signed-off-by: Alice Ryhl --- drivers/android/binder/process.rs | 2 +- drivers/android/binder/thread.rs | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index 132055b4790f0ec69a87635b498909df2bf475e2..9374f1a86766c09321b57e565b6317cc290ea32b 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -1684,7 +1684,7 @@ pub(crate) fn poll( table: PollTable<'_>, ) -> Result { let thread = this.get_current_thread()?; - let (from_proc, mut mask) = thread.poll(file, table); + let (from_proc, mut mask) = thread.poll(file, table)?; if mask == 0 && from_proc && !this.inner.lock().work.is_empty() { mask |= bindings::POLLIN; } diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs index 82264db06507d4641b60cbed96af482a9d36e7b2..8f09cf1599ae7edcf2ee60b2cb1b08cc2d0afd3f 100644 --- a/drivers/android/binder/thread.rs +++ b/drivers/android/binder/thread.rs @@ -16,7 +16,7 @@ seq_file::SeqFile, seq_print, sync::atomic::{ordering::Relaxed, Atomic}, - sync::poll::{PollCondVar, PollTable}, + sync::poll::{PollTable, UpgradePollCondVar}, sync::{Arc, SpinLock}, task::Task, types::ARef, @@ -412,7 +412,7 @@ pub(crate) struct Thread { #[pin] inner: SpinLock, #[pin] - work_condvar: PollCondVar, + work_condvar: UpgradePollCondVar, /// Used to insert this thread into the process' `ready_threads` list. /// /// INVARIANT: May never be used for any other list than the `self.process.ready_threads`. 
@@ -443,7 +443,7 @@ pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> { process, task: ARef::from(&**kernel::current!()), inner <- kernel::new_spinlock!(inner, "Thread::inner"), - work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"), + work_condvar <- kernel::new_upgrade_poll_condvar!("Thread::work_condvar"), links <- ListLinks::new(), links_track <- AtomicTracker::new(), }), @@ -1484,10 +1484,15 @@ pub(crate) fn write_read(self: &Arc<Self>, data: UserSlice, wait: bool) -> Result ret } - pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) { - table.register_wait(file, &self.work_condvar); + pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> Result<(bool, u32)> { + let condvar = self.work_condvar.poll( + &self.inner, + c"Thread::work_condvar (upgraded)", + kernel::static_lock_class!(), + )?; + table.register_wait(file, condvar); let mut inner = self.inner.lock(); - (inner.should_use_process_work_queue(), inner.poll()) + Ok((inner.should_use_process_work_queue(), inner.poll())) } /// Make the call to `get_work` or `get_work_local` return immediately, if any. @@ -1523,7 +1528,6 @@ pub(crate) fn notify_if_poll_ready(&self, sync: bool) { pub(crate) fn release(self: &Arc<Self>) { self.inner.lock().is_dead = true; - //self.work_condvar.clear(); self.unwind_transaction_stack(); // Cancel all pending work items. -- 2.52.0.457.g6b5491de43-goog