Add bandwidth limiting support to rnull via the `mbps` configfs attribute and the `mbps` module parameter. When set to a non-zero value, the driver limits I/O throughput to the specified rate in mebibytes per second (MiB/s); 0, the default, means no limit. The implementation uses a token bucket style byte budget that is refilled by a timer every 20 ms. When the budget for the current interval is exceeded, the driver stops the hardware queues and returns BLK_STS_DEV_RESOURCE for the request, delaying request dispatch until the timer restarts the queues. Signed-off-by: Andreas Hindborg --- drivers/block/rnull/configfs.rs | 7 ++- drivers/block/rnull/rnull.rs | 111 +++++++++++++++++++++++++++++++++++----- 2 files changed, 105 insertions(+), 13 deletions(-) diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs index e365eb06be6de..d369d8b683c61 100644 --- a/drivers/block/rnull/configfs.rs +++ b/drivers/block/rnull/configfs.rs @@ -105,6 +105,7 @@ fn make_group( badblocks_once: 13, badblocks_partial_io: 14, cache_size_mib: 15, + mbps: 16, ], }; @@ -136,6 +137,7 @@ fn make_group( GFP_KERNEL )?, cache_size_mib: 0, + mbps: 0, }), }), core::iter::empty(), @@ -200,6 +202,7 @@ struct DeviceConfigInner { bad_blocks_partial_io: bool, cache_size_mib: u64, disk_storage: Arc, + mbps: u32, } #[vtable] @@ -239,6 +242,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result { bad_blocks_once: guard.bad_blocks_once, bad_blocks_partial_io: guard.bad_blocks_partial_io, storage: guard.disk_storage.clone(), + bandwidth_limit: u64::from(guard.mbps) * 2u64.pow(20), })?); guard.powered = true; } else if guard.powered && !power_op { @@ -250,7 +254,6 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result { } } -// DiskStorage::new(cache_size_mib << 20, block_size as usize), configfs_simple_field!(DeviceConfig, 1, block_size, u32, check GenDiskBuilder::validate_block_size); configfs_simple_bool_field!(DeviceConfig, 2, rotational); configfs_simple_field!(DeviceConfig, 3, capacity_mib, u64); @@ -457,3 +460,5 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result { Ok(()) }) ); + +configfs_simple_field!(DeviceConfig, 16, mbps, u32); diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs 
index 55cbdfed25414..16a4f915e59f4 100644 --- a/drivers/block/rnull/rnull.rs +++ b/drivers/block/rnull/rnull.rs @@ -25,7 +25,8 @@ self, gen_disk::{ self, - GenDisk, // + GenDisk, + GenDiskRef, // }, Operations, TagSet, // @@ -37,24 +38,31 @@ Result, // }, ffi, + impl_has_hr_timer, new_mutex, new_spinlock, pr_info, prelude::*, + revocable::Revocable, str::CString, sync::{ aref::ARef, atomic::{ ordering, Atomic, // - }, // + }, Arc, + ArcBorrow, Mutex, + SetOnce, SpinLock, - SpinLockGuard, + SpinLockGuard, // }, time::{ hrtimer::{ + self, + ArcHrTimerHandle, + HrTimer, HrTimerCallback, HrTimerCallbackContext, HrTimerPointer, @@ -129,6 +137,10 @@ default: 0, description: "No IO scheduler", }, + mbps: u32 { + default: 0, + description: "Max bandwidth in MiB/s. 0 means no limit.", + }, }, } @@ -174,6 +186,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit { bad_blocks_once: false, bad_blocks_partial_io: false, storage: Arc::pin_init(DiskStorage::new(0, block_size as usize), GFP_KERNEL)?, + bandwidth_limit: u64::from(*module_parameters::mbps.value()) * 2u64.pow(20), })?; disks.push(disk, GFP_KERNEL)?; } @@ -204,6 +217,7 @@ struct NullBlkOptions<'a> { bad_blocks_once: bool, bad_blocks_partial_io: bool, storage: Arc, + bandwidth_limit: u64, } #[pin_data] @@ -216,9 +230,18 @@ struct NullBlkDevice { bad_blocks: Arc, bad_blocks_once: bool, bad_blocks_partial_io: bool, + bandwidth_limit: u64, + #[pin] + bandwidth_timer: HrTimer, + bandwidth_bytes: Atomic, + #[pin] + bandwidth_timer_handle: SpinLock>>, + disk: SetOnce>>>, } impl NullBlkDevice { + const BANDWIDTH_TIMER_INTERVAL: Delta = Delta::from_millis(20); + fn new(options: NullBlkOptions<'_>) -> Result>> { let NullBlkOptions { name, @@ -236,6 +259,7 @@ fn new(options: NullBlkOptions<'_>) -> Result>> { bad_blocks_once, bad_blocks_partial_io, storage, + bandwidth_limit, } = options; let mut flags = mq::tag_set::Flags::default(); @@ -260,7 +284,7 @@ fn new(options: NullBlkOptions<'_>) -> Result>> { GFP_KERNEL, )?; - 
let queue_data = Box::try_pin_init( + let queue_data = Arc::try_pin_init( try_pin_init!(Self { storage, irq_mode, @@ -270,6 +294,11 @@ fn new(options: NullBlkOptions<'_>) -> Result>> { bad_blocks, bad_blocks_once, bad_blocks_partial_io, + bandwidth_limit: bandwidth_limit / 50, + bandwidth_timer <- HrTimer::new(), + bandwidth_bytes: Atomic::new(0), + bandwidth_timer_handle <- new_spinlock!(None), + disk: SetOnce::new(), }), GFP_KERNEL, )?; @@ -286,7 +315,10 @@ fn new(options: NullBlkOptions<'_>) -> Result>> { .max_hw_discard_sectors(ffi::c_uint::MAX >> block::SECTOR_SHIFT); } - builder.build(fmt!("{}", name.to_str()?), tagset, queue_data) + let disk = builder.build(fmt!("{}", name.to_str()?), tagset, queue_data)?; + let queue_data: ArcBorrow<'_, Self> = disk.queue_data(); + queue_data.disk.populate(disk.get_ref()); + Ok(disk) } fn sheaf_size() -> usize { @@ -504,6 +536,36 @@ fn end_request(rq: Owned>) { } } +impl_has_hr_timer! { + impl HasHrTimer for NullBlkDevice { + mode: hrtimer::RelativeHardMode, + field: self.bandwidth_timer, + } +} + +impl HrTimerCallback for NullBlkDevice { + type Pointer<'a> = Arc; + + fn run( + this: ArcBorrow<'_, Self>, + mut context: HrTimerCallbackContext<'_, Self>, + ) -> HrTimerRestart { + if this.bandwidth_bytes.load(ordering::Relaxed) == 0 { + return HrTimerRestart::NoRestart; + } + + this.disk.as_ref().map(|disk| { + disk.try_access() + .map(|disk| disk.queue().start_stopped_hw_queues_async()) + }); + + this.bandwidth_bytes.store(0, ordering::Relaxed); + + context.forward_now(Self::BANDWIDTH_TIMER_INTERVAL); + HrTimerRestart::Restart + } +} + struct HwQueueContext { page: Option>, } @@ -511,7 +573,7 @@ struct HwQueueContext { #[pin_data] struct Pdu { #[pin] - timer: kernel::time::hrtimer::HrTimer, + timer: HrTimer, error: Atomic, } @@ -560,14 +622,14 @@ fn align_down(value: T, to: T) -> T #[vtable] impl Operations for NullBlkDevice { - type QueueData = Pin>; + type QueueData = Arc; type RequestData = Pdu; type TagSetData = (); type 
HwData = Pin>>; fn new_request_data() -> impl PinInit { pin_init!(Pdu { - timer <- kernel::time::hrtimer::HrTimer::new(), + timer <- HrTimer::new(), error: Atomic::new(0), }) } @@ -575,14 +637,39 @@ fn new_request_data() -> impl PinInit { #[inline(always)] fn queue_rq( hw_data: Pin<&SpinLock>, - this: Pin<&Self>, + this: ArcBorrow<'_, Self>, rq: Owned>, _is_last: bool, ) -> BlkResult { - let mut rq = rq.start(); let mut sectors = rq.sectors(); - Self::handle_bad_blocks(this.get_ref(), &mut rq, &mut sectors)?; + if this.bandwidth_limit != 0 { + if !this.bandwidth_timer.active() { + drop(this.bandwidth_timer_handle.lock().take()); + let arc: Arc<_> = this.into(); + *this.bandwidth_timer_handle.lock() = + Some(arc.start(Self::BANDWIDTH_TIMER_INTERVAL)); + } + + if this + .bandwidth_bytes + .fetch_add(u64::from(rq.bytes()), ordering::Relaxed) + + u64::from(rq.bytes()) + > this.bandwidth_limit + { + rq.queue().stop_hw_queues(); + if this.bandwidth_bytes.load(ordering::Relaxed) <= this.bandwidth_limit { + rq.queue().start_stopped_hw_queues_async(); + } + + return Err(kernel::block::error::code::BLK_STS_DEV_RESOURCE); + } + } + + let mut rq = rq.start(); + + use core::ops::Deref; + Self::handle_bad_blocks(this.deref(), &mut rq, &mut sectors)?; if this.memory_backed { if rq.command() == bindings::req_op_REQ_OP_DISCARD { @@ -604,7 +691,7 @@ fn queue_rq( Ok(()) } - fn commit_rqs(_hw_data: Pin<&SpinLock>, _queue_data: Pin<&Self>) {} + fn commit_rqs(_hw_data: Pin<&SpinLock>, _queue_data: ArcBorrow<'_, Self>) {} fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result { KBox::pin_init(new_spinlock!(HwQueueContext { page: None }), GFP_KERNEL) -- 2.51.2