Add fault injection support to rnull using the kernel fault injection
infrastructure. When enabled via `CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION`,
users can inject failures into I/O requests through the fault injection
configfs interface. Each fault injection point (request timeout, request
requeue, and hardware queue initialization) is exposed as a configfs
default group, allowing per-device fault injection configuration.

Signed-off-by: Andreas Hindborg
---
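A note for reviewers on the fault injection semantics: each FaultConfig
group exposes the standard attributes from lib/fault-inject.c, and every
should_fail() call in this patch is gated by them. As a rough mental model
only -- this is a standalone, simplified Rust sketch with illustrative
names, not the kernel implementation:

    /// Illustrative model of the lib/fault-inject.c decision; field names
    /// mirror the exposed attributes, the logic is simplified.
    struct FaultAttr {
        probability: u32, // percent chance of failing an eligible call
        interval: u32,    // only every interval-th call is eligible
        times: i64,       // remaining failures; negative means unlimited
        count: u64,       // calls observed so far
    }

    impl FaultAttr {
        fn should_fail(&mut self, random_percent: u32) -> bool {
            self.count += 1;
            if self.times == 0 {
                return false; // failure budget exhausted
            }
            if self.interval > 1 && self.count % u64::from(self.interval) != 0 {
                return false; // not an eligible call
            }
            if random_percent >= self.probability {
                return false; // dice roll passed
            }
            if self.times > 0 {
                self.times -= 1;
            }
            true
        }
    }

    fn main() {
        // Fail every second eligible call, at most twice.
        let mut attr = FaultAttr { probability: 100, interval: 2, times: 2, count: 0 };
        let hits: Vec<bool> = (0..6).map(|_| attr.should_fail(0)).collect();
        assert_eq!(hits, [false, true, false, true, false, false]);
    }

With probability = 100, interval = 2 and times = 2, for example, every
second call fails until two failures have been injected, as the asserts
in main() show.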
 drivers/block/rnull/Kconfig     |  11 ++++
 drivers/block/rnull/configfs.rs |  57 ++++++++++++++++++-
 drivers/block/rnull/rnull.rs    | 120 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 179 insertions(+), 9 deletions(-)

diff --git a/drivers/block/rnull/Kconfig b/drivers/block/rnull/Kconfig
index 7bc5b376c128b..1ade5d8c17997 100644
--- a/drivers/block/rnull/Kconfig
+++ b/drivers/block/rnull/Kconfig
@@ -11,3 +11,14 @@ config BLK_DEV_RUST_NULL
 	  devices that can be configured via various configuration options.
 
 	  If unsure, say N.
+
+config BLK_DEV_RUST_NULL_FAULT_INJECTION
+	bool "Support fault injection for Rust Null test block driver"
+	depends on BLK_DEV_RUST_NULL && FAULT_INJECTION_CONFIGFS
+	help
+	  Enable fault injection support for the Rust null block driver. This
+	  allows injecting errors into block I/O operations for testing error
+	  handling paths and verifying system resilience. Fault injection is
+	  configured through configfs alongside the null block device settings.
+
+	  If unsure, say N.
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 424722f01ab8d..b449ac882d961 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -46,6 +46,9 @@
 mod macros;
 
+#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+use kernel::fault_injection::FaultConfig;
+
 pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> {
     let item_type = configfs_attrs! {
         container: configfs::Subsystem<Config>,
@@ -122,10 +125,44 @@ fn make_group(
         ],
     };
 
+    use kernel::configfs::CDefaultGroup;
+
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    let mut default_groups: KVec<CDefaultGroup<DeviceConfig>> = KVec::new();
+
+    #[cfg(not(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION))]
+    let default_groups: KVec<CDefaultGroup<DeviceConfig>> = KVec::new();
+
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    let timeout_inject = Arc::pin_init(
+        kernel::fault_injection::FaultConfig::new(c"timeout_inject"),
+        GFP_KERNEL,
+    )?;
+
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    let requeue_inject = Arc::pin_init(
+        kernel::fault_injection::FaultConfig::new(c"requeue_inject"),
+        GFP_KERNEL,
+    )?;
+
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    let init_hctx_inject = Arc::pin_init(
+        kernel::fault_injection::FaultConfig::new(c"init_hctx_fault_inject"),
+        GFP_KERNEL,
+    )?;
+
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    {
+        default_groups.push(timeout_inject.clone(), GFP_KERNEL)?;
+        default_groups.push(requeue_inject.clone(), GFP_KERNEL)?;
+        default_groups.push(init_hctx_inject.clone(), GFP_KERNEL)?;
+    }
+
     let block_size = 4096;
     Ok(configfs::Group::new(
         name.try_into()?,
         item_type,
+        // default_groups,
         // TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside
         try_pin_init!(DeviceConfig {
             data <- new_mutex!(DeviceConfigInner {
@@ -165,9 +202,15 @@ fn make_group(
                 zone_max_active: 0,
                 zone_append_max_sectors: u32::MAX,
                 fua: true,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                timeout_inject,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                requeue_inject,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                init_hctx_inject,
             }),
         }),
-        core::iter::empty(),
+        default_groups,
     ))
 }
 }
@@ -241,6 +284,12 @@ struct DeviceConfigInner {
     zone_max_active: u32,
     zone_append_max_sectors: u32,
     fua: bool,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    timeout_inject: Arc<FaultConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    requeue_inject: Arc<FaultConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    init_hctx_inject: Arc<FaultConfig>,
 }
 
 #[vtable]
@@ -292,6 +341,12 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 zone_max_active: guard.zone_max_active,
                 zone_append_max_sectors: guard.zone_append_max_sectors,
                 forced_unit_access: guard.fua,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                requeue_inject: guard.requeue_inject.clone(),
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                init_hctx_inject: guard.init_hctx_inject.clone(),
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                timeout_inject: guard.timeout_inject.clone(),
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
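A note for reviewers on the resulting interface: with the three groups
above registered as configfs default groups, each rnull device directory
should gain one subdirectory per injection point, roughly (illustrative
layout; the exact paths depend on where the existing rnull configfs
hierarchy is mounted):

    <configfs>/rnull/<device>/timeout_inject/
    <configfs>/rnull/<device>/requeue_inject/
    <configfs>/rnull/<device>/init_hctx_fault_inject/

Each group is populated by FAULT_INJECTION_CONFIGFS with the standard
fault injection attributes (probability, interval, times, space,
verbose, ...), which is what the should_fail() calls in the rnull.rs
hunks below consult.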
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index db856f03b78cb..b2b089a657f12 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -40,6 +40,7 @@
         IoCompletionBatch,
         Operations,
         RequestList,
+        RequestTimeoutStatus,
         TagSet,
         //
     },
     SECTOR_SHIFT,
@@ -90,6 +91,9 @@
 use pin_init::PinInit;
 use util::*;
 
+#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+use kernel::fault_injection::FaultConfig;
+
 module! {
     type: NullBlkModule,
     name: "rnull_mod",
@@ -205,6 +209,8 @@
     },
 }
 
+// TODO: Fault inject via params - requires module_params string support.
+
 #[pin_data]
 struct NullBlkModule {
     #[pin]
@@ -267,6 +273,15 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                 zone_max_active: *module_parameters::zone_max_active.value(),
                 zone_append_max_sectors: *module_parameters::zone_append_max_sectors.value(),
                 forced_unit_access: *module_parameters::fua.value() != 0,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                requeue_inject: Arc::pin_init(FaultConfig::new(c"requeue_inject"), GFP_KERNEL)?,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                init_hctx_inject: Arc::pin_init(
+                    FaultConfig::new(c"init_hctx_fault_inject"),
+                    GFP_KERNEL,
+                )?,
+                #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                timeout_inject: Arc::pin_init(FaultConfig::new(c"timeout_inject"), GFP_KERNEL)?,
             })?;
             disks.push(disk, GFP_KERNEL)?;
         }
@@ -315,6 +330,12 @@ struct NullBlkOptions<'a> {
     #[cfg_attr(not(CONFIG_BLK_DEV_ZONED), expect(unused_variables))]
     zone_append_max_sectors: u32,
     forced_unit_access: bool,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    requeue_inject: Arc<FaultConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    init_hctx_inject: Arc<FaultConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    timeout_inject: Arc<FaultConfig>,
 }
 
 static SHARED_TAG_SET: SetOnce<Arc<TagSet<NullBlkDevice>>> = SetOnce::new();
@@ -339,6 +360,12 @@ struct NullBlkDevice {
     #[cfg(CONFIG_BLK_DEV_ZONED)]
     #[pin]
     zoned: zoned::ZoneOptions,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    requeue_inject: Arc<FaultConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    requeue_selector: kernel::sync::atomic::Atomic<u64>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    timeout_inject: Arc<FaultConfig>,
 }
 
 impl NullBlkDevice {
@@ -373,6 +400,13 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
             zone_max_active,
             zone_append_max_sectors,
             forced_unit_access,
+
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            requeue_inject,
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            init_hctx_inject,
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            timeout_inject,
         } = options;
 
         let mut flags = mq::tag_set::Flags::default();
@@ -402,6 +436,8 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
                 NullBlkTagsetData {
                     queue_depth: hw_queue_depth,
                     queue_config,
+                    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+                    init_hctx_inject,
                 },
                 GFP_KERNEL,
             )?,
@@ -450,6 +486,12 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
                 zone_max_active,
                 zone_append_max_sectors,
             })?,
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            requeue_inject,
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            requeue_selector: Atomic::new(0),
+            #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+            timeout_inject,
         }),
         GFP_KERNEL,
     )?;
@@ -680,7 +722,9 @@ fn handle_bad_blocks(&self, rq: &mut Owned<mq::Request<Self>>, sectors: &mut u32
             badblocks::BlockStatus::None => {}
             badblocks::BlockStatus::Acknowledged(mut range)
             | badblocks::BlockStatus::Unacknowledged(mut range) => {
-                rq.data_ref().error.store(1, ordering::Relaxed);
+                rq.data_ref()
+                    .error
+                    .store(block::error::code::BLK_STS_IOERR.into(), ordering::Relaxed);
 
                 if self.bad_blocks_once {
                     self.bad_blocks.set_good(range.clone())?;
@@ -705,6 +749,7 @@ fn end_request(rq: Owned<mq::Request<Self>>) {
         let status = rq.data_ref().error.load(ordering::Relaxed);
         rq.data_ref().error.store(0, ordering::Relaxed);
 
+        // TODO: Use correct error code
        match status {
             0 => rq.end_ok(),
             _ => rq.end(bindings::BLK_STS_IOERR),
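The next hunk wires should_fail() into queue_rq and alternates between
two distinct failure modes, so both the "dispatch failed" and the
"explicit requeue" error paths get exercised deterministically. The
selector is nothing more than a shared counter; a standalone sketch of
the pattern (std atomics, illustrative names, not the kernel code):

    use std::sync::atomic::{AtomicU64, Ordering};

    enum Action {
        FailDispatch, // report the queueing attempt itself as failed
        Requeue,      // accept the request, then put it back on the queue
    }

    // Alternate deterministically between the two failure modes so both
    // error paths get exercised, regardless of timing.
    fn select(selector: &AtomicU64) -> Action {
        if selector.fetch_add(1, Ordering::Relaxed) & 1 == 0 {
            Action::FailDispatch
        } else {
            Action::Requeue
        }
    }

    fn main() {
        let selector = AtomicU64::new(0);
        assert!(matches!(select(&selector), Action::FailDispatch));
        assert!(matches!(select(&selector), Action::Requeue));
    }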
@@ -730,6 +775,24 @@ fn queue_rq_internal(
         rq: Owned<mq::Request<Self>>,
         _is_last: bool,
     ) -> Result<(), QueueRequestError> {
+        #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+        if rq.queue_data().requeue_inject.should_fail(1) {
+            if rq
+                .queue_data()
+                .requeue_selector
+                .fetch_add(1, ordering::Relaxed)
+                & 1
+                == 0
+            {
+                return Err(QueueRequestError {
+                    request: rq,
+                });
+            } else {
+                rq.requeue(true);
+                return Ok(());
+            }
+        }
+
         if this.bandwidth_limit != 0 {
             if !this.bandwidth_timer.active() {
                 drop(this.bandwidth_timer_handle.lock().take());
@@ -755,6 +818,12 @@ fn queue_rq_internal(
 
         let mut rq = rq.start();
 
+        #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+        if rq.queue_data().timeout_inject.should_fail(1) {
+            rq.data_ref().fake_timeout.store(1, ordering::Relaxed);
+            return Ok(());
+        }
+
         if rq.command() == mq::Command::Flush {
             if this.memory_backed {
                 this.storage.flush(&hw_data);
@@ -778,12 +847,13 @@ fn queue_rq_internal(
             Ok(())
         })();
 
-        if let Err(e) = status {
-            // Do not overwrite existing error. We do not care whether this write fails.
-            let _ = rq
-                .data_ref()
-                .error
-                .cmpxchg(0, e.to_errno(), ordering::Relaxed);
+        if status.is_err() {
+            // Do not overwrite existing error.
+            let _ = rq.data_ref().error.cmpxchg(
+                0,
+                kernel::block::error::code::BLK_STS_IOERR.into(),
+                ordering::Relaxed,
+            );
         }
 
         if rq.is_poll() {
@@ -861,7 +931,8 @@ struct HwQueueContext {
 struct Pdu {
     #[pin]
     timer: HrTimer<Self>,
-    error: Atomic<i32>,
+    error: Atomic<u32>,
+    fake_timeout: Atomic<u32>,
 }
 
 impl HrTimerCallback for Pdu {
@@ -886,6 +957,8 @@ impl HasHrTimer<Pdu> for Pdu {
 struct NullBlkTagsetData {
     queue_depth: u32,
     queue_config: Arc<QueueConfig>,
+    #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+    init_hctx_inject: Arc<FaultConfig>,
 }
 
 #[vtable]
@@ -899,6 +972,7 @@ fn new_request_data() -> impl PinInit<Pdu> {
     pin_init!(Pdu {
         timer <- HrTimer::new(),
         error: Atomic::new(0),
+        fake_timeout: Atomic::new(0),
     })
 }
 
@@ -953,6 +1027,11 @@ fn poll(
     }
 
     fn init_hctx(tagset_data: &NullBlkTagsetData, _hctx_idx: u32) -> Result<Self::HwData> {
+        #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
+        if tagset_data.init_hctx_inject.should_fail(1) {
+            return Err(EFAULT);
+        }
+
         KBox::pin_init(
             new_spinlock!(HwQueueContext {
                 page: None,
@@ -1013,4 +1092,29 @@ fn map_queues(tag_set: Pin<&mut TagSet<Self>>) {
         })
         .unwrap()
     }
+
+    fn request_timeout(tag_set: &TagSet<Self>, qid: u32, tag: u32) -> RequestTimeoutStatus {
+        if let Some(request) = tag_set.tag_to_rq(qid, tag) {
+            pr_info!("Request timed out\n");
+            // Only fail requests that are faking timeouts. Requests that time
+            // out due to memory pressure will be completed normally.
+            if request.data_ref().fake_timeout.load(ordering::Relaxed) != 0 {
+                request.data_ref().error.store(
+                    block::error::code::BLK_STS_TIMEOUT.into(),
+                    ordering::Relaxed,
+                );
+                request.data_ref().fake_timeout.store(0, ordering::Relaxed);
+
+                if let Ok(request) = OwnableRefCounted::try_from_shared(request) {
+                    Self::end_request(request);
+                    return RequestTimeoutStatus::Completed;
+                }
+                // TODO: pr_warn_once!
+                pr_warn!("Timed out request could not be completed\n");
+            }
+        } else {
+            pr_warn!("Timed out request could not be referenced\n");
+        }
+        RequestTimeoutStatus::RetryLater
+    }
 }
-- 
2.51.2
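Postscript for reviewers: the fake-timeout machinery added above is a
small handshake on two per-request atomics -- queue_rq marks the request
instead of completing it, and the timeout handler later fails exactly the
requests that were marked. A standalone sketch of the protocol (std
atomics, placeholder status value, not the kernel code):

    use std::sync::atomic::{AtomicU32, Ordering};

    const BLK_STS_TIMEOUT: u32 = 0x01; // placeholder value for the sketch

    struct Pdu {
        error: AtomicU32,
        fake_timeout: AtomicU32,
    }

    impl Pdu {
        // Dispatch side: instead of completing, mark the request so the
        // block layer's timeout machinery will eventually fire.
        fn inject_timeout(&self) {
            self.fake_timeout.store(1, Ordering::Relaxed);
        }

        // Timeout handler side: only requests we marked are failed; a
        // request that timed out for other reasons is retried later.
        fn handle_timeout(&self) -> bool {
            if self.fake_timeout.load(Ordering::Relaxed) != 0 {
                self.error.store(BLK_STS_TIMEOUT, Ordering::Relaxed);
                self.fake_timeout.store(0, Ordering::Relaxed);
                true // complete the request now with the stored status
            } else {
                false // not ours: tell the core to retry later
            }
        }
    }

    fn main() {
        let pdu = Pdu { error: AtomicU32::new(0), fake_timeout: AtomicU32::new(0) };
        pdu.inject_timeout();
        assert!(pdu.handle_timeout());
        assert_eq!(pdu.error.load(Ordering::Relaxed), BLK_STS_TIMEOUT);
    }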