Add support for discard operations to the rnull block driver:
- Add discard module parameter and configfs attribute.
- Set max_hw_discard_sectors when discard is enabled.
- Add sector occupancy tracking.
- Add discard handling that frees sectors and removes empty pages.
- Discard operations require memory backing to function.

The discard feature uses a bitmap to track which sectors in each page are
occupied, allowing cleanup of pages when they are empty.

Signed-off-by: Andreas Hindborg
---
 drivers/block/rnull/configfs.rs | 15 ++++++
 drivers/block/rnull/rnull.rs | 115 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs index fe00617d2b679..4ec9be440105d 100644 --- a/drivers/block/rnull/configfs.rs +++ b/drivers/block/rnull/configfs.rs @@ -94,6 +94,7 @@ fn make_group( submit_queues: 7, use_per_node_hctx: 8, home_node: 9, + discard: 10, ], }; @@ -114,6 +115,7 @@ fn make_group( memory_backed: false, submit_queues: 1, home_node: bindings::NUMA_NO_NODE, + discard: false, }), }), core::iter::empty(), @@ -171,6 +173,7 @@ struct DeviceConfigInner { memory_backed: bool, submit_queues: u32, home_node: i32, + discard: bool, } #[vtable] @@ -204,6 +207,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result { memory_backed: guard.memory_backed, submit_queues: guard.submit_queues, home_node: guard.home_node, + discard: guard.discard, })?); guard.powered = true; } else if guard.powered && !power_op { @@ -321,3 +325,14 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result { } } ); + +configfs_attribute!(DeviceConfig, 10, + show: |this, page| show_field(this.data.lock().discard, page), + store: |this, page| store_with_power_check(this, page, |this, page| { + if !this.data.lock().memory_backed { + return Err(EINVAL); + } + this.data.lock().discard = kstrtobool_bytes(page)?; + Ok(()) + }) +);
diff --git a/drivers/block/rnull/rnull.rs 
b/drivers/block/rnull/rnull.rs index fb3ce272d5bec..d1fb5fbc0c0be 100644 --- a/drivers/block/rnull/rnull.rs +++ b/drivers/block/rnull/rnull.rs @@ -19,14 +19,19 @@ Operations, TagSet, // }, + SECTOR_MASK, SECTOR_SHIFT, }, error::{ code, Result, // }, + ffi, new_mutex, new_xarray, - page::SafePage, + page::{ + SafePage, // + PAGE_SIZE, + }, pr_info, prelude::*, str::CString, @@ -102,6 +107,11 @@ default: -1, description: "Home node for the device. Default: -1 (no node)", }, + discard: u8 { + default: 0, + description: + "Support discard operations (requires memory-backed null_blk device). Default: false", + }, }, } @@ -139,6 +149,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { memory_backed: *module_parameters::memory_backed.value() != 0, submit_queues, home_node: *module_parameters::home_node.value(), + discard: *module_parameters::discard.value() != 0, })?; disks.push(disk, GFP_KERNEL)?; } @@ -163,6 +174,7 @@ struct NullBlkOptions<'a> { memory_backed: bool, submit_queues: u32, home_node: i32, + discard: bool, } struct NullBlkDevice; @@ -178,6 +190,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> { memory_backed, submit_queues, home_node, + discard, } = options; let flags = if memory_backed { @@ -201,22 +214,30 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> { irq_mode, completion_time, memory_backed, + block_size: block_size as usize, }), GFP_KERNEL, )?; - gen_disk::GenDiskBuilder::new() + let mut builder = gen_disk::GenDiskBuilder::new() .capacity_sectors(capacity_mib << (20 - block::SECTOR_SHIFT)) .logical_block_size(block_size)? .physical_block_size(block_size)? 
- .rotational(rotational) - .build(fmt!("{}", name.to_str()?), tagset, queue_data) + .rotational(rotational); + + if memory_backed && discard { + builder = builder + // Max IO size is u32::MAX bytes + .max_hw_discard_sectors(ffi::c_uint::MAX >> block::SECTOR_SHIFT); + } + + builder.build(fmt!("{}", name.to_str()?), tagset, queue_data) } #[inline(always)] fn write(tree: &Tree, mut sector: usize, mut segment: Segment<'_>) -> Result { while !segment.is_empty() { - let page = SafePage::alloc_page(GFP_NOIO)?; + let page = NullBlockPage::new()?; let mut tree = tree.lock(); let page_idx = sector >> block::PAGE_SECTORS_SHIFT; @@ -228,8 +249,10 @@ fn write(tree: &Tree, mut sector: usize, mut segment: Segment<'_>) -> Result { tree.get_mut(page_idx).unwrap() }; + page.set_occupied(sector); let page_offset = (sector & block::SECTOR_MASK as usize) << block::SECTOR_SHIFT; - sector += segment.copy_to_page(page, page_offset) >> block::SECTOR_SHIFT; + sector += + segment.copy_to_page(page.page.get_pin_mut(), page_offset) >> block::SECTOR_SHIFT; } Ok(()) } @@ -243,7 +266,7 @@ fn read(tree: &Tree, mut sector: usize, mut segment: Segment<'_>) -> Result { if let Some(page) = tree.get(idx) { let page_offset = (sector & block::SECTOR_MASK as usize) << block::SECTOR_SHIFT; - sector += segment.copy_from_page(page, page_offset) >> block::SECTOR_SHIFT; + sector += segment.copy_from_page(&page.page, page_offset) >> block::SECTOR_SHIFT; } else { sector += segment.zero_page() >> block::SECTOR_SHIFT; } @@ -252,6 +275,32 @@ fn read(tree: &Tree, mut sector: usize, mut segment: Segment<'_>) -> Result { Ok(()) } + fn discard(tree: &Tree, mut sector: usize, sectors: usize, block_size: usize) -> Result { + let mut remaining_bytes = sectors << SECTOR_SHIFT; + let mut tree = tree.lock(); + + while remaining_bytes > 0 { + let page_idx = sector >> block::PAGE_SECTORS_SHIFT; + let mut remove = false; + if let Some(page) = tree.get_mut(page_idx) { + page.set_free(sector); + if page.is_empty() { + remove = 
true; + } + } + + if remove { + drop(tree.remove(page_idx)) + } + + let processed = remaining_bytes.min(block_size); + sector += processed >> SECTOR_SHIFT; + remaining_bytes -= processed; + } + + Ok(()) + } + #[inline(never)] fn transfer( command: bindings::req_op, @@ -268,7 +317,40 @@ fn transfer( } } -type TreeNode = Owned<SafePage>; +const _CHECK_STATUS_WIDTH: () = build_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64); + +struct NullBlockPage { + page: Owned<SafePage>, + status: u64, +} + +impl NullBlockPage { + fn new() -> Result<KBox<Self>> { + Ok(KBox::new( + Self { + page: SafePage::alloc_page(GFP_NOIO | __GFP_ZERO)?, + status: 0, + }, + GFP_NOIO, + )?) + } + + fn set_occupied(&mut self, sector: usize) { + let idx = sector & SECTOR_MASK as usize; + self.status |= 1 << idx; + } + + fn set_free(&mut self, sector: usize) { + let idx = sector & SECTOR_MASK as usize; + self.status &= !(1 << idx); + } + + fn is_empty(&self) -> bool { + self.status == 0 + } +} + +type TreeNode = KBox<NullBlockPage>; type Tree = XArray<TreeNode>; #[pin_data] @@ -278,6 +360,7 @@ struct QueueData { irq_mode: IRQMode, completion_time: Delta, memory_backed: bool, + block_size: usize, } #[pin_data] @@ -327,12 +410,16 @@ fn queue_rq( let command = rq.command(); let mut sector = rq.sector(); - for bio in rq.bio_iter_mut() { - let segment_iter = bio.segment_iter(); - for segment in segment_iter { - let length = segment.len(); - Self::transfer(command, tree, sector, segment)?; - sector += length as usize >> block::SECTOR_SHIFT; + if command == bindings::req_op_REQ_OP_DISCARD { + Self::discard(tree, sector, rq.sectors(), queue_data.block_size)?; + } else { + for bio in rq.bio_iter_mut() { + let segment_iter = bio.segment_iter(); + for segment in segment_iter { + let length = segment.len(); + Self::transfer(command, tree, sector, segment)?; + sector += length as usize >> block::SECTOR_SHIFT; + } } } } -- 2.51.2