ntfs_write_mft_block() maps each $MFT record through the $MFT data
runlist. For sub-folio clusters it looks up a struct runlist_element under
ni->runlist.lock, drops the lock, and later uses rl->length and rl->vcn
when choosing folio_sz.
That pointer is only borrowed from ni->runlist.rl. Concurrent $MFT
allocation extension can merge a replacement runlist under the same lock,
and ntfs_rl_realloc() can free the old backing array. If that happens
between the lookup and the later folio_sz decision, writeback can
dereference freed runlist storage.
The buggy scenario involves two paths, with each column showing the order
within that path:
MFT writeback path:                  $MFT allocation extension:
1. Look up rl under                  1. Extend the $MFT data allocation.
   ni->runlist.lock.                 2. Publish a replacement runlist.
2. Drop ni->runlist.lock.            3. Free the old runlist array.
3. Read rl->length and rl->vcn
   to choose folio_sz.
Compute the remaining run length while ni->runlist.lock is still held, and
use that scalar after unlock. This preserves the existing folio sizing
decision without carrying a borrowed runlist_element across the lock
boundary.
Validation reproduced this kernel report:
BUG: KASAN: slab-use-after-free in ntfs_mft_writepages+0x1c8d/0x1fb0
Call Trace:
dump_stack_lvl+0x66/0xa0
print_report+0xce/0x630
? ntfs_mft_writepages+0x1c8d/0x1fb0
? srso_alias_return_thunk+0x5/0xfbef5
? __virt_addr_valid+0x20d/0x410
? ntfs_mft_writepages+0x1c8d/0x1fb0
kasan_report+0xe0/0x110
? ntfs_mft_writepages+0x1c8d/0x1fb0
ntfs_mft_writepages+0x1c8d/0x1fb0
? __pfx_ntfs_mft_writepages+0x10/0x10
? __pfx___mutex_unlock_slowpath+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
? iput+0x92/0xa80
do_writepages+0x219/0x530
? __pfx_do_writepages+0x10/0x10
__writeback_single_inode+0x117/0xf50
? do_raw_spin_lock+0x130/0x270
? __pfx_do_raw_spin_lock+0x10/0x10
? __pfx___writeback_single_inode+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
writeback_sb_inodes+0x65b/0x1810
? srso_alias_return_thunk+0x5/0xfbef5
? lock_acquire+0x2b8/0x2f0
? __pfx_writeback_sb_inodes+0x10/0x10
? lock_release+0x1e0/0x280
? _raw_spin_unlock+0x23/0x40
? move_expired_inodes+0x2b8/0x850
__writeback_inodes_wb+0xf4/0x270
? __pfx___writeback_inodes_wb+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
? queue_io+0x2e4/0x410
wb_writeback+0x666/0x880
? srso_alias_return_thunk+0x5/0xfbef5
? __pfx_wb_writeback+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? get_nr_dirty_inodes+0x1c/0x170
wb_workfn+0x75e/0xbb0
? srso_alias_return_thunk+0x5/0xfbef5
? _raw_spin_unlock_irqrestore+0x27/0x60
? __pfx_wb_workfn+0x10/0x10
? __pfx_debug_object_deactivate+0x10/0x10
? lock_acquire+0x2b8/0x2f0
? srso_alias_return_thunk+0x5/0xfbef5
? lock_release+0x1e0/0x280
process_one_work+0x8d0/0x1870
? __pfx_process_one_work+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
worker_thread+0x575/0xf80
? __pfx_worker_thread+0x10/0x10
kthread+0x2e7/0x3c0
? __pfx_kthread+0x10/0x10
ret_from_fork+0x576/0x810
? __pfx_ret_from_fork+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
? __switch_to+0x57e/0xe10
? __switch_to_asm+0x33/0x70
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
Allocated by task 970:
kasan_save_stack+0x33/0x60
kasan_save_track+0x14/0x30
__kasan_kmalloc+0xaa/0xb0
__kvmalloc_node_noprof+0x353/0x920
ntfs_rl_realloc+0x3c/0x80
ntfs_runlists_merge+0x1212/0x3010
ntfs_mft_data_extend_allocation_nolock+0x3e0/0x1f40
ntfs_mft_record_alloc+0x1ab4/0x4f10
__ntfs_create+0x680/0x2e50
ntfs_create+0x1e6/0x3a0
path_openat+0x2b55/0x3c10
do_file_open+0x1f4/0x460
do_sys_openat2+0xde/0x170
__x64_sys_openat+0x122/0x1e0
do_syscall_64+0x115/0x6a0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Freed by task 1294:
kasan_save_stack+0x33/0x60
kasan_save_track+0x14/0x30
kasan_save_free_info+0x3b/0x60
__kasan_slab_free+0x5f/0x80
kfree+0x307/0x580
ntfs_rl_realloc+0x66/0x80
ntfs_runlists_merge+0x1212/0x3010
ntfs_mft_data_extend_allocation_nolock+0x3e0/0x1f40
ntfs_mft_record_alloc+0x1ab4/0x4f10
__ntfs_create+0x680/0x2e50
ntfs_create+0x1e6/0x3a0
path_openat+0x2b55/0x3c10
do_file_open+0x1f4/0x460
do_sys_openat2+0xde/0x170
__x64_sys_openat+0x122/0x1e0
do_syscall_64+0x115/0x6a0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Fixes: 115380f9a2f9 ("ntfs: update mft operations")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Cen Zhang
---
fs/ntfs/mft.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index a5019e80951b..fd20d7abd6f5 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2637,7 +2637,6 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
s64 vcn = ntfs_pidx_to_cluster(vol, folio->index);
s64 end_vcn = ntfs_bytes_to_cluster(vol, ni->allocated_size);
unsigned int folio_sz;
- struct runlist_element *rl = NULL;
loff_t i_size = i_size_read(vi);
ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.",
@@ -2682,6 +2681,7 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
&tni, &ref_inos[nr_ref_inos])) {
unsigned int mft_record_off = 0;
s64 vcn_off = vcn;
+ s64 rl_len = 0;
/*
* The record should be written. If a locked ntfs
@@ -2701,8 +2701,12 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
}
if (vol->cluster_size < folio_size(folio)) {
+ struct runlist_element *rl;
+
down_write(&ni->runlist.lock);
rl = ntfs_attr_vcn_to_rl(ni, vcn_off, &lcn);
+ if (!IS_ERR(rl))
+ rl_len = rl->length - (vcn_off - rl->vcn);
up_write(&ni->runlist.lock);
if (IS_ERR(rl) || lcn < 0) {
err = -EIO;
@@ -2733,7 +2737,7 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
if (vol->cluster_size == NTFS_BLOCK_SIZE &&
(mft_record_off ||
- (rl && rl->length - (vcn_off - rl->vcn) == 1) ||
+ rl_len == 1 ||
mft_ofs + NTFS_BLOCK_SIZE >= PAGE_SIZE))
folio_sz = NTFS_BLOCK_SIZE;
else
--
2.43.0