For write/writeback, set the IOMAP_F_FSVERITY flag to tell iomap not to update the inode size and not to skip folios beyond EOF. Initiate fsverity writeback with IOMAP_F_FSVERITY set to tell iomap that it should not skip folios that are dirty beyond EOF. In the read path, let iomap know that we are reading fsverity metadata. As a result, holes in the tree are treated as a request to synthesize tree blocks, and a hole after the descriptor is treated as the end of the fsverity region. Introduce a new inode flag meaning that the Merkle tree is being built on the inode. Signed-off-by: Andrey Albershteyn --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_bmap.c | 7 +++++++ fs/xfs/xfs_aops.c | 16 +++++++++++++++- fs/xfs/xfs_fsverity.c | 34 ++++++++++++++++++++++++++++++++++ fs/xfs/xfs_fsverity.h | 20 ++++++++++++++++++++ fs/xfs/xfs_inode.h | 6 ++++++ fs/xfs/xfs_iomap.c | 15 +++++++++++++-- 7 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 fs/xfs/xfs_fsverity.c create mode 100644 fs/xfs/xfs_fsverity.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 9f7133e02576..38b7f51e5d84 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -149,6 +149,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o +xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o # notify failure ifeq ($(CONFIG_MEMORY_FAILURE),y) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7a4c8f1aa76c..931d02678d19 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -41,6 +41,8 @@ #include "xfs_inode_util.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" +#include struct kmem_cache *xfs_bmap_intent_cache; @@ -4451,6 +4453,11 @@ xfs_bmapi_convert_one_delalloc( XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION) && + XFS_FSB_TO_B(mp, bma.got.br_startoff) >= + xfs_fsverity_metadata_offset(ip)) + 
flags |= IOMAP_F_FSVERITY; + ASSERT(!isnullstartblock(bma.got.br_startblock)); xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f279055fcea0..9503252a0fa4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -22,6 +22,7 @@ #include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_rtgroup.h" +#include "xfs_fsverity.h" #include struct xfs_writepage_ctx { @@ -339,12 +340,16 @@ xfs_map_blocks( int retries = 0; int error = 0; unsigned int *seq; + unsigned int iomap_flags = 0; if (xfs_is_shutdown(mp)) return -EIO; XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. COW I/O always takes precedent, so we must always @@ -432,7 +437,8 @@ xfs_map_blocks( isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, iomap_flags, + XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: @@ -705,6 +711,14 @@ xfs_vm_writepages( }, }; + /* + * Writeback does not work for folios past EOF; let it know that + * this I/O is for fsverity metadata and that this restriction + * needs to be skipped. + */ + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + wpc.ctx.iomap.flags |= IOMAP_F_FSVERITY; + return iomap_writepages(&wpc.ctx); } } diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c new file mode 100644 index 000000000000..bc6020cc6e41 --- /dev/null +++ b/fs/xfs/xfs_fsverity.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. 
+ */ +#include "xfs_platform.h" +#include "xfs_format.h" +#include "xfs_inode.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_fsverity.h" +#include "xfs_fsverity.h" +#include +#include + +/* + * With a maximum of 8 levels and 128 hashes per block (32-byte SHA-256), the + * maximum tree size is ((128^8 - 1)/(128 - 1)) = 567*10^12 blocks. This should + * fit in a 53-bit address space. + * + * At this Merkle tree size we can cover a 295EB file. This is much larger + * than the currently supported file size. + * + * For sha512 the largest file we can cover ends at offset 1 << 50, which is + * also fine. + */ +#define XFS_FSVERITY_LARGEST_FILE ((loff_t)1ULL << 53) + +loff_t +xfs_fsverity_metadata_offset( + const struct xfs_inode *ip) +{ + return round_up(i_size_read(VFS_IC(ip)), 65536); +} diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h new file mode 100644 index 000000000000..5771db2cd797 --- /dev/null +++ b/fs/xfs/xfs_fsverity.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. + */ +#ifndef __XFS_FSVERITY_H__ +#define __XFS_FSVERITY_H__ + +#include "xfs_platform.h" + +#ifdef CONFIG_FS_VERITY +loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip); +#else +static inline loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip) +{ + WARN_ON_ONCE(1); + return LLONG_MAX; +} +#endif /* CONFIG_FS_VERITY */ + +#endif /* __XFS_FSVERITY_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bd6d33557194..6df48d68a919 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -415,6 +415,12 @@ static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) */ #define XFS_IREMAPPING (1U << 15) +/* + * fs-verity's Merkle tree is under construction. The file is read-only, the + * only writes happening are for the fsverity metadata. + */ +#define XFS_VERITY_CONSTRUCTION (1U << 16) + /* All inode state flags related to inode reclaim. 
*/ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 9c2f12d5fec9..71ccd4ff5f48 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -32,6 +32,8 @@ #include "xfs_rtbitmap.h" #include "xfs_icache.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" +#include #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) @@ -1789,6 +1791,9 @@ xfs_buffered_write_iomap_begin( return xfs_direct_write_iomap_begin(inode, offset, count, flags, iomap, srcmap); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + error = xfs_qm_dqattach(ip); if (error) return error; @@ -2113,12 +2118,17 @@ xfs_read_iomap_begin( bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; u64 seq; + unsigned int iomap_flags = 0; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); if (xfs_is_shutdown(mp)) return -EIO; + if (fsverity_active(inode) && + (offset >= xfs_fsverity_metadata_offset(ip))) + iomap_flags |= IOMAP_F_FSVERITY; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -2132,8 +2142,9 @@ xfs_read_iomap_begin( if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0, seq); + iomap_flags |= shared ? IOMAP_F_SHARED : 0; + + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); } const struct iomap_ops xfs_read_iomap_ops = { -- 2.51.2