From: "Darrick J. Wong" Provide a new function call so that validation errors can be reported back to the filesystem. Signed-off-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn --- fs/verity/verify.c | 4 ++++ include/linux/fsverity.h | 14 ++++++++++++++ include/trace/events/fsverity.h | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 404ab68aaf9b..8f930b2ed9c0 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -312,6 +312,10 @@ static bool verify_data_block(struct fsverity_info *vi, data_pos, level - 1, params->hash_alg->name, hsize, want_hash, params->hash_alg->name, hsize, level == 0 ? dblock->real_hash : real_hash); + trace_fsverity_file_corrupt(inode, data_pos, params->block_size); + if (inode->i_sb->s_vop->file_corrupt) + inode->i_sb->s_vop->file_corrupt(inode, data_pos, + params->block_size); error: for (; level > 0; level--) { kunmap_local(hblocks[level - 1].addr); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index fed91023bea9..d8b581e3ce48 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -132,6 +132,20 @@ struct fsverity_operations { */ int (*write_merkle_tree_block)(struct file *file, const void *buf, u64 pos, unsigned int size); + + /** + * Notify the filesystem that file data is corrupt. + * + * @inode: the inode being validated + * @pos: the file position of the invalid data + * @len: the length of the invalid data + * + * This function is called when fs-verity detects that a portion of a + * file's data is inconsistent with the Merkle tree, or a Merkle tree + * block needed to validate the data is inconsistent with the level + * above it. 
+ */ + void (*file_corrupt)(struct inode *inode, loff_t pos, size_t len); }; #ifdef CONFIG_FS_VERITY diff --git a/include/trace/events/fsverity.h b/include/trace/events/fsverity.h index a8c52f21cbd5..0c842aaa4158 100644 --- a/include/trace/events/fsverity.h +++ b/include/trace/events/fsverity.h @@ -140,6 +140,25 @@ TRACE_EVENT(fsverity_verify_merkle_block, __entry->hidx) ); +TRACE_EVENT(fsverity_file_corrupt, + TP_PROTO(const struct inode *inode, loff_t pos, size_t len), + TP_ARGS(inode, pos, len), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(loff_t, pos) + __field(size_t, len) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + ), + TP_printk("ino %lu pos %llu len %zu", + (unsigned long) __entry->ino, + __entry->pos, + __entry->len) +); + #endif /* _TRACE_FSVERITY_H */ /* This part must be outside protection */ -- 2.51.2 This function will be used by XFS's scrub to force fsverity activation, therefore, to read fsverity context. Signed-off-by: Andrey Albershteyn --- fs/verity/open.c | 5 +++-- include/linux/fsverity.h | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/verity/open.c b/fs/verity/open.c index dfa0d1afe0fe..0483db672526 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -344,7 +344,7 @@ int fsverity_get_descriptor(struct inode *inode, return 0; } -static int ensure_verity_info(struct inode *inode) +int fsverity_ensure_verity_info(struct inode *inode) { struct fsverity_info *vi = fsverity_get_info(inode), *found; struct fsverity_descriptor *desc; @@ -380,12 +380,13 @@ static int ensure_verity_info(struct inode *inode) kfree(desc); return err; } +EXPORT_SYMBOL_GPL(fsverity_ensure_verity_info); int __fsverity_file_open(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE) return -EPERM; - return ensure_verity_info(inode); + return fsverity_ensure_verity_info(inode); } EXPORT_SYMBOL_GPL(__fsverity_file_open); diff --git a/include/linux/fsverity.h 
b/include/linux/fsverity.h index d8b581e3ce48..16740a331020 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -202,6 +202,7 @@ int fsverity_get_digest(struct inode *inode, /* open.c */ int __fsverity_file_open(struct inode *inode, struct file *filp); +int fsverity_ensure_verity_info(struct inode *inode); /* read_metadata.c */ @@ -288,6 +289,12 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) WARN_ON_ONCE(1); } +static inline int fsverity_ensure_verity_info(struct inode *inode) +{ + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + #endif /* !CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct fsverity_info *vi, -- 2.51.2 Filesystems implementing fsverity store fsverity metadata on similar offsets in pagecache. Prepare fsverity for consolidating this offset to the first folio after EOF folio. The max folio size is used to guarantee that mapped file will not expose fsverity metadata to userspace. So far, only XFS uses this in further patches.
Signed-off-by: Andrey Albershteyn --- fs/verity/pagecache.c | 6 ++++++ include/linux/fsverity.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 1819314ecaa3..73f03b48d42d 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -56,3 +56,9 @@ void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index, folio_put(folio); } EXPORT_SYMBOL_GPL(generic_readahead_merkle_tree); + +loff_t fsverity_metadata_offset(const struct inode *inode) +{ + return roundup(i_size_read(inode), mapping_max_folio_size_supported()); +} +EXPORT_SYMBOL_GPL(fsverity_metadata_offset); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 16740a331020..278c6340849f 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -214,6 +214,7 @@ bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, size_t len, size_t offset); void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); +loff_t fsverity_metadata_offset(const struct inode *inode); #else /* !CONFIG_FS_VERITY */ @@ -295,6 +296,12 @@ static inline int fsverity_ensure_verity_info(struct inode *inode) return -EOPNOTSUPP; } +static inline loff_t fsverity_metadata_offset(const struct inode *inode) +{ + WARN_ON_ONCE(1); + return ULLONG_MAX; +} + #endif /* !CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct fsverity_info *vi, -- 2.51.2 Compute the hash of one filesystem block's worth of zeros. A filesystem implementation can decide to elide merkle tree blocks containing only this hash and synthesize the contents at read time. Let's pretend that there's a file containing six data blocks and whose merkle tree looks roughly like this: root +--leaf0 | +--data0 | +--data1 | `--data2 `--leaf1 +--data3 +--data4 `--data5 If data[0-2] are sparse holes, then leaf0 will contain a repeating sequence of @zero_digest. 
Therefore, leaf0 need not be written to disk because its contents can be synthesized. A subsequent xfs patch will use this to reduce the size of the merkle tree when dealing with sparse gold master disk images and the like. Signed-off-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn --- fs/verity/fsverity_private.h | 3 +++ fs/verity/open.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 6e6854c19078..35636c1e2c41 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -53,6 +53,9 @@ struct merkle_tree_params { u64 tree_size; /* Merkle tree size in bytes */ unsigned long tree_pages; /* Merkle tree size in pages */ + /* the hash of a merkle block-sized buffer of zeroes */ + u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; + /* * Starting block index for each tree level, ordered from leaf level (0) * to root level ('num_levels - 1') diff --git a/fs/verity/open.c b/fs/verity/open.c index 0483db672526..94407a37aa08 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -153,6 +153,9 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, goto out_err; } + fsverity_hash_block(params, page_address(ZERO_PAGE(0)), + params->zero_digest); + params->tree_size = offset << log_blocksize; params->tree_pages = PAGE_ALIGN(params->tree_size) >> PAGE_SHIFT; return 0; -- 2.51.2 Helper to pre-fill folio with hashes of empty blocks. This will be used by XFS to synthesize blocks full of zero hashes on the fly. 
Signed-off-by: Andrey Albershteyn --- fs/verity/pagecache.c | 22 ++++++++++++++++++++++ include/linux/fsverity.h | 9 +++++++++ 2 files changed, 31 insertions(+) diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 73f03b48d42d..7642a7a09dfb 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -2,6 +2,7 @@ /* * Copyright 2019 Google LLC */ +#include "fsverity_private.h" #include #include @@ -62,3 +63,24 @@ loff_t fsverity_metadata_offset(const struct inode *inode) return roundup(i_size_read(inode), mapping_max_folio_size_supported()); } EXPORT_SYMBOL_GPL(fsverity_metadata_offset); + +/** + * fsverity_folio_zero_hash() - fill folio with hashes of zero data block + * @folio: folio to fill + * @poff: offset in the folio to start + * @plen: length of the range to fill with hashes + * @vi: fsverity info + */ +void fsverity_folio_zero_hash(struct folio *folio, size_t poff, size_t plen, + struct fsverity_info *vi) +{ + size_t offset = poff; + + WARN_ON_ONCE(!IS_ALIGNED(poff, vi->tree_params.digest_size)); + WARN_ON_ONCE(!IS_ALIGNED(plen, vi->tree_params.digest_size)); + + for (; offset < (poff + plen); offset += vi->tree_params.digest_size) + memcpy_to_folio(folio, offset, vi->tree_params.zero_digest, + vi->tree_params.digest_size); +} +EXPORT_SYMBOL_GPL(fsverity_folio_zero_hash); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 278c6340849f..addee462dcc2 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -215,6 +215,8 @@ bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); loff_t fsverity_metadata_offset(const struct inode *inode); +void fsverity_folio_zero_hash(struct folio *folio, size_t poff, size_t plen, + struct fsverity_info *vi); #else /* !CONFIG_FS_VERITY */ @@ -302,6 +304,13 @@ static inline loff_t fsverity_metadata_offset(const struct inode 
*inode) return ULLONG_MAX; } +static inline void fsverity_folio_zero_hash(struct folio *folio, size_t poff, + size_t plen, + struct fsverity_info *vi) +{ + WARN_ON_ONCE(1); +} + #endif /* !CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct fsverity_info *vi, -- 2.51.2 Let filesystem iterate over hashes in the block and check if these are hashes of zeroed data blocks. XFS will use this to decide whether it wants to store a tree block full of these hashes. Signed-off-by: Andrey Albershteyn --- fs/btrfs/verity.c | 6 +++++- fs/ext4/verity.c | 4 +++- fs/f2fs/verity.c | 4 +++- fs/verity/enable.c | 4 +++- include/linux/fsverity.h | 6 +++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index cd96fac4739f..30a2d206299f 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -776,11 +776,15 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, * @buf: Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) + * @zero_digest: the hash of a merkle block-sized buffer of zeroes + * @digest_size: size of zero_digest, in bytes * * Returns 0 on success or negative error code on failure */ static int btrfs_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { struct inode *inode = file_inode(file); loff_t merkle_pos = merkle_file_pos(inode); diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 5caa658adc12..3507af8a0979 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -372,7 +372,9 @@ static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index, } static int ext4_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { pos +=
ext4_verity_metadata_pos(file_inode(file)); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 92ebcc19cab0..b3b3e71604ac 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -270,7 +270,9 @@ static void f2fs_readahead_merkle_tree(struct inode *inode, pgoff_t index, } static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { pos += f2fs_verity_metadata_pos(file_inode(file)); diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 42dfed1ce0ce..ad4ff71d7dd9 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -50,7 +50,9 @@ static int write_merkle_tree_block(struct file *file, const u8 *buf, int err; err = inode->i_sb->s_vop->write_merkle_tree_block(file, buf, pos, - params->block_size); + params->block_size, + params->zero_digest, + params->digest_size); if (err) fsverity_err(inode, "Error %d writing Merkle tree block %lu", err, index); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index addee462dcc2..6a97a5a73f93 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -124,6 +124,8 @@ struct fsverity_operations { * @buf: the Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) + * @zero_digest: the hash of a merkle block-sized buffer of zeroes + * @digest_size: size of zero_digest, in bytes * * This is only called between ->begin_enable_verity() and * ->end_enable_verity(). @@ -131,7 +133,9 @@ struct fsverity_operations { * Return: 0 on success, -errno on failure */ int (*write_merkle_tree_block)(struct file *file, const void *buf, - u64 pos, unsigned int size); + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size); /** * Notify the filesystem that file data is corrupt. 
-- 2.51.2 Flag to indicate to iomap that write is happening beyond EOF and no isize checks/update is needed. Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 8 +++++--- fs/iomap/trace.h | 3 ++- include/linux/iomap.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ee7b845f5bc8..4cf9d0991dc1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -533,7 +533,8 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return 0; /* zero post-eof blocks as the page may be mapped */ - if (iomap_block_needs_zeroing(iter, pos)) { + if (iomap_block_needs_zeroing(iter, pos) && + !(iomap->flags & IOMAP_F_FSVERITY)) { folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { @@ -1130,13 +1131,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, * unlock and release the folio. */ old_size = iter->inode->i_size; - if (pos + written > old_size) { + if (pos + written > old_size && + !(iter->iomap.flags & IOMAP_F_FSVERITY)) { i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } __iomap_put_folio(iter, write_ops, written, folio); - if (old_size < pos) + if (old_size < pos && !(iter->iomap.flags & IOMAP_F_FSVERITY)) pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 532787277b16..5252051cc137 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -118,7 +118,8 @@ DEFINE_RANGE_EVENT(iomap_zero_iter); { IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \ { IOMAP_F_PRIVATE, "PRIVATE" }, \ { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \ - { IOMAP_F_STALE, "STALE" } + { IOMAP_F_STALE, "STALE" }, \ + { IOMAP_F_FSVERITY, "FSVERITY" } #define IOMAP_DIO_STRINGS \ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f0e3ed8ad6a6..94cf6241b37f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -87,6 +87,11 @@ 
struct vm_fault; #define IOMAP_F_INTEGRITY 0 #endif /* CONFIG_BLK_DEV_INTEGRITY */ +/* + * IO happens beyond inode EOF, fsverity metadata is stored there + */ +#define IOMAP_F_FSVERITY (1U << 10) + /* * Flag reserved for file system specific usage */ -- 2.51.2 fsverity metadata is stored at the next folio after largest folio containing EOF. Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4cf9d0991dc1..a95f87b4efe1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1817,7 +1817,8 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) trace_iomap_writeback_folio(inode, pos, folio_size(folio)); - if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) + if (!(wpc->iomap.flags & IOMAP_F_FSVERITY) && + !iomap_writeback_handle_eof(folio, inode, &end_pos)) return 0; WARN_ON_ONCE(end_pos <= pos); -- 2.51.2 The fsverity info would be used in subsequent patch to synthesize merkle blocks full of hashes of zeroed data blocks, to detect that iomap is reading fsverity descriptor, and passed down to ioend for filesystem to initiate fsverity bio verification.
Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 7 +++++++ include/linux/iomap.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a95f87b4efe1..cd74a15411cf 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -569,6 +570,9 @@ void iomap_read_folio(const struct iomap_ops *ops, trace_iomap_readpage(iter.inode, 1); + if (fsverity_active(iter.inode)) + ctx->vi = fsverity_get_info(iter.inode); + while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_submitted); @@ -633,6 +637,9 @@ void iomap_readahead(const struct iomap_ops *ops, trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); + if (fsverity_active(iter.inode)) + ctx->vi = fsverity_get_info(iter.inode); + while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, ctx, &cur_bytes_submitted); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 94cf6241b37f..771962549d74 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -428,6 +428,7 @@ struct iomap_ioend { loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ void *io_private; /* file system private data */ + struct fsverity_info *io_vi; /* fsverity info */ struct bio io_bio; /* MUST BE LAST! */ }; @@ -502,6 +503,7 @@ struct iomap_read_folio_ctx { struct readahead_control *rac; void *read_ctx; loff_t read_ctx_file_offset; + struct fsverity_info *vi; }; struct iomap_read_ops { -- 2.51.2 Issue reading of fsverity merkle tree on the fsverity inodes. This way metadata will be available at I/O completion time. 
Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index cd74a15411cf..bd3ab4e6b2bf 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -570,8 +570,12 @@ void iomap_read_folio(const struct iomap_ops *ops, trace_iomap_readpage(iter.inode, 1); - if (fsverity_active(iter.inode)) + if (fsverity_active(iter.inode)) { ctx->vi = fsverity_get_info(iter.inode); + if (iter.pos < fsverity_metadata_offset(iter.inode)) + fsverity_readahead(ctx->vi, folio->index, + folio_nr_pages(folio)); + } while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, -- 2.51.2 As fsverity metadata is not limited by EOF we also take the hole after fsverity descriptor as metadata region end. For filesystem which doesn't store merkle tree blocks full of hashes of zeroed data blocks synthesize merkle blocks full of these hashes. Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index bd3ab4e6b2bf..6ebf68fdc386 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -533,18 +533,45 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, if (plen == 0) return 0; + /* + * We hits this for two case: + * 1. No need to go further, the hole after fsverity descriptor + * is the end of the fsverity metadata. No ctx->vi means we are + * reading folio with descriptor. + * 2. This folio contains merkle tree blocks which need to be + * synthesized and fsverity descriptor. Skip these blocks as we + * don't know how to synthesize them yet. 
+ */ + if ((iomap->flags & IOMAP_F_FSVERITY) && + (iomap->type == IOMAP_HOLE) && + !(ctx->vi)) { + iomap_set_range_uptodate(folio, poff, plen); + return iomap_iter_advance(iter, plen); + } + /* zero post-eof blocks as the page may be mapped */ if (iomap_block_needs_zeroing(iter, pos) && !(iomap->flags & IOMAP_F_FSVERITY)) { folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { - if (!*bytes_submitted) - iomap_read_init(folio); - ret = ctx->ops->read_folio_range(iter, ctx, plen); - if (ret) - return ret; - *bytes_submitted += plen; + /* + * Synthesize zero hash folio if we are reading merkle + * tree blocks + */ + if ((iomap->flags & IOMAP_F_FSVERITY) && + (iomap->type == IOMAP_HOLE)) { + fsverity_folio_zero_hash(folio, poff, plen, + ctx->vi); + iomap_set_range_uptodate(folio, poff, plen); + } else { + if (!*bytes_submitted) + iomap_read_init(folio); + ret = ctx->ops->read_folio_range(iter, ctx, plen); + if (ret) + return ret; + *bytes_submitted += plen; + } } ret = iomap_iter_advance(iter, plen); -- 2.51.2 fsverity needs to verify consistency of the files against the root hash, the holes are also hashed in the tree. Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6ebf68fdc386..9468c5d60b23 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -553,6 +553,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, if (iomap_block_needs_zeroing(iter, pos) && !(iomap->flags & IOMAP_F_FSVERITY)) { folio_zero_range(folio, poff, plen); + if (fsverity_active(iter->inode) && + !fsverity_verify_blocks(ctx->vi, folio, plen, poff)) + return -EIO; iomap_set_range_uptodate(folio, poff, plen); } else { /* -- 2.51.2 fsverity will not have file struct in ioend context (fsverity_verify_bio() path). This will cause null pointer dereference here. 
Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf985b5e73a0..36c4b2b4b07a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -779,7 +779,7 @@ xfs_vm_read_folio( { struct iomap_read_folio_ctx ctx = { .cur_folio = folio, - .ops = xfs_bio_read_ops(XFS_I(file->f_mapping->host)), + .ops = xfs_bio_read_ops(XFS_I(folio->mapping->host)), }; iomap_read_folio(&xfs_read_iomap_ops, &ctx); -- 2.51.2 To mark inodes with fs-verity enabled the new XFS_DIFLAG2_VERITY flag will be added in further patch. This requires ro-compat flag to let older kernels know that fs with fs-verity can not be modified. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_format.h | 1 + fs/xfs/libxfs/xfs_sb.c | 2 ++ fs/xfs/xfs_mount.h | 2 ++ 3 files changed, 5 insertions(+) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 779dac59b1f3..64c2acd1cfca 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -374,6 +374,7 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ #define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ +#define XFS_SB_FEAT_RO_COMPAT_VERITY (1 << 4) /* fs-verity */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 94c272a2ae26..744bd8480ba6 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -165,6 +165,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_REFLINK; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) features |= XFS_FEAT_INOBTCNT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_VERITY) + features |= XFS_FEAT_VERITY; if (sbp->sb_features_incompat & 
XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b871dfde372b..8ef7fea8b325 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -381,6 +381,7 @@ typedef struct xfs_mount { #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ #define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ #define XFS_FEAT_ZONED (1ULL << 29) /* zoned RT device */ +#define XFS_FEAT_VERITY (1ULL << 30) /* fs-verity */ /* Mount features */ #define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */ @@ -438,6 +439,7 @@ __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) __XFS_HAS_FEAT(metadir, METADIR) __XFS_HAS_FEAT(zoned, ZONED) __XFS_HAS_FEAT(nolifetime, NOLIFETIME) +__XFS_HAS_FEAT(verity, VERITY) static inline bool xfs_has_rtgroups(const struct xfs_mount *mp) { -- 2.51.2 Add flag to mark inodes which have fs-verity enabled on them (i.e. descriptor exist and tree is built). Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_format.h | 7 ++++++- fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++ fs/xfs/libxfs/xfs_inode_util.c | 2 ++ fs/xfs/xfs_iops.c | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 64c2acd1cfca..d67b404964fc 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1231,16 +1231,21 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) */ #define XFS_DIFLAG2_METADATA_BIT 5 +/* inodes sealed with fs-verity */ +#define XFS_DIFLAG2_VERITY_BIT 6 + #define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) #define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) #define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) #define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT) +#define XFS_DIFLAG2_VERITY (1ULL << XFS_DIFLAG2_VERITY_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA | \ + XFS_DIFLAG2_VERITY) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b1812b2c3cce..c4fff7a34cbf 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -756,6 +756,14 @@ xfs_dinode_verify( !xfs_has_rtreflink(mp)) return __this_address; + /* only regular files can have fsverity */ + if (flags2 & XFS_DIFLAG2_VERITY) { + if (!xfs_has_verity(mp)) + return __this_address; + if ((mode & S_IFMT) != S_IFREG) + return __this_address; + } + if (xfs_has_zoned(mp) && dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)) { if (be32_to_cpu(dip->di_used_blocks) > mp->m_sb.sb_rgextents) diff --git 
a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 309ce6dd5553..aaf51207b224 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -126,6 +126,8 @@ xfs_ip2xflags( flags |= FS_XFLAG_DAX; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) flags |= FS_XFLAG_COWEXTSIZE; + if (ip->i_diflags2 & XFS_DIFLAG2_VERITY) + flags |= FS_XFLAG_VERITY; } if (xfs_inode_has_attr_fork(ip)) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ad94fbf55014..6b8e4e87abee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1394,6 +1394,8 @@ xfs_diflags_to_iflags( flags |= S_NOATIME; if (init && xfs_inode_should_enable_dax(ip)) flags |= S_DAX; + if (xflags & FS_XFLAG_VERITY) + flags |= S_VERITY; /* * S_DAX can only be set during inode initialization and is never set by -- 2.51.2 fs-verity will read and attach metadata (not the tree itself) from a disk for those inodes which already have fs-verity enabled. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f6cc63dcf961..693d298ac388 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -36,6 +36,7 @@ #include #include #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -1639,11 +1640,18 @@ xfs_file_open( struct inode *inode, struct file *file) { + int error; + if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; if (xfs_get_atomic_write_min(XFS_I(inode)) > 0) file->f_mode |= FMODE_CAN_ATOMIC_WRITE; + + error = fsverity_file_open(inode, file); + if (error) + return error; + return generic_file_open(inode, file); } -- 2.51.2 fs-verity doesn't support DAX. Forbid filesystem to enable DAX on inodes which already have fs-verity enabled. The opposite is checked when fs-verity is enabled, it won't be enabled if DAX is. 
Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: fix typo in subject] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 6b8e4e87abee..44c616113734 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1366,6 +1366,8 @@ xfs_inode_should_enable_dax( return false; if (!xfs_inode_supports_dax(ip)) return false; + if (ip->i_diflags2 & XFS_DIFLAG2_VERITY) + return false; if (xfs_has_dax_always(ip->i_mount)) return true; if (ip->i_diflags2 & XFS_DIFLAG2_DAX) -- 2.51.2 The direct path is not supported on verity files. Attempts to use direct I/O path on such files should fall back to buffered I/O path. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: fix braces] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 693d298ac388..78a65926de43 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -281,7 +281,8 @@ xfs_file_dax_read( struct kiocb *iocb, struct iov_iter *to) { - struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); ssize_t ret = 0; trace_xfs_file_dax_read(iocb, to); @@ -332,6 +333,14 @@ xfs_file_read_iter( if (xfs_is_shutdown(mp)) return -EIO; + /* + * In case fs-verity is enabled, we also fallback to the buffered read + * from the direct read path. Therefore, IOCB_DIRECT is set and need to + * be cleared (see generic_file_read_iter()) + */ + if (fsverity_active(inode)) + iocb->ki_flags &= ~IOCB_DIRECT; + if (IS_DAX(inode)) ret = xfs_file_dax_read(iocb, to); else if (iocb->ki_flags & IOCB_DIRECT) -- 2.51.2 Add new flag meaning that merkle tree is being built on the inode.
Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_inode.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bd6d33557194..6df48d68a919 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -415,6 +415,12 @@ static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) */ #define XFS_IREMAPPING (1U << 15) +/* + * fs-verity's Merkle tree is under construction. The file is read-only, the + * only writes happening are for the fsverity metadata. + */ +#define XFS_VERITY_CONSTRUCTION (1U << 16) + /* All inode state flags related to inode reclaim. */ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ -- 2.51.2 This is the location of fsverity metadata in the file. This offset is used to store data on disk. When metadata is read into pagecache they are shifted to the offset returned by fsverity_metadata_offset(). Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 12463ba766da..e9c92bc0e64b 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1106,4 +1106,28 @@ enum xfs_device { #define BBTOB(bbs) ((bbs) << BBSHIFT) #endif +/* + * Merkle tree and fsverity descriptor location on disk, in bytes. While this + * offset is huge, when data is read into pagecache iomap uses offset returned + * by fsverity_metadata_offset(), which is just beyond EOF. + * + * At maximum of 8 levels with 128 hashes per block (32 bytes SHA-256) maximum + * tree size is ((128^8 − 1)/(128 − 1)) = 567*10^12 blocks. This should fit in 53 + * bits address space. + * + * At this Merkle tree size we can cover 295EB large file. This is much larger + * than the currently supported file size. + * + * For sha512 the largest file we can cover ends at 1 << 50 offset, this is also + * good.
+ * + * The metadata is placed as follows: + * + * [merkle tree...][descriptor.............desc_size] + * ^ (1 << 53) ^ (block border) ^ (end of the block) + * ^--------------------------------^ + * Can be FS_VERITY_MAX_DESCRIPTOR_SIZE + */ +#define XFS_FSVERITY_REGION_START ((loff_t)1ULL << 53) + #endif /* __XFS_FS_H__ */ -- 2.51.2 While writing the Merkle tree, the file is read-only and there are no further writes except for Merkle tree building. The file will be truncated beforehand to remove any preallocated extents in further patches. The Merkle tree is the only data XFS will write. We don't want XFS to truncate any post EOF extents due to existing preallocated extents. Therefore, we also need to disable preallocations while writing the Merkle tree. Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_iomap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index b5d70bcb63b9..52c41ef36d6d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1941,7 +1941,9 @@ xfs_buffered_write_iomap_begin( * Determine the initial size of the preallocation. * We clean up any extra preallocation when the file is closed. */ - if (xfs_has_allocsize(mp)) + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + prealloc_blocks = 0; + else if (xfs_has_allocsize(mp)) prealloc_blocks = mp->m_allocsize_blocks; else if (allocfork == XFS_DATA_FORK) prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, -- 2.51.2 For write/writeback set the IOMAP_F_FSVERITY flag, telling iomap not to update the inode size, as this is not file data, and not to skip folios beyond EOF. In the read path let iomap know that we are reading fsverity metadata. So, treat holes in the tree as a request to synthesize tree blocks and a hole after the descriptor as the end of the fsverity region.
Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_bmap.c | 3 +++ fs/xfs/xfs_aops.c | 18 +++++++++++++++++- fs/xfs/xfs_iomap.c | 12 ++++++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 53ef4b7e504d..99a3ff2ee928 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4451,6 +4451,9 @@ xfs_bmapi_convert_one_delalloc( XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + flags |= IOMAP_F_FSVERITY; + ASSERT(!isnullstartblock(bma.got.br_startblock)); xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 36c4b2b4b07a..f95dc51eb044 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -22,6 +22,7 @@ #include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_rtgroup.h" +#include "xfs_fsverity.h" #include struct xfs_writepage_ctx { @@ -339,12 +340,16 @@ xfs_map_blocks( int retries = 0; int error = 0; unsigned int *seq; + unsigned int iomap_flags = 0; if (xfs_is_shutdown(mp)) return -EIO; XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. 
COW I/O always takes precedent, so we must always @@ -432,7 +437,7 @@ xfs_map_blocks( isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, iomap_flags, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: @@ -705,6 +710,17 @@ xfs_vm_writepages( }, }; + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) { + wbc->range_start = fsverity_metadata_offset(VFS_I(ip)); + wbc->range_end = LLONG_MAX; + wbc->nr_to_write = LONG_MAX; + /* + * Set IOMAP_F_FSVERITY to skip initial EOF check + * The following iomap->flags would be set in + * xfs_map_blocks() + */ + wpc.ctx.iomap.flags |= IOMAP_F_FSVERITY; + } return iomap_writepages(&wpc.ctx); } } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 52c41ef36d6d..6b14221ecee2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -32,6 +32,7 @@ #include "xfs_rtbitmap.h" #include "xfs_icache.h" #include "xfs_zone_alloc.h" +#include #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) @@ -1789,6 +1790,9 @@ xfs_buffered_write_iomap_begin( return xfs_direct_write_iomap_begin(inode, offset, count, flags, iomap, srcmap); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + error = xfs_qm_dqattach(ip); if (error) return error; @@ -2114,12 +2118,16 @@ xfs_read_iomap_begin( bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; u64 seq; + unsigned int iomap_flags = 0; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); if (xfs_is_shutdown(mp)) return -EIO; + if (fsverity_active(inode) && offset >= XFS_FSVERITY_REGION_START) + iomap_flags |= IOMAP_F_FSVERITY; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -2133,8 +2141,8 @@ xfs_read_iomap_begin( if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, 
&imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0, seq); + iomap_flags |= shared ? IOMAP_F_SHARED : 0; + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); } const struct iomap_ops xfs_read_iomap_ops = { -- 2.51.2 Simple helper to check that this is not fsverity metadata but file data that needs verification. XFS will use this in iomap callbacks to check what is being read. Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_fsverity.c | 21 +++++++++++++++++++++ fs/xfs/xfs_fsverity.h | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 fs/xfs/xfs_fsverity.c create mode 100644 fs/xfs/xfs_fsverity.h diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c new file mode 100644 index 000000000000..47add19a241e --- /dev/null +++ b/fs/xfs/xfs_fsverity.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. + */ +#include "xfs.h" +#include "xfs_format.h" +#include "xfs_inode.h" +#include "xfs_fsverity.h" +#include + +bool +xfs_fsverity_sealed_data( + const struct xfs_inode *ip, + loff_t offset) +{ + const struct inode *inode = VFS_IC(ip); + + return fsverity_active(inode) && + (offset < fsverity_metadata_offset(inode)); +} + diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h new file mode 100644 index 000000000000..5fc55f42b317 --- /dev/null +++ b/fs/xfs/xfs_fsverity.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. + */ +#ifndef __XFS_FSVERITY_H__ +#define __XFS_FSVERITY_H__ + +#include "xfs.h" + +#ifdef CONFIG_FS_VERITY +bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, + loff_t offset); +#else +static inline loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, + loff_t pos) +{ + WARN_ON_ONCE(1); + return ULLONG_MAX; +} +#endif /* CONFIG_FS_VERITY */ + +#endif /* __XFS_FSVERITY_H__ */ -- 2.51.2 The ioends are offloaded to workqueue for further processing of completed BIOs. 
Use read end ioends for fsverity verification. Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_aops.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f95dc51eb044..9d4fc3322ec7 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -204,11 +204,15 @@ xfs_end_io( io_list))) { list_del_init(&ioend->io_list); iomap_ioend_try_merge(ioend, &tmp); - if (bio_op(&ioend->io_bio) == REQ_OP_READ) + if (bio_op(&ioend->io_bio) == REQ_OP_READ) { + if (xfs_fsverity_sealed_data(ip, ioend->io_offset)) + fsverity_verify_bio(ioend->io_vi, + &ioend->io_bio); iomap_finish_ioends(ioend, blk_status_to_errno(ioend->io_bio.bi_status)); - else + } else { xfs_end_ioend_write(ioend); + } cond_resched(); } } @@ -766,9 +770,12 @@ xfs_bio_submit_read( struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx; + struct iomap_ioend *ioend; /* delay read completions to the ioend workqueue */ - iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0); + ioend = iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0); + ioend->io_vi = ctx->vi; + bio->bi_end_io = xfs_end_bio; submit_bio(bio); } @@ -781,10 +788,13 @@ static const struct iomap_read_ops xfs_bio_read_integrity_ops = { static inline const struct iomap_read_ops * xfs_bio_read_ops( - const struct xfs_inode *ip) + const struct xfs_inode *ip, + loff_t offset) { if (bdev_has_integrity_csum(xfs_inode_buftarg(ip)->bt_bdev)) return &xfs_bio_read_integrity_ops; + if (xfs_fsverity_sealed_data(ip, offset)) + return &xfs_bio_read_integrity_ops; return &iomap_bio_read_ops; } @@ -793,9 +803,11 @@ xfs_vm_read_folio( struct file *file, struct folio *folio) { + const struct iomap_read_ops *ops = xfs_bio_read_ops( + XFS_I(folio->mapping->host), folio_pos(folio)); struct iomap_read_folio_ctx ctx = { .cur_folio = folio, - .ops = xfs_bio_read_ops(XFS_I(folio->mapping->host)), + .ops = ops, }; iomap_read_folio(&xfs_read_iomap_ops, &ctx); @@ 
-806,9 +818,11 @@ STATIC void xfs_vm_readahead( struct readahead_control *rac) { + const struct iomap_read_ops *ops = xfs_bio_read_ops( + XFS_I(rac->mapping->host), readahead_pos(rac)); struct iomap_read_folio_ctx ctx = { .rac = rac, - .ops = xfs_bio_read_ops(XFS_I(rac->mapping->host)), + .ops = ops, }; iomap_readahead(&xfs_read_iomap_ops, &ctx); -- 2.51.2 This helpers converts offset which XFS uses to store fsverity metadata on disk to the offset in the pagecache. Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_fsverity.c | 14 ++++++++++++++ fs/xfs/xfs_fsverity.h | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index 47add19a241e..4b918eb746d7 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -8,6 +8,20 @@ #include "xfs_fsverity.h" #include +loff_t +xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t offset) +{ + return (offset - fsverity_metadata_offset(VFS_I(ip))) | + XFS_FSVERITY_REGION_START; +} + +loff_t +xfs_fsverity_offset_from_disk(struct xfs_inode *ip, loff_t offset) +{ + return (offset ^ XFS_FSVERITY_REGION_START) + + fsverity_metadata_offset(VFS_I(ip)); +} + bool xfs_fsverity_sealed_data( const struct xfs_inode *ip, diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h index 5fc55f42b317..6f3d60f010d8 100644 --- a/fs/xfs/xfs_fsverity.h +++ b/fs/xfs/xfs_fsverity.h @@ -10,6 +10,8 @@ #ifdef CONFIG_FS_VERITY bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, loff_t offset); +loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t pos); +loff_t xfs_fsverity_offset_from_disk(struct xfs_inode *ip, loff_t offset); #else static inline loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t pos) @@ -17,6 +19,17 @@ static inline loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, WARN_ON_ONCE(1); return ULLONG_MAX; } +static inline loff_t xfs_fsverity_offset_from_disk(struct xfs_inode *ip, + loff_t offset) +{ + WARN_ON_ONCE(1); + return ULLONG_MAX; 
+} +static inline bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, + loff_t offset) +{ + return false; +} #endif /* CONFIG_FS_VERITY */ #endif /* __XFS_FSVERITY_H__ */ -- 2.51.2 A little helper for xfs_bmbt_to_iomap() to decide if offset needs to be converted from a large disk one to smaller page cache one. Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_fsverity.c | 28 ++++++++++++++++++++++++++++ fs/xfs/xfs_fsverity.h | 9 +++++++++ 2 files changed, 37 insertions(+) diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index 4b918eb746d7..4f8a40317dc3 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -5,8 +5,13 @@ #include "xfs.h" #include "xfs_format.h" #include "xfs_inode.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_fsverity.h" #include "xfs_fsverity.h" #include +#include loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t offset) @@ -33,3 +38,26 @@ xfs_fsverity_sealed_data( (offset < fsverity_metadata_offset(inode)); } +/* + * A little helper for xfs_bmbt_to_iomap to decide if offset needs to be + * converted from a large disk one to smaller page cache one. + * + * As xfs_bmbt_to_iomap() can be used during writing (tree building) and reading + * (fsverity enabled) we need to check for both cases. 
+ */ +bool +xfs_fsverity_need_convert_offset( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + unsigned int mapping_flags) +{ + struct xfs_mount *mp = ip->i_mount; + + return (fsverity_active(VFS_I(ip)) || + xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) && + (XFS_FSB_TO_B(mp, imap->br_startoff) >= + XFS_FSVERITY_REGION_START) && + !(mapping_flags & IOMAP_REPORT); + +} + diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h index 6f3d60f010d8..ab01ceef4d15 100644 --- a/fs/xfs/xfs_fsverity.h +++ b/fs/xfs/xfs_fsverity.h @@ -12,6 +12,9 @@ bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, loff_t offset); loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t pos); loff_t xfs_fsverity_offset_from_disk(struct xfs_inode *ip, loff_t offset); +bool xfs_fsverity_need_convert_offset(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + unsigned int mapping_flags); #else static inline loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t pos) @@ -30,6 +33,12 @@ static inline bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, { return false; } +static inline bool xfs_fsverity_need_convert_offset(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + unsigned int mapping_flags) +{ + return false; +} #endif /* CONFIG_FS_VERITY */ #endif /* __XFS_FSVERITY_H__ */ -- 2.51.2 Convert between pagecache and on-disk offset while reading/writing fsverity metadata through iomap. We can not use on-disk (1ULL << 53) offset for pagecache as it doesn't fit into 32-bit address space and the page radix tree is going to be quite high on 64-bit. To prevent this we use lower offset, right after EOF. The fsverity_metadata_offset() sets it to be next largest folio after EOF. We can not use this pagecache offset for on-disk file offset though, as this is folio size dependent. Therefore, for on-disk we use offset far beyond EOF which allows to use largest file size supported by fsverity. 
Also don't convert offset if IOMAP_REPORT is set as we need to see real extents for fiemap. Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_bmap.c | 12 ++++++++++-- fs/xfs/xfs_aops.c | 13 ++++++++++--- fs/xfs/xfs_iomap.c | 33 ++++++++++++++++++++++++++------- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 99a3ff2ee928..05fddd34c697 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -41,6 +41,8 @@ #include "xfs_inode_util.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" +#include struct kmem_cache *xfs_bmap_intent_cache; @@ -4451,7 +4453,9 @@ xfs_bmapi_convert_one_delalloc( XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); - if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION) && + XFS_FSB_TO_B(mp, bma.got.br_startoff) >= + fsverity_metadata_offset(VFS_I(ip))) flags |= IOMAP_F_FSVERITY; ASSERT(!isnullstartblock(bma.got.br_startblock)); @@ -4495,6 +4499,10 @@ xfs_bmapi_convert_delalloc( unsigned int *seq) { int error; + loff_t iomap_offset = offset; + + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_offset = xfs_fsverity_offset_from_disk(ip, offset); /* * Attempt to allocate whatever delalloc extent currently backs offset @@ -4507,7 +4515,7 @@ xfs_bmapi_convert_delalloc( iomap, seq); if (error) return error; - } while (iomap->offset + iomap->length <= offset); + } while (iomap->offset + iomap->length <= iomap_offset); return 0; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 9d4fc3322ec7..53aeea5e9ebd 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -335,8 +335,8 @@ xfs_map_blocks( struct xfs_inode *ip = XFS_I(wpc->inode); struct xfs_mount *mp = ip->i_mount; ssize_t count = i_blocksize(wpc->inode); - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset 
+ count); + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; xfs_fileoff_t cow_fsb; int whichfork; struct xfs_bmbt_irec imap; @@ -351,8 +351,12 @@ xfs_map_blocks( XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); - if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) { iomap_flags |= IOMAP_F_FSVERITY; + offset = xfs_fsverity_offset_to_disk(ip, offset); + } + offset_fsb = XFS_B_TO_FSBT(mp, offset); + end_fsb = XFS_B_TO_FSB(mp, offset + count); /* * COW fork blocks can overlap data fork blocks even if the blocks @@ -484,6 +488,9 @@ xfs_map_blocks( wpc->iomap.length = cow_offset - wpc->iomap.offset; } + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + offset = xfs_fsverity_offset_from_disk(ip, offset); + ASSERT(wpc->iomap.offset <= offset); ASSERT(wpc->iomap.offset + wpc->iomap.length > offset); trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 6b14221ecee2..a04361cf0e99 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -32,6 +32,7 @@ #include "xfs_rtbitmap.h" #include "xfs_icache.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" #include #define XFS_ALLOC_ALIGN(mp, off) \ @@ -142,7 +143,11 @@ xfs_bmbt_to_iomap( xfs_rtbno_is_group_start(mp, imap->br_startblock)) iomap->flags |= IOMAP_F_BOUNDARY; } - iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); + if (xfs_fsverity_need_convert_offset(ip, imap, mapping_flags)) + iomap->offset = xfs_fsverity_offset_from_disk( + ip, XFS_FSB_TO_B(mp, imap->br_startoff)); + else + iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); iomap->flags = iomap_flags; if (mapping_flags & IOMAP_DAX) { @@ -629,6 +634,8 @@ xfs_iomap_write_unwritten( uint resblks; int error; + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + offset = xfs_fsverity_offset_to_disk(ip, offset); trace_xfs_unwritten_convert(ip, offset, count); offset_fsb = 
XFS_B_TO_FSBT(mp, offset); @@ -1766,8 +1773,8 @@ xfs_buffered_write_iomap_begin( iomap); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count); + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; struct xfs_bmbt_irec imap, cmap; struct xfs_iext_cursor icur, ccur; xfs_fsblock_t prealloc_blocks = 0; @@ -1790,8 +1797,12 @@ xfs_buffered_write_iomap_begin( return xfs_direct_write_iomap_begin(inode, offset, count, flags, iomap, srcmap); - if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) { iomap_flags |= IOMAP_F_FSVERITY; + offset = xfs_fsverity_offset_to_disk(ip, offset); + } + offset_fsb = XFS_B_TO_FSBT(mp, offset); + end_fsb = xfs_iomap_end_fsb(mp, offset, count); error = xfs_qm_dqattach(ip); if (error) @@ -2112,8 +2123,8 @@ xfs_read_iomap_begin( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; int nimaps = 1, error = 0; bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; @@ -2125,8 +2136,15 @@ xfs_read_iomap_begin( if (xfs_is_shutdown(mp)) return -EIO; - if (fsverity_active(inode) && offset >= XFS_FSVERITY_REGION_START) + if (fsverity_active(inode) && + (offset >= fsverity_metadata_offset(inode)) && + !(flags & IOMAP_REPORT)) { iomap_flags |= IOMAP_F_FSVERITY; + offset = xfs_fsverity_offset_to_disk(ip, offset); + } + + offset_fsb = XFS_B_TO_FSBT(mp, offset); + end_fsb = xfs_iomap_end_fsb(mp, offset, length); error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) @@ -2142,6 +2160,7 @@ xfs_read_iomap_begin( return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); iomap_flags |= shared ? 
IOMAP_F_SHARED : 0; + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); } -- 2.51.2 Add integration with fs-verity. XFS stores the fs-verity descriptor and Merkle tree in the inode data fork at file offset (1 << 53). The Merkle tree reading/writing is done through the iomap interface. The data itself is read into the inode's page cache. When XFS reads from this region iomap doesn't call into fsverity to verify it against the Merkle tree. For data, verification is done on BIO completion in a workqueue. When fs-verity is enabled on an inode, the XFS_VERITY_CONSTRUCTION flag is set, meaning that the Merkle tree is being built. The initialization ends with storing the verity descriptor and setting the inode on-disk flag (XFS_DIFLAG2_VERITY). The XFS_VERITY_CONSTRUCTION flag is dropped and S_VERITY is set on the inode. The descriptor is stored in a new block after the last Merkle tree block. The size of the descriptor is stored at the end of the last descriptor block (the descriptor can span multiple blocks).
Signed-off-by: Andrey Albershteyn --- fs/xfs/Makefile | 1 + fs/xfs/xfs_bmap_util.c | 8 + fs/xfs/xfs_fsverity.c | 423 ++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_fsverity.h | 2 + fs/xfs/xfs_message.c | 4 + fs/xfs/xfs_message.h | 1 + fs/xfs/xfs_mount.h | 2 + fs/xfs/xfs_super.c | 7 + 8 files changed, 447 insertions(+), 1 deletion(-) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5bf501cf8271..ad66439db7bf 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -147,6 +147,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o +xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o # notify failure ifeq ($(CONFIG_MEMORY_FAILURE),y) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2208a720ec3f..3cd638e4f8b9 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -31,6 +31,7 @@ #include "xfs_rtbitmap.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include /* Kernel only BMAP related definitions and functions */ @@ -553,6 +554,13 @@ xfs_can_free_eofblocks( if (last_fsb <= end_fsb) return false; + /* + * Nothing to clean on fsverity inodes as they don't use prealloc and + * there no delalloc as only written data is fsverity metadata + */ + if (IS_VERITY(VFS_I(ip))) + return false; + /* * Check if there is an post-EOF extent to free. 
If there are any * delalloc blocks attached to the inode (data fork delalloc diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index 4f8a40317dc3..cdbe26ab88b7 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -4,14 +4,26 @@ */ #include "xfs.h" #include "xfs_format.h" -#include "xfs_inode.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_fsverity.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_log_format.h" +#include "xfs_bmap_util.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_trace.h" +#include "xfs_quota.h" #include "xfs_fsverity.h" +#include "xfs_iomap.h" +#include "xfs_error.h" +#include "xfs_health.h" #include #include +#include loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t offset) @@ -61,3 +73,412 @@ xfs_fsverity_need_convert_offset( } +static int +xfs_fsverity_read( + struct inode *inode, + void *buf, + size_t count, + loff_t pos) +{ + struct folio *folio; + size_t n; + + while (count) { + folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + + n = memcpy_from_file_folio(buf, folio, pos, count); + folio_put(folio); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +static int +xfs_fsverity_write( + struct file *file, + loff_t pos, + size_t length, + const void *buf) +{ + int ret; + struct iov_iter iiter; + struct kvec kvec = { + .iov_base = (void *)buf, + .iov_len = length, + }; + struct kiocb iocb = { + .ki_filp = file, + .ki_ioprio = get_current_ioprio(), + .ki_pos = pos, + }; + + iov_iter_kvec(&iiter, WRITE, &kvec, 1, length); + + ret = iomap_file_buffered_write(&iocb, &iiter, + &xfs_buffered_write_iomap_ops, + &xfs_iomap_write_ops, NULL); + if (ret < 0) + return ret; + return 0; +} + +/* + * In cases when merkle tree block (1k) == fs block size (1k) and less than + * PAGE_SIZE (4k) we can get the following layout in the 
file: + * + * [ merkle block | 1k hole | 1k hole | fsverity descriptor] + * + * These holes are merkle tree blocks which are filled by iomap with hashes of + * zeroed data blocks. + * + * Anything in fsverity starts with reading a descriptor. When iomap reads this + * page for the descriptor it doesn't know how to synthesize those merkle tree + * blocks. So, those are left with random data and marked uptodate. + * + * After we're done with reading the descriptor we invalidate the page + * containing descriptor. As a descriptor for this inode is already searchable + * in the hashtable, iomap can synthesize these blocks when requested again. + */ +static int +xfs_fsverity_drop_descriptor_page( + struct inode *inode, + u64 offset) +{ + pgoff_t index = offset >> PAGE_SHIFT; + + return invalidate_inode_pages2_range(inode->i_mapping, index, index); +} + +/* + * Retrieve the verity descriptor. + */ +static int +xfs_fsverity_get_descriptor( + struct inode *inode, + void *buf, + size_t buf_size) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + __be32 d_desc_size; + u32 desc_size; + u64 desc_size_pos; + int error; + u64 desc_pos; + struct xfs_bmbt_irec rec; + int is_empty; + uint32_t blocksize = i_blocksize(VFS_I(ip)); + xfs_fileoff_t last_block_offset; + + ASSERT(inode->i_flags & S_VERITY); + error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &rec, &is_empty); + if (error) + return error; + + if (is_empty) + return -ENODATA; + + last_block_offset = + XFS_FSB_TO_B(mp, rec.br_startoff + rec.br_blockcount); + if (last_block_offset < XFS_FSVERITY_REGION_START) + return -ENODATA; + + desc_size_pos = xfs_fsverity_offset_from_disk(ip, last_block_offset) - + sizeof(__be32); + error = xfs_fsverity_read(inode, (char *)&d_desc_size, + sizeof(d_desc_size), desc_size_pos); + if (error) + return error; + + desc_size = be32_to_cpu(d_desc_size); + if (XFS_IS_CORRUPT(mp, desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) + return -ERANGE; + if 
(XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) + return -ERANGE; + + if (!buf_size) + return desc_size; + + if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) + return -ERANGE; + + desc_pos = round_down(desc_size_pos - desc_size, blocksize); + error = xfs_fsverity_read(inode, buf, desc_size, desc_pos); + if (error) + return error; + + xfs_fsverity_drop_descriptor_page(inode, desc_pos); + + return desc_size; +} + +static int +xfs_fsverity_write_descriptor( + struct file *file, + const void *desc, + u32 desc_size, + u64 merkle_tree_size) +{ + int error; + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + unsigned int blksize = ip->i_mount->m_attr_geo->blksize; + u64 tree_last_block = + fsverity_metadata_offset(VFS_I(ip)) + merkle_tree_size; + u64 desc_pos = round_up(tree_last_block, blksize); + u64 desc_end = desc_pos + desc_size; + __be32 desc_size_disk = cpu_to_be32(desc_size); + u64 desc_size_pos = + round_up(desc_end + sizeof(desc_size_disk), blksize) - + sizeof(desc_size_disk); + + error = xfs_fsverity_write(file, desc_size_pos, + sizeof(__be32), + (const void *)&desc_size_disk); + if (error) + return error; + + error = xfs_fsverity_write(file, desc_pos, desc_size, desc); + + xfs_fsverity_drop_descriptor_page(inode, desc_pos); + return error; +} + +/* + * Try to remove all the fsverity metadata after a failed enablement. 
+ */ +static int +xfs_fsverity_delete_metadata( + struct xfs_inode *ip) +{ + struct xfs_trans *tp; + struct xfs_mount *mp = ip->i_mount; + int error; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* + * We removing post EOF data, no need to update i_size as fsverity + * didn't move i_size in the first place + */ + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, XFS_ISIZE(ip)); + if (error) + goto err_cancel; + + error = xfs_trans_commit(tp); + if (error) + goto err_cancel; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; + +err_cancel: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_trans_cancel(tp); + return error; +} + + +/* + * Prepare to enable fsverity by clearing old metadata. + */ +static int +xfs_fsverity_begin_enable( + struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct xfs_inode *ip = XFS_I(inode); + int error; + + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); + + if (IS_DAX(inode)) + return -EINVAL; + + if (inode->i_size > XFS_FSVERITY_REGION_START) + return -EFBIG; + + if (xfs_iflags_test_and_set(ip, XFS_VERITY_CONSTRUCTION)) + return -EBUSY; + + error = xfs_qm_dqattach(ip); + if (error) + return error; + + /* + * Flush pagecache before building Merkle tree. Inode is locked and no + * further writes will happen to the file except fsverity metadata + */ + error = filemap_write_and_wait(inode->i_mapping); + if (error) + return error; + + return xfs_fsverity_delete_metadata(ip); +} + +/* + * Complete (or fail) the process of enabling fsverity. 
+ */ +static int +xfs_fsverity_end_enable( + struct file *file, + const void *desc, + size_t desc_size, + u64 merkle_tree_size) +{ + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error = 0; + + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); + + /* fs-verity failed, just cleanup */ + if (desc == NULL) + goto out; + + error = xfs_fsverity_write_descriptor(file, desc, desc_size, + merkle_tree_size); + if (error) + goto out; + + /* + * Wait for Merkle tree get written to disk before setting on-disk inode + * flag and clearing XFS_VERITY_CONSTRUCTION + */ + error = filemap_write_and_wait(inode->i_mapping); + if (error) + goto out; + + /* + * Set fsverity inode flag + */ + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, + 0, 0, false, &tp); + if (error) + goto out; + + /* + * Ensure that we've persisted the verity information before we enable + * it on the inode and tell the caller we have sealed the inode. + */ + ip->i_diflags2 |= XFS_DIFLAG2_VERITY; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + if (!error) + inode->i_flags |= S_VERITY; + +out: + if (error) { + int error2; + + error2 = xfs_fsverity_delete_metadata(ip); + if (error2) + xfs_alert(ip->i_mount, +"ino 0x%llx failed to clean up new fsverity metadata, err %d", + ip->i_ino, error2); + } + + xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION); + return error; +} + +/* + * Retrieve a merkle tree block. + */ +static struct page * +xfs_fsverity_read_merkle( + struct inode *inode, + pgoff_t index) +{ + pgoff_t metadata_idx = + (fsverity_metadata_offset(inode) >> PAGE_SHIFT); + pgoff_t idx = index + metadata_idx; + + return generic_read_merkle_tree_page(inode, idx); +} + +/* + * Read ahead merkle tree pages.
+ */ +static void +xfs_fsverity_readahead_merkle_tree( + struct inode *inode, + pgoff_t index, + unsigned long nr_pages) +{ + pgoff_t metadata_idx = + (fsverity_metadata_offset(inode) >> PAGE_SHIFT); + pgoff_t idx = index + metadata_idx; + + generic_readahead_merkle_tree(inode, idx, nr_pages); +} + +/* + * Write a merkle tree block. + */ +static int +xfs_fsverity_write_merkle( + struct file *file, + const void *buf, + u64 pos, + unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) +{ + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + loff_t position = pos + fsverity_metadata_offset(inode); + const char *p; + unsigned int i; + + if (position + size > inode->i_sb->s_maxbytes) + return -EFBIG; + + /* + * If this is a block full of hashes of zeroed blocks, don't bother + * storing the block. We can synthesize them later. + * + * However, do this only in case Merkle tree block == fs block size. + * Iomap synthesizes these blocks based on holes in the merkle tree. We + * won't be able to tell if something needs to be synthesized for the + * range in the fs block. For example, for 4k filesystem block + * + * [ 1k | zero hashes | zero hashes | 1k ] + * + * Iomap won't know about these empty blocks.
+ */ + for (i = 0, p = buf; i < size; i += digest_size, p += digest_size) + if (memcmp(p, zero_digest, digest_size)) + break; + if ((i == size) && (size == ip->i_mount->m_sb.sb_blocksize)) + return 0; + + return xfs_fsverity_write(file, position, size, buf); +} + +const struct fsverity_operations xfs_fsverity_ops = { + .begin_enable_verity = xfs_fsverity_begin_enable, + .end_enable_verity = xfs_fsverity_end_enable, + .get_verity_descriptor = xfs_fsverity_get_descriptor, + .read_merkle_tree_page = xfs_fsverity_read_merkle, + .readahead_merkle_tree = xfs_fsverity_readahead_merkle_tree, + .write_merkle_tree_block = xfs_fsverity_write_merkle, +}; diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h index ab01ceef4d15..91bcb86d1b3b 100644 --- a/fs/xfs/xfs_fsverity.h +++ b/fs/xfs/xfs_fsverity.h @@ -6,8 +6,10 @@ #define __XFS_FSVERITY_H__ #include "xfs.h" +#include #ifdef CONFIG_FS_VERITY +extern const struct fsverity_operations xfs_fsverity_ops; bool xfs_fsverity_sealed_data(const struct xfs_inode *ip, loff_t offset); loff_t xfs_fsverity_offset_to_disk(struct xfs_inode *ip, loff_t pos); diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 19aba2c3d525..c6eb68c3e50f 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -161,6 +161,10 @@ xfs_warn_experimental( .opstate = XFS_OPSTATE_WARNED_ZONED, .name = "zoned RT device", }, + [XFS_EXPERIMENTAL_FSVERITY] = { + .opstate = XFS_OPSTATE_WARNED_FSVERITY, + .name = "fsverity", + }, }; ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index d68e72379f9d..1647d32ea4ac 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -96,6 +96,7 @@ enum xfs_experimental_feat { XFS_EXPERIMENTAL_LBS, XFS_EXPERIMENTAL_METADIR, XFS_EXPERIMENTAL_ZONED, + XFS_EXPERIMENTAL_FSVERITY, XFS_EXPERIMENTAL_MAX, }; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8ef7fea8b325..4fa9b28482ac 100644 
--- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -579,6 +579,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID) #define XFS_OPSTATE_WARNED_ZONED 19 /* (Zoned) GC is in progress */ #define XFS_OPSTATE_ZONEGC_RUNNING 20 +/* Kernel has logged a warning about fsverity support */ +#define XFS_OPSTATE_WARNED_FSVERITY 21 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bc71aa9dcee8..ac385955cbf4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -30,6 +30,7 @@ #include "xfs_filestream.h" #include "xfs_quota.h" #include "xfs_sysfs.h" +#include "xfs_fsverity.h" #include "xfs_ondisk.h" #include "xfs_rmap_item.h" #include "xfs_refcount_item.h" @@ -1704,6 +1705,9 @@ xfs_fs_fill_super( sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &xfs_super_operations; +#ifdef CONFIG_FS_VERITY + sb->s_vop = &xfs_fsverity_ops; +#endif /* * Delay mount work if the debug hook is set. This is debug @@ -1961,6 +1965,9 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; + if (xfs_has_verity(mp)) + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_FSVERITY); + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = -ENOENT; -- 2.51.2 Add fs-verity ioctls to enable, dump metadata (descriptor and Merkle tree pages) and obtain file's digest. [djwong: remove unnecessary casting] Signed-off-by: Darrick J. 
Wong Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_ioctl.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 59eaad774371..d343af6aa0c6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -44,6 +44,7 @@ #include #include +#include /* Return 0 on success or positive error */ int @@ -1419,6 +1420,21 @@ xfs_file_ioctl( case XFS_IOC_COMMIT_RANGE: return xfs_ioc_commit_range(filp, arg); + case FS_IOC_ENABLE_VERITY: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_enable(filp, arg); + + case FS_IOC_MEASURE_VERITY: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_measure(filp, arg); + + case FS_IOC_READ_VERITY_METADATA: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_read_metadata(filp, arg); + default: return -ENOTTY; } -- 2.51.2 From: "Darrick J. Wong" Advertise that this filesystem supports fsverity. Signed-off-by: Darrick J. Wong Reviewed-by: Andrey Albershteyn Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_sb.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index e9c92bc0e64b..abe5d8e3eed7 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -250,6 +250,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ #define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */ #define XFS_FSOP_GEOM_FLAGS_ZONED (1 << 27) /* zoned rt device */ +#define XFS_FSOP_GEOM_FLAGS_VERITY (1 << 28) /* fs-verity */ /* * Minimum and maximum sizes need for growth checks. 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 744bd8480ba6..fe400bffa528 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1587,6 +1587,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; if (xfs_has_zoned(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ZONED; + if (xfs_has_verity(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_VERITY; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); -- 2.51.2 From: "Darrick J. Wong" If an inode has the incore verity iflag set, make sure that we can actually activate fsverity on that inode. If activation fails due to a fsverity metadata validation error, clear the flag. The usage model for fsverity requires any program that cares about verity state to call statx/getflags after opening the file and check that the flag is set, so clearing the flag will not compromise that model. Signed-off-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn --- fs/xfs/scrub/attr.c | 7 +++++ fs/xfs/scrub/common.c | 53 +++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.h | 2 ++ fs/xfs/scrub/inode.c | 7 +++++ fs/xfs/scrub/inode_repair.c | 36 +++++++++++++++++++++++++ 5 files changed, 105 insertions(+) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 708334f9b2bd..b1448832ae6b 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -646,6 +646,13 @@ xchk_xattr( if (!xfs_inode_hasattr(sc->ip)) return -ENOENT; + /* + * If this is a verity file that won't activate, we cannot check the + * merkle tree geometry. + */ + if (xchk_inode_verity_broken(sc->ip)) + xchk_set_incomplete(sc); + /* Allocate memory for xattr checking.
*/ error = xchk_setup_xattr_buf(sc, 0); if (error == -ENOMEM) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 7bfa37c99480..888e07df713f 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -45,6 +45,8 @@ #include "scrub/health.h" #include "scrub/tempfile.h" +#include + /* Common code for the metadata scrubbers. */ /* @@ -1736,3 +1738,54 @@ xchk_inode_count_blocks( return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents, count); } + +/* + * If this inode has S_VERITY set on it, read the verity info. If the reading + * fails with anything other than ENOMEM, the file is corrupt, which we can + * detect later with fsverity_active. + * + * Callers must hold the IOLOCK and must not hold the ILOCK of sc->ip because + * activation reads inode data. + */ +int +xchk_inode_setup_verity( + struct xfs_scrub *sc) +{ + int error; + + if (!fsverity_active(VFS_I(sc->ip))) + return 0; + + error = fsverity_ensure_verity_info(VFS_I(sc->ip)); + switch (error) { + case 0: + /* fsverity is active */ + break; + case -ENODATA: + case -EMSGSIZE: + case -EINVAL: + case -EFSCORRUPTED: + case -EFBIG: + /* + * The nonzero errno codes above are the error codes that can + * be returned from fsverity on metadata validation errors. + */ + return 0; + default: + /* runtime errors */ + return error; + } + + return 0; +} + +/* + * Is this a verity file that failed to activate? Callers must have tried to + * activate fsverity via xchk_inode_setup_verity. 
+ */ +bool +xchk_inode_verity_broken( + struct xfs_inode *ip) +{ + return fsverity_active(VFS_I(ip)) && !fsverity_get_info(VFS_I(ip)); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index ddbc065c798c..36d6a3333730 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -289,6 +289,8 @@ int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino, bool *inuse); int xchk_inode_count_blocks(struct xfs_scrub *sc, int whichfork, xfs_extnum_t *nextents, xfs_filblks_t *count); +int xchk_inode_setup_verity(struct xfs_scrub *sc); +bool xchk_inode_verity_broken(struct xfs_inode *ip); bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip); bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip); diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb3f475b6353..1e7cfef00ab0 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -36,6 +36,10 @@ xchk_prepare_iscrub( xchk_ilock(sc, XFS_IOLOCK_EXCL); + error = xchk_inode_setup_verity(sc); + if (error) + return error; + error = xchk_trans_alloc(sc, 0); if (error) return error; @@ -833,6 +837,9 @@ xchk_inode( if (S_ISREG(VFS_I(sc->ip)->i_mode)) xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); + if (xchk_inode_verity_broken(sc->ip)) + xchk_ino_set_corrupt(sc, sc->sm->sm_ino); + xchk_inode_check_unlinked(sc); xchk_inode_xref(sc, sc->ip->i_ino, &di); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 4f7040c9ddf0..846a47286e06 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -573,6 +573,8 @@ xrep_dinode_flags( dip->di_nrext64_pad = 0; else if (dip->di_version >= 3) dip->di_v3_pad = 0; + if (!xfs_has_verity(mp) || !S_ISREG(mode)) + flags2 &= ~XFS_DIFLAG2_VERITY; if (flags2 & XFS_DIFLAG2_METADATA) { xfs_failaddr_t fa; @@ -1613,6 +1615,10 @@ xrep_dinode_core( if (iget_error) return iget_error; + error = xchk_inode_setup_verity(sc); + if (error) + return error; + error = xchk_trans_alloc(sc, 0); if (error) return error; @@ 
-2032,6 +2038,27 @@ xrep_inode_unlinked( return 0; } +/* + * If this file is a fsverity file, xchk_prepare_iscrub or xrep_dinode_core + * should have activated it. If it's still not active, then there's something + * wrong with the verity descriptor and we should turn it off. + */ +STATIC int +xrep_inode_verity( + struct xfs_scrub *sc) +{ + struct inode *inode = VFS_I(sc->ip); + + if (xchk_inode_verity_broken(sc->ip)) { + sc->ip->i_diflags2 &= ~XFS_DIFLAG2_VERITY; + inode->i_flags &= ~S_VERITY; + + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + } + + return 0; +} + /* Repair an inode's fields. */ int xrep_inode( @@ -2081,6 +2108,15 @@ xrep_inode( return error; } + /* + * Disable fsverity if it cannot be activated. Activation failure + * prohibits the file from being opened, so there cannot be another + * program with an open fd to what it thinks is a verity file. + */ + error = xrep_inode_verity(sc); + if (error) + return error; + /* Reconnect incore unlinked list */ error = xrep_inode_unlinked(sc); if (error) -- 2.51.2 From: "Darrick J. Wong" Record verity failures and report them through the health system. Signed-off-by: Darrick J. 
Wong Reviewed-by: Andrey Albershteyn Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_health.h | 4 +++- fs/xfs/xfs_fsverity.c | 10 ++++++++++ fs/xfs/xfs_health.c | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index abe5d8e3eed7..36a87276f0b7 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -422,6 +422,7 @@ struct xfs_bulkstat { #define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */ #define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */ #define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */ +#define XFS_BS_SICK_DATA (1 << 9) /* file data */ /* * Project quota id helpers (previously projid was 16bit only diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index b31000f7190c..fa91916ad072 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -104,6 +104,7 @@ struct xfs_rtgroup; /* Don't propagate sick status to ag health summary during inactivation */ #define XFS_SICK_INO_FORGET (1 << 12) #define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */ +#define XFS_SICK_INO_DATA (1 << 14) /* file data */ /* Primary evidence of health problems in a given group. 
*/ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ @@ -140,7 +141,8 @@ struct xfs_rtgroup; XFS_SICK_INO_XATTR | \ XFS_SICK_INO_SYMLINK | \ XFS_SICK_INO_PARENT | \ - XFS_SICK_INO_DIRTREE) + XFS_SICK_INO_DIRTREE | \ + XFS_SICK_INO_DATA) #define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ XFS_SICK_INO_BMBTA_ZAPPED | \ diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index cdbe26ab88b7..5a2874236c3c 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -474,6 +474,15 @@ xfs_fsverity_write_merkle( return xfs_fsverity_write(file, position, size, buf); } +static void +xfs_fsverity_file_corrupt( + struct inode *inode, + loff_t pos, + size_t len) +{ + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_DATA); +} + const struct fsverity_operations xfs_fsverity_ops = { .begin_enable_verity = xfs_fsverity_begin_enable, .end_enable_verity = xfs_fsverity_end_enable, @@ -481,4 +490,5 @@ const struct fsverity_operations xfs_fsverity_ops = { .read_merkle_tree_page = xfs_fsverity_read_merkle, .readahead_merkle_tree = xfs_fsverity_readahead_merkle_tree, .write_merkle_tree_block = xfs_fsverity_write_merkle, + .file_corrupt = xfs_fsverity_file_corrupt, }; diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 3c1557fb1cf0..b851651c02b2 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -487,6 +487,7 @@ static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR }, { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE }, + { XFS_SICK_INO_DATA, XFS_BS_SICK_DATA }, }; /* Fill out bulkstat health info. */ -- 2.51.2 Report corrupted fsverity descriptor through health system. 
Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_health.h | 4 +++- fs/xfs/xfs_fsverity.c | 13 ++++++++++--- fs/xfs/xfs_health.c | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 36a87276f0b7..d8be7fe93382 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -423,6 +423,7 @@ struct xfs_bulkstat { #define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */ #define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */ #define XFS_BS_SICK_DATA (1 << 9) /* file data */ +#define XFS_BS_SICK_FSVERITY (1 << 10) /* fsverity metadata */ /* * Project quota id helpers (previously projid was 16bit only diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index fa91916ad072..c534aacf3199 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -105,6 +105,7 @@ struct xfs_rtgroup; #define XFS_SICK_INO_FORGET (1 << 12) #define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */ #define XFS_SICK_INO_DATA (1 << 14) /* file data */ +#define XFS_SICK_INO_FSVERITY (1 << 15) /* fsverity metadata */ /* Primary evidence of health problems in a given group. 
*/ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ @@ -142,7 +143,8 @@ struct xfs_rtgroup; XFS_SICK_INO_SYMLINK | \ XFS_SICK_INO_PARENT | \ XFS_SICK_INO_DIRTREE | \ - XFS_SICK_INO_DATA) + XFS_SICK_INO_DATA | \ + XFS_SICK_INO_FSVERITY) #define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ XFS_SICK_INO_BMBTA_ZAPPED | \ diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index 5a2874236c3c..d89512d59328 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -197,16 +197,23 @@ xfs_fsverity_get_descriptor( return error; desc_size = be32_to_cpu(d_desc_size); - if (XFS_IS_CORRUPT(mp, desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) + if (XFS_IS_CORRUPT(mp, desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; - if (XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) + } + + if (XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; + } if (!buf_size) return desc_size; - if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) + if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; + } desc_pos = round_down(desc_size_pos - desc_size, blocksize); error = xfs_fsverity_read(inode, buf, desc_size, desc_pos); diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index b851651c02b2..e52ee02f7d7c 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -488,6 +488,7 @@ static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE }, { XFS_SICK_INO_DATA, XFS_BS_SICK_DATA }, + { XFS_SICK_INO_FSVERITY, XFS_BS_SICK_FSVERITY }, }; /* Fill out bulkstat health info. */ -- 2.51.2 Even though fsverity has traces, debugging issues with varying block sizes could be a bit less transparent without read/write traces. 
Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_fsverity.c | 10 ++++++++++ fs/xfs/xfs_trace.h | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index d89512d59328..69f1c22e1ba8 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -176,6 +176,8 @@ xfs_fsverity_get_descriptor( uint32_t blocksize = i_blocksize(VFS_I(ip)); xfs_fileoff_t last_block_offset; + trace_xfs_fsverity_get_descriptor(ip); + ASSERT(inode->i_flags & S_VERITY); error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &rec, &is_empty); if (error) @@ -419,6 +421,8 @@ xfs_fsverity_read_merkle( (fsverity_metadata_offset(inode) >> PAGE_SHIFT); pgoff_t idx = index + metadata_idx; + trace_xfs_fsverity_read_merkle(XFS_I(inode), idx, PAGE_SIZE); + return generic_read_merkle_tree_page(inode, idx); } @@ -435,6 +439,8 @@ xfs_fsverity_readahead_merkle_tree( (fsverity_metadata_offset(inode) >> PAGE_SHIFT); pgoff_t idx = index + metadata_idx; + trace_xfs_fsverity_read_merkle(XFS_I(inode), idx, PAGE_SIZE); + generic_readahead_merkle_tree(inode, idx, nr_pages); } @@ -456,6 +462,8 @@ xfs_fsverity_write_merkle( const char *p; unsigned int i; + trace_xfs_fsverity_write_merkle(XFS_I(inode), position, size); + if (position + size > inode->i_sb->s_maxbytes) return -EFBIG; @@ -487,6 +495,8 @@ xfs_fsverity_file_corrupt( loff_t pos, size_t len) { + trace_xfs_fsverity_file_corrupt(XFS_I(inode), pos, len); + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_DATA); } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f70afbf3cb19..a5562921611a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -5906,6 +5906,52 @@ DEFINE_EVENT(xfs_freeblocks_resv_class, name, \ DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_reserved); DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_enospc); +TRACE_EVENT(xfs_fsverity_get_descriptor, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + 
__field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +); + +DECLARE_EVENT_CLASS(xfs_fsverity_class, + TP_PROTO(struct xfs_inode *ip, u64 pos, unsigned int length), + TP_ARGS(ip, pos, length), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, pos) + __field(unsigned int, length) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->pos = pos; + __entry->length = length; + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->length) +) + +#define DEFINE_FSVERITY_EVENT(name) \ +DEFINE_EVENT(xfs_fsverity_class, name, \ + TP_PROTO(struct xfs_inode *ip, u64 pos, unsigned int length), \ + TP_ARGS(ip, pos, length)) +DEFINE_FSVERITY_EVENT(xfs_fsverity_read_merkle); +DEFINE_FSVERITY_EVENT(xfs_fsverity_write_merkle); +DEFINE_FSVERITY_EVENT(xfs_fsverity_file_corrupt); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- 2.51.2 Finalize fs-verity integration in XFS by making kernel fs-verity aware with ro-compat flag. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: add spaces] Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_format.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index d67b404964fc..f5e43909f054 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -378,8 +378,9 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ - XFS_SB_FEAT_RO_COMPAT_REFLINK| \ - XFS_SB_FEAT_RO_COMPAT_INOBTCNT) + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ + XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \ + XFS_SB_FEAT_RO_COMPAT_VERITY) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( -- 2.51.2