Report verification errors to fsnotify through the recently added fserror interface. Signed-off-by: Andrey Albershteyn Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/verity/verify.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 4004a1d42875..4ea494da694f 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -9,6 +9,7 @@ #include #include +#include #define FS_VERITY_MAX_PENDING_BLOCKS 2 @@ -203,6 +204,8 @@ static bool verify_data_block(struct fsverity_info *vi, * to verify that any data blocks fully past EOF are all zeroes. */ if (memchr_inv(dblock->data, 0, params->block_size)) { + fserror_report_data_lost(inode, data_pos, + params->block_size, GFP_NOFS); fsverity_err(inode, "FILE CORRUPTED! Data past EOF is not zeroed"); return false; @@ -312,6 +315,7 @@ static bool verify_data_block(struct fsverity_info *vi, data_pos, level - 1, params->hash_alg->name, hsize, want_hash, params->hash_alg->name, hsize, level == 0 ? dblock->real_hash : real_hash); + fserror_report_data_lost(inode, data_pos, params->block_size, GFP_NOFS); error: for (; level > 0; level--) { kunmap_local(hblocks[level - 1].addr); -- 2.51.2 This function will be used by XFS's scrub to force fsverity activation and thereby read the fsverity context. Signed-off-by: Andrey Albershteyn Reviewed-by: "Darrick J.
Wong" --- fs/verity/open.c | 5 +++-- include/linux/fsverity.h | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/verity/open.c b/fs/verity/open.c index dfa0d1afe0fe..0483db672526 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -344,7 +344,7 @@ int fsverity_get_descriptor(struct inode *inode, return 0; } -static int ensure_verity_info(struct inode *inode) +int fsverity_ensure_verity_info(struct inode *inode) { struct fsverity_info *vi = fsverity_get_info(inode), *found; struct fsverity_descriptor *desc; @@ -380,12 +380,13 @@ static int ensure_verity_info(struct inode *inode) kfree(desc); return err; } +EXPORT_SYMBOL_GPL(fsverity_ensure_verity_info); int __fsverity_file_open(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE) return -EPERM; - return ensure_verity_info(inode); + return fsverity_ensure_verity_info(inode); } EXPORT_SYMBOL_GPL(__fsverity_file_open); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index a8f9aa75b792..2e3a90aff11e 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -309,6 +309,19 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) return 0; } +/** + * fsverity_ensure_verity_info() - create verity info if it's not in memory yet + * @inode: the inode for which verity info should be created + * + * Ensure this inode has verity info attached to it. Read fsverity descriptor + * and creates verity based on that. Inodes opened outside of + * file_operations->open will not have any verity info attached. This + * info is required for any fsverity related operations. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_ensure_verity_info(struct inode *inode); + void fsverity_cleanup_inode(struct inode *inode); struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index); -- 2.51.2 Compute the hash of one filesystem block's worth of zeros. 
A filesystem implementation can decide to elide merkle tree blocks containing only this hash and synthesize the contents at read time. Let's pretend that there's a file containing six data blocks and whose merkle tree looks roughly like this: root +--leaf0 | +--data0 | +--data1 | `--data2 `--leaf1 +--data3 +--data4 `--data5 If data[0-2] are sparse holes, then leaf0 will contain a repeating sequence of @zero_digest. Therefore, leaf0 need not be written to disk because its contents can be synthesized. A subsequent xfs patch will use this to reduce the size of the merkle tree when dealing with sparse gold master disk images and the like. Add a helper to pre-fill folio with hashes of empty blocks. This will be used by iomap to synthesize blocks full of zero hashes on the fly. Signed-off-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn --- fs/verity/fsverity_private.h | 3 +++ fs/verity/open.c | 3 +++ fs/verity/pagecache.c | 22 ++++++++++++++++++++++ include/linux/fsverity.h | 8 ++++++++ 4 files changed, 36 insertions(+) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 6e6854c19078..35636c1e2c41 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -53,6 +53,9 @@ struct merkle_tree_params { u64 tree_size; /* Merkle tree size in bytes */ unsigned long tree_pages; /* Merkle tree size in pages */ + /* the hash of a merkle block-sized buffer of zeroes */ + u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; + /* * Starting block index for each tree level, ordered from leaf level (0) * to root level ('num_levels - 1') diff --git a/fs/verity/open.c b/fs/verity/open.c index 0483db672526..94407a37aa08 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -153,6 +153,9 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, goto out_err; } + fsverity_hash_block(params, page_address(ZERO_PAGE(0)), + params->zero_digest); + params->tree_size = offset << log_blocksize; params->tree_pages = 
PAGE_ALIGN(params->tree_size) >> PAGE_SHIFT; return 0; diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index 1819314ecaa3..c338dfa3eb18 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -2,6 +2,7 @@ /* * Copyright 2019 Google LLC */ +#include "fsverity_private.h" #include #include @@ -56,3 +57,24 @@ void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index, folio_put(folio); } EXPORT_SYMBOL_GPL(generic_readahead_merkle_tree); + +/** + * fsverity_fill_zerohash() - fill folio with hashes of zero data block + * @folio: folio to fill + * @poff: offset in the folio to start + * @plen: length of the range to fill with hashes + * @vi: fsverity info + */ +void fsverity_fill_zerohash(struct folio *folio, size_t poff, size_t plen, + struct fsverity_info *vi) +{ + size_t offset = poff; + + WARN_ON_ONCE(!IS_ALIGNED(poff, vi->tree_params.digest_size)); + WARN_ON_ONCE(!IS_ALIGNED(plen, vi->tree_params.digest_size)); + + for (; offset < (poff + plen); offset += vi->tree_params.digest_size) + memcpy_to_folio(folio, offset, vi->tree_params.zero_digest, + vi->tree_params.digest_size); +} +EXPORT_SYMBOL_GPL(fsverity_fill_zerohash); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 2e3a90aff11e..03f703e5bb40 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -201,6 +201,8 @@ bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, size_t len, size_t offset); void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); +void fsverity_fill_zerohash(struct folio *folio, size_t poff, size_t plen, + struct fsverity_info *vi); #else /* !CONFIG_FS_VERITY */ @@ -281,6 +283,12 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) WARN_ON_ONCE(1); } +static inline void fsverity_fill_zerohash(struct folio *folio, size_t poff, + size_t plen, struct fsverity_info *vi) +{ + WARN_ON_ONCE(1); +} + #endif /* 
!CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct fsverity_info *vi, -- 2.51.2 Let the filesystem iterate over the hashes in a block and check whether they are hashes of zeroed data blocks. XFS will use this to decide whether it wants to store a tree block full of these hashes. Signed-off-by: Andrey Albershteyn Reviewed-by: "Darrick J. Wong" --- fs/btrfs/verity.c | 6 +++++- fs/ext4/verity.c | 4 +++- fs/f2fs/verity.c | 4 +++- fs/verity/enable.c | 4 +++- include/linux/fsverity.h | 6 +++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 0062b3a55781..6d8d3808d75d 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -773,11 +773,15 @@ static struct page *btrfs_read_merkle_tree_page(struct inode *inode, * @buf: Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) + * @zero_digest: the hash of a merkle block-sized buffer of zeroes + * @digest_size: size of zero_digest, in bytes * * Returns 0 on success or negative error code on failure */ static int btrfs_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { struct inode *inode = file_inode(file); loff_t merkle_pos = merkle_file_pos(inode); diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index ca61da53f313..347945ac23a4 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -374,7 +374,9 @@ static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index, } static int ext4_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { pos += ext4_verity_metadata_pos(file_inode(file)); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 92ebcc19cab0..b3b3e71604ac 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -270,7 +270,9
@@ static void f2fs_readahead_merkle_tree(struct inode *inode, pgoff_t index, } static int f2fs_write_merkle_tree_block(struct file *file, const void *buf, - u64 pos, unsigned int size) + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) { pos += f2fs_verity_metadata_pos(file_inode(file)); diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 42dfed1ce0ce..ad4ff71d7dd9 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -50,7 +50,9 @@ static int write_merkle_tree_block(struct file *file, const u8 *buf, int err; err = inode->i_sb->s_vop->write_merkle_tree_block(file, buf, pos, - params->block_size); + params->block_size, + params->zero_digest, + params->digest_size); if (err) fsverity_err(inode, "Error %d writing Merkle tree block %lu", err, index); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 03f703e5bb40..881db75f1cba 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -124,6 +124,8 @@ struct fsverity_operations { * @buf: the Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) + * @zero_digest: the hash of a merkle block-sized buffer of zeroes + * @digest_size: size of zero_digest, in bytes * * This is only called between ->begin_enable_verity() and * ->end_enable_verity(). @@ -131,7 +133,9 @@ struct fsverity_operations { * Return: 0 on success, -errno on failure */ int (*write_merkle_tree_block)(struct file *file, const void *buf, - u64 pos, unsigned int size); + u64 pos, unsigned int size, + const u8 *zero_digest, + unsigned int digest_size); }; #ifdef CONFIG_FS_VERITY -- 2.51.2 This is the same function for reading from the pagecache. XFS will also need it, so move it to core fsverity. Reviewed-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/ext4/verity.c | 32 +++----------------------------- fs/f2fs/verity.c | 30 +----------------------------- fs/verity/pagecache.c | 33 +++++++++++++++++++++++++++++++++ include/linux/fsverity.h | 2 ++ 4 files changed, 39 insertions(+), 58 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 347945ac23a4..ac5c133f5529 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -34,32 +34,6 @@ static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) return round_up(inode->i_size, 65536); } -/* - * Read some verity metadata from the inode. __vfs_read() can't be used because - * we need to read beyond i_size. - */ -static int pagecache_read(struct inode *inode, void *buf, size_t count, - loff_t pos) -{ - while (count) { - struct folio *folio; - size_t n; - - folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, - NULL); - if (IS_ERR(folio)) - return PTR_ERR(folio); - - n = memcpy_from_file_folio(buf, folio, pos, count); - folio_put(folio); - - buf += n; - pos += n; - count -= n; - } - return 0; -} - /* * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. * kernel_write() can't be used because the file descriptor is readonly. 
@@ -311,8 +285,8 @@ static int ext4_get_verity_descriptor_location(struct inode *inode, goto bad; desc_size_pos -= sizeof(desc_size_disk); - err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), - desc_size_pos); + err = fsverity_pagecache_read(inode, &desc_size_disk, + sizeof(desc_size_disk), desc_size_pos); if (err) return err; desc_size = le32_to_cpu(desc_size_disk); @@ -352,7 +326,7 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, if (buf_size) { if (desc_size > buf_size) return -ERANGE; - err = pagecache_read(inode, buf, desc_size, desc_pos); + err = fsverity_pagecache_read(inode, buf, desc_size, desc_pos); if (err) return err; } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index b3b3e71604ac..5ea0a9b40443 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -36,34 +36,6 @@ static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) return round_up(inode->i_size, 65536); } -/* - * Read some verity metadata from the inode. __vfs_read() can't be used because - * we need to read beyond i_size. - */ -static int pagecache_read(struct inode *inode, void *buf, size_t count, - loff_t pos) -{ - while (count) { - size_t n = min_t(size_t, count, - PAGE_SIZE - offset_in_page(pos)); - struct page *page; - - page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, - NULL); - if (IS_ERR(page)) - return PTR_ERR(page); - - memcpy_from_page(buf, page, offset_in_page(pos), n); - - put_page(page); - - buf += n; - pos += n; - count -= n; - } - return 0; -} - /* * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. * kernel_write() can't be used because the file descriptor is readonly. 
@@ -248,7 +220,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, if (buf_size) { if (size > buf_size) return -ERANGE; - res = pagecache_read(inode, buf, size, pos); + res = fsverity_pagecache_read(inode, buf, size, pos); if (res) return res; } diff --git a/fs/verity/pagecache.c b/fs/verity/pagecache.c index c338dfa3eb18..da8606d58637 100644 --- a/fs/verity/pagecache.c +++ b/fs/verity/pagecache.c @@ -78,3 +78,36 @@ void fsverity_fill_zerohash(struct folio *folio, size_t poff, size_t plen, vi->tree_params.digest_size); } EXPORT_SYMBOL_GPL(fsverity_fill_zerohash); + +/** + * fsverity_pagecache_read() - read page and copy data to buffer + * @inode: copy from this inode's address space + * @buf: buffer to copy to + * @count: number of bytes to copy + * @pos: position of the folio to copy from + * + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +int fsverity_pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + struct folio *folio; + size_t n; + + folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + + n = memcpy_from_file_folio(buf, folio, pos, count); + folio_put(folio); + + buf += n; + pos += n; + count -= n; + } + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_pagecache_read); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 881db75f1cba..9a2f9c055a49 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -339,5 +339,7 @@ void fsverity_cleanup_inode(struct inode *inode); struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index); void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index, unsigned long nr_pages); +int fsverity_pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos); #endif /* _LINUX_FSVERITY_H */ -- 2.51.2 This flag indicates that I/O is for fsverity metadata. 
In the write path, skip the i_size check and i_size updates, as the metadata is past EOF. In writeback, don't update i_size and continue writeback even if the folio is beyond EOF. In the read path, don't zero fsverity folios; again, they are past EOF. iomap_block_needs_zeroing() is also called from the write path. For folios of larger order we don't want to zero out pages in the folio, as these could contain other merkle tree blocks. For fsverity, the filesystem will request to read PAGE_SIZE memory regions. For data folios, iomap will zero the rest of the folio for anything which is beyond EOF. We don't want this for fsverity folios. Signed-off-by: Andrey Albershteyn Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 43 +++++++++++++++++++++++++++++++++--------- fs/iomap/trace.h | 3 ++- include/linux/iomap.h | 8 ++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e4b6886e5c3c..a80fcb598cc8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -353,9 +353,26 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, { const struct iomap *srcmap = iomap_iter_srcmap(iter); - return srcmap->type != IOMAP_MAPPED || - (srcmap->flags & IOMAP_F_NEW) || - pos >= i_size_read(iter->inode); + /* + * If this block has not been written, there's nothing to read + */ + if (srcmap->type != IOMAP_MAPPED) + return true; + + /* + * Newly allocated blocks have not been written + */ + if (srcmap->flags & IOMAP_F_NEW) + return true; + + /* + * fsverity metadata is stored past i_size, we need to read it instead of + * zeroing + */ + if (srcmap->flags & IOMAP_F_FSVERITY) + return false; + + return pos >= i_size_read(iter->inode); } /** @@ -1167,13 +1184,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, * unlock and release the folio.
*/ old_size = iter->inode->i_size; - if (pos + written > old_size) { + if (pos + written > old_size && + !(iter->iomap.flags & IOMAP_F_FSVERITY)) { i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } __iomap_put_folio(iter, write_ops, written, folio); - if (old_size < pos) + if (old_size < pos && !(iter->iomap.flags & IOMAP_F_FSVERITY)) pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); @@ -1801,13 +1819,20 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, * Check interaction of the folio with the file end. * * If the folio is entirely beyond i_size, return false. If it straddles - * i_size, adjust end_pos and zero all data beyond i_size. + * i_size, adjust end_pos and zero all data beyond i_size. Don't skip fsverity + * folios as those are beyond i_size. */ -static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, - u64 *end_pos) +static bool iomap_writeback_handle_eof(struct folio *folio, + struct iomap_writepage_ctx *wpc, u64 *end_pos) { + struct inode *inode = wpc->inode; u64 isize = i_size_read(inode); + if (wpc->iomap.flags & IOMAP_F_FSVERITY) { + WARN_ON_ONCE(folio_pos(folio) < isize); + return true; + } + if (*end_pos > isize) { size_t poff = offset_in_folio(folio, isize); pgoff_t end_index = isize >> PAGE_SHIFT; @@ -1873,7 +1898,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) trace_iomap_writeback_folio(inode, pos, folio_size(folio)); - if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) + if (!iomap_writeback_handle_eof(folio, wpc, &end_pos)) return 0; WARN_ON_ONCE(end_pos <= pos); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 532787277b16..5252051cc137 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -118,7 +118,8 @@ DEFINE_RANGE_EVENT(iomap_zero_iter); { IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \ { IOMAP_F_PRIVATE, "PRIVATE" }, \ { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \ - { IOMAP_F_STALE, "STALE" } + { 
IOMAP_F_STALE, "STALE" }, \ + { IOMAP_F_FSVERITY, "FSVERITY" } #define IOMAP_DIO_STRINGS \ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 531f9ebdeeae..4506a99d5285 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -87,6 +87,14 @@ struct vm_fault; #define IOMAP_F_INTEGRITY 0 #endif /* CONFIG_BLK_DEV_INTEGRITY */ +/* + * Indicates reads and writes of fsverity metadata. + * + * Fsverity metadata is stored after the regular file data and thus beyond + * i_size. + */ +#define IOMAP_F_FSVERITY (1U << 10) + /* * Flag reserved for file system specific usage */ -- 2.51.2 Obtain fsverity info for folios with file data and fsverity metadata. The filesystem can pass vi down to the ioend and then to fsverity for verification. This differs from the other filesystems supporting fsverity (ext4, f2fs, btrfs), which don't need fsverity_info for reading fsverity metadata. When reading the merkle tree, iomap requires fsverity info to synthesize hashes for zeroed data blocks. fsverity metadata has two kinds of holes: ones in the merkle tree and one after the fsverity descriptor. Merkle tree holes are blocks full of hashes of zeroed data blocks. These are not stored on disk but synthesized on the fly. This saves a bit of space for sparse files. Because of this, iomap also needs to look up fsverity_info for folios with fsverity metadata. ->vi has the hash of a zeroed data block, which will be used to fill the merkle tree block. The hole past the descriptor is interpreted as the end of the metadata region. As we don't have EOF here, we use this hole as an indication that the rest of the folio is empty. This patch marks the rest of the folio beyond the fsverity descriptor as uptodate. For file data, fsverity needs to verify consistency of the whole file against the root hash; hashes of holes are included in the merkle tree, so verify them too. Issue reading of the fsverity merkle tree on fsverity inodes. This way the metadata will be available at I/O completion time.
Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 41 +++++++++++++++++++++++++++++++++++++++-- include/linux/iomap.h | 2 ++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a80fcb598cc8..7ac319618f8e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -561,9 +562,27 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, if (plen == 0) return 0; - /* zero post-eof blocks as the page may be mapped */ - if (iomap_block_needs_zeroing(iter, pos)) { + /* + * Handling of fsverity "holes". We hit this for two case: + * 1. No need to go further, the hole after fsverity + * descriptor is the end of the fsverity metadata. + * + * 2. This folio contains merkle tree blocks which need to be + * synthesized. If we already have fsverity info (ctx->vi) + * synthesize these blocks. + */ + if ((iomap->flags & IOMAP_F_FSVERITY) && + iomap->type == IOMAP_HOLE) { + if (ctx->vi) + fsverity_fill_zerohash(folio, poff, plen, + ctx->vi); + iomap_set_range_uptodate(folio, poff, plen); + } else if (iomap_block_needs_zeroing(iter, pos)) { + /* zero post-eof blocks as the page may be mapped */ folio_zero_range(folio, poff, plen); + if (ctx->vi && + !fsverity_verify_blocks(ctx->vi, folio, plen, poff)) + return -EIO; iomap_set_range_uptodate(folio, poff, plen); } else { if (!*bytes_submitted) @@ -614,6 +633,15 @@ void iomap_read_folio(const struct iomap_ops *ops, trace_iomap_readpage(iter.inode, 1); + /* + * Fetch fsverity_info for both data and fsverity metadata, as iomap + * needs zeroed hash for merkle tree block synthesis + */ + ctx->vi = fsverity_get_info(iter.inode); + if (ctx->vi && iter.pos < i_size_read(iter.inode)) + fsverity_readahead(ctx->vi, folio->index, + folio_nr_pages(folio)); + while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_submitted); 
@@ -681,6 +709,15 @@ void iomap_readahead(const struct iomap_ops *ops, trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); + /* + * Fetch fsverity_info for both data and fsverity metadata, as iomap + * needs zeroed hash for merkle tree block synthesis + */ + ctx->vi = fsverity_get_info(iter.inode); + if (ctx->vi && iter.pos < i_size_read(iter.inode)) + fsverity_readahead(ctx->vi, readahead_index(rac), + readahead_count(rac)); + while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, ctx, &cur_bytes_submitted); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4506a99d5285..4d9202cae29f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -435,6 +435,7 @@ struct iomap_ioend { loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ void *io_private; /* file system private data */ + struct fsverity_info *io_vi; /* fsverity info */ struct bio io_bio; /* MUST BE LAST! */ }; @@ -509,6 +510,7 @@ struct iomap_read_folio_ctx { struct readahead_control *rac; void *read_ctx; loff_t read_ctx_file_offset; + struct fsverity_info *vi; }; struct iomap_read_ops { -- 2.51.2 This is just a wrapper around iomap_file_buffered_write() to create necessary iterator over metadata. 
Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/iomap/buffered-io.c | 25 +++++++++++++++++++++++++ include/linux/iomap.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7ac319618f8e..0f89225dc3d7 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1287,6 +1287,31 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +int iomap_fsverity_write(struct file *file, loff_t pos, size_t length, + const void *buf, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops) +{ + int ret; + struct iov_iter iiter; + struct kvec kvec = { + .iov_base = (void *)buf, + .iov_len = length, + }; + struct kiocb iocb = { + .ki_filp = file, + .ki_ioprio = get_current_ioprio(), + .ki_pos = pos, + }; + + iov_iter_kvec(&iiter, WRITE, &kvec, 1, length); + + ret = iomap_file_buffered_write(&iocb, &iiter, ops, write_ops, NULL); + if (ret < 0) + return ret; + return ret == length ? 
0 : -EIO; +} +EXPORT_SYMBOL_GPL(iomap_fsverity_write); + static void iomap_write_delalloc_ifs_punch(struct inode *inode, struct folio *folio, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4d9202cae29f..83586f09f365 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -359,6 +359,9 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); +int iomap_fsverity_write(struct file *file, loff_t pos, size_t length, + const void *buf, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops); void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx, void *private); void iomap_readahead(const struct iomap_ops *ops, -- 2.51.2 Introduce XFS_DIFLAG2_VERITY for inodes with fsverity. This flag indicates that the inode has fs-verity enabled (i.e. the descriptor exists, the tree is built and the file is read-only). Introduce XFS_SB_FEAT_RO_COMPAT_VERITY for filesystems having fsverity inodes. As the on-disk changes apply to fsverity inodes only, allow older kernels read-only access. This will be enabled in a later patch, after full fsverity support. Reviewed-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_format.h | 30 +++++++++++++++++++++++++++++- fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++ fs/xfs/libxfs/xfs_inode_util.c | 2 ++ fs/xfs/libxfs/xfs_sb.c | 2 ++ fs/xfs/xfs_iops.c | 2 ++ fs/xfs/xfs_mount.h | 2 ++ 6 files changed, 45 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 779dac59b1f3..4dff29659e40 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -374,6 +374,7 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ #define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ +#define XFS_SB_FEAT_RO_COMPAT_VERITY (1 << 4) /* fs-verity */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ @@ -1230,16 +1231,21 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) */ #define XFS_DIFLAG2_METADATA_BIT 5 +/* inodes sealed with fs-verity */ +#define XFS_DIFLAG2_VERITY_BIT 6 + #define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) #define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) #define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) #define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT) +#define XFS_DIFLAG2_VERITY (1ULL << XFS_DIFLAG2_VERITY_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA | \ + XFS_DIFLAG2_VERITY) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { @@ -2021,4 +2027,26 @@ struct xfs_acl { #define SGI_ACL_FILE_SIZE 
(sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) +/* + * At maximum of 8 levels with 128 hashes per block (32 bytes SHA-256) maximum + * tree size is ((128^8 − 1)/(128 − 1)) = 567*10^12 blocks. This should fit in + * 53 bits address space. + * + * At this Merkle tree size we can cover 295EB large file. This is much larger + * than the currently supported file size. + * + * For sha512 the largest file we can cover ends at 1 << 50 offset, this is also + * good. + */ +#define XFS_FSVERITY_LARGEST_FILE ((loff_t)1ULL << 53) + +/* + * Alignment of the fsverity metadata placement. This is largest supported PAGE + * SIZE for fsverity. This is used to space out data and metadata in page cache. + * The spacing is necessary for non-exposure of metadata to userspace and + * correct merkle tree synthesis in the iomap. + */ +#define XFS_FSVERITY_START_ALIGN (65536) + + #endif /* __XFS_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 3794e5412eba..f2181c1bed54 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -760,6 +760,14 @@ xfs_dinode_verify( !xfs_has_rtreflink(mp)) return __this_address; + /* only regular files can have fsverity */ + if (flags2 & XFS_DIFLAG2_VERITY) { + if (!xfs_has_verity(mp)) + return __this_address; + if (!S_ISREG(mode)) + return __this_address; + } + if (xfs_has_zoned(mp) && dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)) { if (be32_to_cpu(dip->di_used_blocks) > mp->m_sb.sb_rgextents) diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 551fa51befb6..6b1e20a4bb9b 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -126,6 +126,8 @@ xfs_ip2xflags( flags |= FS_XFLAG_DAX; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) flags |= FS_XFLAG_COWEXTSIZE; + if (ip->i_diflags2 & XFS_DIFLAG2_VERITY) + flags |= FS_XFLAG_VERITY; } if (xfs_inode_has_attr_fork(ip)) diff --git
a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 47322adb7690..a15510ebd2f1 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -165,6 +165,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_REFLINK; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) features |= XFS_FEAT_INOBTCNT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_VERITY) + features |= XFS_FEAT_VERITY; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 208543e57eda..ca369eb96561 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1415,6 +1415,8 @@ xfs_diflags_to_iflags( flags |= S_NOATIME; if (init && xfs_inode_should_enable_dax(ip)) flags |= S_DAX; + if (xflags & FS_XFLAG_VERITY) + flags |= S_VERITY; /* * S_DAX can only be set during inode initialization and is never set by diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ddd4028be8d6..07f6aa3c3f26 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -385,6 +385,7 @@ typedef struct xfs_mount { #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ #define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ #define XFS_FEAT_ZONED (1ULL << 29) /* zoned RT device */ +#define XFS_FEAT_VERITY (1ULL << 30) /* fs-verity */ /* Mount features */ #define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */ @@ -442,6 +443,7 @@ __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) __XFS_HAS_FEAT(metadir, METADIR) __XFS_HAS_FEAT(zoned, ZONED) __XFS_HAS_FEAT(nolifetime, NOLIFETIME) +__XFS_HAS_FEAT(verity, VERITY) static inline bool xfs_has_rtgroups(const struct xfs_mount *mp) { -- 2.51.2 fs-verity will read and attach metadata (not the tree itself) from a disk for those inodes which already have fs-verity enabled. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 6246f34df9fd..a980ac5196a8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -37,6 +37,7 @@ #include #include #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -1640,11 +1641,18 @@ xfs_file_open( struct inode *inode, struct file *file) { + int error; + if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; if (xfs_get_atomic_write_min(XFS_I(inode)) > 0) file->f_mode |= FMODE_CAN_ATOMIC_WRITE; + + error = fsverity_file_open(inode, file); + if (error) + return error; + return generic_file_open(inode, file); } -- 2.51.2 fs-verity doesn't support DAX. Forbid filesystem to enable DAX on inodes which already have fs-verity enabled. The opposite is checked when fs-verity is enabled, it won't be enabled if DAX is. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_iops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ca369eb96561..17efc83a86ed 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1387,6 +1387,8 @@ xfs_inode_should_enable_dax( return false; if (!xfs_inode_supports_dax(ip)) return false; + if (ip->i_diflags2 & XFS_DIFLAG2_VERITY) + return false; if (xfs_has_dax_always(ip->i_mount)) return true; if (ip->i_diflags2 & XFS_DIFLAG2_DAX) -- 2.51.2 The direct path is not supported on verity files. Attempts to use direct I/O path on such files should fall back to buffered I/O path. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a980ac5196a8..6fa9835f9531 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -282,7 +282,8 @@ xfs_file_dax_read( struct kiocb *iocb, struct iov_iter *to) { - struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); ssize_t ret = 0; trace_xfs_file_dax_read(iocb, to); @@ -333,6 +334,14 @@ xfs_file_read_iter( if (xfs_is_shutdown(mp)) return -EIO; + /* + * In case fs-verity is enabled, we also fallback to the buffered read + * from the direct read path. Therefore, IOCB_DIRECT is set and need to + * be cleared (see generic_file_read_iter()) + */ + if (fsverity_active(inode)) + iocb->ki_flags &= ~IOCB_DIRECT; + if (IS_DAX(inode)) ret = xfs_file_dax_read(iocb, to); else if (iocb->ki_flags & IOCB_DIRECT) -- 2.51.2 For write/writeback, set the IOMAP_F_FSVERITY flag to tell iomap not to update the inode size and not to skip folios beyond EOF. Initiate fsverity writeback with IOMAP_F_FSVERITY set to tell iomap that it should not skip folios that are dirty beyond EOF. In the read path, let iomap know that we are reading fsverity metadata, so that it treats holes in the tree as a request to synthesize tree blocks and a hole after the descriptor as the end of the fsverity region. Introduce a new inode flag meaning that the Merkle tree is being built on the inode. Reviewed-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_bmap.c | 7 +++++++ fs/xfs/xfs_aops.c | 16 +++++++++++++++- fs/xfs/xfs_fsverity.c | 21 +++++++++++++++++++++ fs/xfs/xfs_fsverity.h | 20 ++++++++++++++++++++ fs/xfs/xfs_inode.h | 6 ++++++ fs/xfs/xfs_iomap.c | 15 +++++++++++++-- 7 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 fs/xfs/xfs_fsverity.c create mode 100644 fs/xfs/xfs_fsverity.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 9f7133e02576..38b7f51e5d84 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -149,6 +149,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o +xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o # notify failure ifeq ($(CONFIG_MEMORY_FAILURE),y) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7a4c8f1aa76c..931d02678d19 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -41,6 +41,8 @@ #include "xfs_inode_util.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" +#include struct kmem_cache *xfs_bmap_intent_cache; @@ -4451,6 +4453,11 @@ xfs_bmapi_convert_one_delalloc( XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION) && + XFS_FSB_TO_B(mp, bma.got.br_startoff) >= + xfs_fsverity_metadata_offset(ip)) + flags |= IOMAP_F_FSVERITY; + ASSERT(!isnullstartblock(bma.got.br_startblock)); xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f279055fcea0..9503252a0fa4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -22,6 +22,7 @@ #include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_rtgroup.h" +#include "xfs_fsverity.h" #include struct xfs_writepage_ctx { @@ -339,12 +340,16 
@@ xfs_map_blocks( int retries = 0; int error = 0; unsigned int *seq; + unsigned int iomap_flags = 0; if (xfs_is_shutdown(mp)) return -EIO; XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. COW I/O always takes precedent, so we must always @@ -432,7 +437,8 @@ xfs_map_blocks( isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, iomap_flags, + XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: @@ -705,6 +711,14 @@ xfs_vm_writepages( }, }; + /* + * Writeback does not work for folios past EOF, let it know that + * I/O happens for fsverity metadata and this restriction need + * to be skipped + */ + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + wpc.ctx.iomap.flags |= IOMAP_F_FSVERITY; + return iomap_writepages(&wpc.ctx); } } diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c new file mode 100644 index 000000000000..6e6a8636a577 --- /dev/null +++ b/fs/xfs/xfs_fsverity.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. + */ +#include "xfs_platform.h" +#include "xfs_format.h" +#include "xfs_inode.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_fsverity.h" +#include "xfs_fsverity.h" +#include +#include + +loff_t +xfs_fsverity_metadata_offset( + const struct xfs_inode *ip) +{ + return round_up(i_size_read(VFS_IC(ip)), XFS_FSVERITY_START_ALIGN); +} diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h new file mode 100644 index 000000000000..5771db2cd797 --- /dev/null +++ b/fs/xfs/xfs_fsverity.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026 Red Hat, Inc. 
+ */ +#ifndef __XFS_FSVERITY_H__ +#define __XFS_FSVERITY_H__ + +#include "xfs_platform.h" + +#ifdef CONFIG_FS_VERITY +loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip); +#else +static inline loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip) +{ + WARN_ON_ONCE(1); + return ULLONG_MAX; +} +#endif /* CONFIG_FS_VERITY */ + +#endif /* __XFS_FSVERITY_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bd6d33557194..6df48d68a919 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -415,6 +415,12 @@ static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) */ #define XFS_IREMAPPING (1U << 15) +/* + * fs-verity's Merkle tree is under construction. The file is read-only, the + * only writes happening are for the fsverity metadata. + */ +#define XFS_VERITY_CONSTRUCTION (1U << 16) + /* All inode state flags related to inode reclaim. */ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 9c2f12d5fec9..71ccd4ff5f48 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -32,6 +32,8 @@ #include "xfs_rtbitmap.h" #include "xfs_icache.h" #include "xfs_zone_alloc.h" +#include "xfs_fsverity.h" +#include #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) @@ -1789,6 +1791,9 @@ xfs_buffered_write_iomap_begin( return xfs_direct_write_iomap_begin(inode, offset, count, flags, iomap, srcmap); + if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) + iomap_flags |= IOMAP_F_FSVERITY; + error = xfs_qm_dqattach(ip); if (error) return error; @@ -2113,12 +2118,17 @@ xfs_read_iomap_begin( bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; u64 seq; + unsigned int iomap_flags = 0; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); if (xfs_is_shutdown(mp)) return -EIO; + if (fsverity_active(inode) && + (offset >= xfs_fsverity_metadata_offset(ip))) + iomap_flags |= IOMAP_F_FSVERITY; + error = 
xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -2132,8 +2142,9 @@ xfs_read_iomap_begin( if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0, seq); + iomap_flags |= shared ? IOMAP_F_SHARED : 0; + + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); } const struct iomap_ops xfs_read_iomap_ops = { -- 2.51.2 Use read ioends for fsverity verification. Do not issues fsverity metadata I/O through the same workqueue due to risk of a deadlock by a filled workqueue. Pass fsverity_info from iomap context down to the ioend as hashtable lookups are expensive. Add a simple helper to check that this is not fsverity metadata but file data that needs verification. Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_aops.c | 46 ++++++++++++++++++++++++++++++++++--------- fs/xfs/xfs_fsverity.c | 9 +++++++++ fs/xfs/xfs_fsverity.h | 6 ++++++ 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 9503252a0fa4..ecb07f250956 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -24,6 +24,7 @@ #include "xfs_rtgroup.h" #include "xfs_fsverity.h" #include +#include struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -171,6 +172,23 @@ xfs_end_ioend_write( memalloc_nofs_restore(nofs_flag); } +/* + * IO read completion. + */ +static void +xfs_end_ioend_read( + struct iomap_ioend *ioend) +{ + struct xfs_inode *ip = XFS_I(ioend->io_inode); + + if (!ioend->io_bio.bi_status && + xfs_fsverity_is_file_data(ip, ioend->io_offset)) + fsverity_verify_bio(ioend->io_vi, + &ioend->io_bio); + iomap_finish_ioends(ioend, + blk_status_to_errno(ioend->io_bio.bi_status)); +} + /* * Finish all pending IO completions that require transactional modifications. 
* @@ -205,8 +223,7 @@ xfs_end_io( list_del_init(&ioend->io_list); iomap_ioend_try_merge(ioend, &tmp); if (bio_op(&ioend->io_bio) == REQ_OP_READ) - iomap_finish_ioends(ioend, - blk_status_to_errno(ioend->io_bio.bi_status)); + xfs_end_ioend_read(ioend); else xfs_end_ioend_write(ioend); cond_resched(); @@ -232,9 +249,14 @@ xfs_end_bio( } spin_lock_irqsave(&ip->i_ioend_lock, flags); - if (list_empty(&ip->i_ioend_list)) - WARN_ON_ONCE(!queue_work(mp->m_unwritten_workqueue, + if (list_empty(&ip->i_ioend_list)) { + if (IS_ENABLED(CONFIG_FS_VERITY) && ioend->io_vi && + ioend->io_offset < xfs_fsverity_metadata_offset(ip)) + fsverity_enqueue_verify_work(&ip->i_ioend_work); + else + WARN_ON_ONCE(!queue_work(mp->m_unwritten_workqueue, &ip->i_ioend_work)); + } list_add_tail(&ioend->io_list, &ip->i_ioend_list); spin_unlock_irqrestore(&ip->i_ioend_lock, flags); } @@ -764,9 +786,13 @@ xfs_bio_submit_read( struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx; + struct iomap_ioend *ioend; /* defer read completions to the ioend workqueue */ - iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0); + ioend = iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, + 0); + ioend->io_vi = ctx->vi; + bio->bi_end_io = xfs_end_bio; submit_bio(bio); } @@ -779,11 +805,13 @@ static const struct iomap_read_ops xfs_iomap_read_ops = { static inline const struct iomap_read_ops * xfs_get_iomap_read_ops( - const struct address_space *mapping) + const struct address_space *mapping, + loff_t position) { struct xfs_inode *ip = XFS_I(mapping->host); - if (bdev_has_integrity_csum(xfs_inode_buftarg(ip)->bt_bdev)) + if (bdev_has_integrity_csum(xfs_inode_buftarg(ip)->bt_bdev) || + xfs_fsverity_is_file_data(ip, position)) return &xfs_iomap_read_ops; return &iomap_bio_read_ops; } @@ -795,7 +823,7 @@ xfs_vm_read_folio( { struct iomap_read_folio_ctx ctx = { .cur_folio = folio }; - ctx.ops = xfs_get_iomap_read_ops(folio->mapping); + ctx.ops = xfs_get_iomap_read_ops(folio->mapping, 
folio_pos(folio)); iomap_read_folio(&xfs_read_iomap_ops, &ctx, NULL); return 0; } @@ -806,7 +834,7 @@ xfs_vm_readahead( { struct iomap_read_folio_ctx ctx = { .rac = rac }; - ctx.ops = xfs_get_iomap_read_ops(rac->mapping), + ctx.ops = xfs_get_iomap_read_ops(rac->mapping, readahead_pos(rac)); iomap_readahead(&xfs_read_iomap_ops, &ctx, NULL); } diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index 6e6a8636a577..b983e20bb5e1 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -19,3 +19,12 @@ xfs_fsverity_metadata_offset( { return round_up(i_size_read(VFS_IC(ip)), XFS_FSVERITY_START_ALIGN); } + +bool +xfs_fsverity_is_file_data( + const struct xfs_inode *ip, + loff_t offset) +{ + return fsverity_active(VFS_IC(ip)) && + offset < xfs_fsverity_metadata_offset(ip); +} diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h index 5771db2cd797..ec77ba571106 100644 --- a/fs/xfs/xfs_fsverity.h +++ b/fs/xfs/xfs_fsverity.h @@ -9,12 +9,18 @@ #ifdef CONFIG_FS_VERITY loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip); +bool xfs_fsverity_is_file_data(const struct xfs_inode *ip, loff_t offset); #else static inline loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip) { WARN_ON_ONCE(1); return ULLONG_MAX; } +static inline bool xfs_fsverity_is_file_data(const struct xfs_inode *ip, + loff_t offset) +{ + return false; +} #endif /* CONFIG_FS_VERITY */ #endif /* __XFS_FSVERITY_H__ */ -- 2.51.2 Add integration with fs-verity. XFS stores fs-verity descriptor and Merkle tree in the inode data fork at first block aligned to 64k past EOF. The Merkle tree reading/writing is done through iomap interface. The data itself is read to the inode's page cache. When XFS reads from this region iomap doesn't call into fsverity to verify it against Merkle tree. For data, verification is done at ioend completion in a workqueue. When fs-verity is enabled on an inode, the XFS_VERITY_CONSTRUCTION flag is set, meaning that the Merkle tree is being built. 
The initialization ends with storing the verity descriptor and setting the on-disk inode flag (XFS_DIFLAG2_VERITY). Lastly, S_VERITY is set on the inode and XFS_VERITY_CONSTRUCTION is dropped. The descriptor is stored in a new block aligned to 64k after the last Merkle tree block. The size of the descriptor is stored at the end of the last descriptor block (descriptor can be multiple blocks). Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_bmap_util.c | 8 + fs/xfs/xfs_fsverity.c | 349 ++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_fsverity.h | 2 + fs/xfs/xfs_message.c | 4 + fs/xfs/xfs_message.h | 1 + fs/xfs/xfs_mount.h | 2 + fs/xfs/xfs_super.c | 7 + 7 files changed, 372 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 0ab00615f1ad..18348f4fd2aa 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -31,6 +31,7 @@ #include "xfs_rtbitmap.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include /* Kernel only BMAP related definitions and functions */ @@ -553,6 +554,13 @@ xfs_can_free_eofblocks( if (last_fsb <= end_fsb) return false; + /* + * Nothing to clean on fsverity inodes as they don't use prealloc and + * there is no delalloc as only written data is fsverity metadata + */ + if (IS_VERITY(VFS_I(ip))) + return false; + /* * Check if there is an post-EOF extent to free. 
If there are any * delalloc blocks attached to the inode (data fork delalloc diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index b983e20bb5e1..5a6a48fcf843 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -4,14 +4,26 @@ */ #include "xfs_platform.h" #include "xfs_format.h" -#include "xfs_inode.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_fsverity.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_log_format.h" +#include "xfs_bmap_util.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_trace.h" +#include "xfs_quota.h" #include "xfs_fsverity.h" +#include "xfs_iomap.h" +#include "xfs_error.h" +#include "xfs_health.h" #include #include +#include loff_t xfs_fsverity_metadata_offset( @@ -28,3 +40,338 @@ xfs_fsverity_is_file_data( return fsverity_active(VFS_IC(ip)) && offset < xfs_fsverity_metadata_offset(ip); } + +/* + * Retrieve the verity descriptor. + */ +static int +xfs_fsverity_get_descriptor( + struct inode *inode, + void *buf, + size_t buf_size) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + __be32 d_desc_size; + u32 desc_size; + u64 desc_size_pos; + int error; + u64 desc_pos; + struct xfs_bmbt_irec rec; + int is_empty; + uint32_t blocksize = i_blocksize(VFS_I(ip)); + xfs_fileoff_t last_block_offset; + + ASSERT(inode->i_flags & S_VERITY); + error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &rec, &is_empty); + if (error) + return error; + + if (is_empty) + return -ENODATA; + + last_block_offset = + XFS_FSB_TO_B(mp, rec.br_startoff + rec.br_blockcount); + if (last_block_offset < xfs_fsverity_metadata_offset(ip)) + return -ENODATA; + + desc_size_pos = last_block_offset - sizeof(__be32); + error = fsverity_pagecache_read(inode, (char *)&d_desc_size, + sizeof(d_desc_size), desc_size_pos); + if (error) + return error; + + desc_size = be32_to_cpu(d_desc_size); + if (XFS_IS_CORRUPT(mp, 
desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) + return -ERANGE; + if (XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) + return -ERANGE; + + if (!buf_size) + return desc_size; + + if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) + return -ERANGE; + + desc_pos = round_down(desc_size_pos - desc_size, blocksize); + error = fsverity_pagecache_read(inode, buf, desc_size, desc_pos); + if (error) + return error; + + return desc_size; +} + +static int +xfs_fsverity_write_descriptor( + struct file *file, + const void *desc, + u32 desc_size, + u64 merkle_tree_size) +{ + int error; + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + unsigned int blksize = ip->i_mount->m_attr_geo->blksize; + u64 tree_last_block = + xfs_fsverity_metadata_offset(ip) + merkle_tree_size; + u64 desc_pos = + round_up(tree_last_block, XFS_FSVERITY_START_ALIGN); + u64 desc_end = desc_pos + desc_size; + __be32 desc_size_disk = cpu_to_be32(desc_size); + u64 desc_size_pos = + round_up(desc_end + sizeof(desc_size_disk), blksize) - + sizeof(desc_size_disk); + + error = iomap_fsverity_write(file, desc_size_pos, sizeof(__be32), + (const void *)&desc_size_disk, + &xfs_buffered_write_iomap_ops, + &xfs_iomap_write_ops); + if (error) + return error; + + return iomap_fsverity_write(file, desc_pos, desc_size, desc, + &xfs_buffered_write_iomap_ops, + &xfs_iomap_write_ops); +} + +/* + * Try to remove all the fsverity metadata after a failed enablement. 
+ */
+static int
+xfs_fsverity_delete_metadata(
+	struct xfs_inode	*ip)
+{
+	struct xfs_trans	*tp;
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	/*
+	 * We are removing post-EOF data, so there is no need to update i_size:
+	 * fsverity didn't move i_size in the first place.
+	 */
+	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, XFS_ISIZE(ip));
+	if (error)
+		goto err_cancel;
+
+	/*
+	 * xfs_trans_commit() disposes of the transaction whether or not it
+	 * succeeds, so a commit failure must not fall into err_cancel and
+	 * cancel tp a second time.
+	 */
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+
+err_cancel:
+	xfs_trans_cancel(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
+
+/*
+ * Prepare to enable fsverity by clearing old metadata.
+ *
+ * On success XFS_VERITY_CONSTRUCTION is set on the inode and stays set until
+ * xfs_fsverity_end_enable() clears it.  If preparation fails after the flag
+ * was taken, the flag is dropped again here so that a later attempt does not
+ * fail with -EBUSY.
+ */
+static int
+xfs_fsverity_begin_enable(
+	struct file		*filp)
+{
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error;
+
+	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+	if (IS_DAX(inode))
+		return -EINVAL;
+
+	if (inode->i_size > XFS_FSVERITY_LARGEST_FILE)
+		return -EFBIG;
+
+	/*
+	 * Flush pagecache before building Merkle tree. Inode is locked and no
+	 * further writes will happen to the file except fsverity metadata
+	 */
+	error = filemap_write_and_wait(inode->i_mapping);
+	if (error)
+		return error;
+
+	if (xfs_iflags_test_and_set(ip, XFS_VERITY_CONSTRUCTION))
+		return -EBUSY;
+
+	error = xfs_qm_dqattach(ip);
+	if (error)
+		goto out_clear;
+
+	error = xfs_fsverity_delete_metadata(ip);
+	if (error)
+		goto out_clear;
+
+	return 0;
+
+out_clear:
+	/*
+	 * NOTE(review): assumes the fsverity core does not call
+	 * ->end_enable_verity() after ->begin_enable_verity() fails, so
+	 * nothing else would clear the flag -- confirm in fs/verity/enable.c.
+	 */
+	xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
+	return error;
+}
+
+/*
+ * Complete (or fail) the process of enabling fsverity.
+ *
+ * A NULL @desc means that enabling failed and only cleanup is wanted.
+ * XFS_VERITY_CONSTRUCTION is cleared before returning in every case.
+ */
+static int
+xfs_fsverity_end_enable(
+	struct file		*file,
+	const void		*desc,
+	size_t			desc_size,
+	u64			merkle_tree_size)
+{
+	struct inode		*inode = file_inode(file);
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error = 0;
+	loff_t			range_start = xfs_fsverity_metadata_offset(ip);
+
+	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+	/* fs-verity failed, just cleanup */
+	if (desc == NULL)
+		goto out;
+
+	error = xfs_fsverity_write_descriptor(file, desc, desc_size,
+					      merkle_tree_size);
+	if (error)
+		goto out;
+
+	/*
+	 * Wait for the Merkle tree to get written to disk before setting the
+	 * on-disk inode flag and clearing XFS_VERITY_CONSTRUCTION
+	 */
+	error = filemap_write_and_wait_range(inode->i_mapping, range_start,
+					     LLONG_MAX);
+	if (error)
+		goto out;
+
+	/*
+	 * Proactively drop any delayed allocations in COW fork, the fsverity
+	 * files are read-only
+	 */
+	if (xfs_is_cow_inode(ip))
+		xfs_bmap_punch_delalloc_range(ip, XFS_COW_FORK, 0, LLONG_MAX,
+				NULL);
+
+	/*
+	 * Set fsverity inode flag
+	 */
+	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
+			0, 0, false, &tp);
+	if (error)
+		goto out;
+
+	/*
+	 * Ensure that we've persisted the verity information before we enable
+	 * it on the inode and tell the caller we have sealed the inode.
+	 */
+	ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	if (!error)
+		inode->i_flags |= S_VERITY;
+
+out:
+	/*
+	 * Remove whatever metadata was written if enabling did not complete.
+	 * That includes the case where the caller reported failure (!desc):
+	 * error is still zero there, so testing error alone would skip the
+	 * cleanup.
+	 */
+	if (error || !desc) {
+		int error2;
+
+		error2 = xfs_fsverity_delete_metadata(ip);
+		if (error2)
+			xfs_alert(ip->i_mount,
+"ino 0x%llx failed to clean up new fsverity metadata, err %d",
+					ip->i_ino, error2);
+	}
+
+	xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
+	return error;
+}
+
+/*
+ * Retrieve a merkle tree block.
+ */ +static struct page * +xfs_fsverity_read_merkle( + struct inode *inode, + pgoff_t index) +{ + index += xfs_fsverity_metadata_offset(XFS_I(inode)) >> PAGE_SHIFT; + + return generic_read_merkle_tree_page(inode, index); +} + +/* + * Start readahead of merkle tree blocks. + */ +static void +xfs_fsverity_readahead_merkle_tree( + struct inode *inode, + pgoff_t index, + unsigned long nr_pages) +{ + index += xfs_fsverity_metadata_offset(XFS_I(inode)) >> PAGE_SHIFT; + + generic_readahead_merkle_tree(inode, index, nr_pages); +} + +/* + * Write a merkle tree block. + */ +static int +xfs_fsverity_write_merkle( + struct file *file, + const void *buf, + u64 pos, + unsigned int size, + const u8 *zero_digest, + unsigned int digest_size) +{ + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + loff_t position = pos + + xfs_fsverity_metadata_offset(ip); + const char *p; + unsigned int i; + + if (position + size > inode->i_sb->s_maxbytes) + return -EFBIG; + + /* + * If this is a block full of hashes of zeroed blocks, don't bother + * storing the block. We can synthesize them later. + * + * However, do this only in case Merkle tree block == fs block size. + * Iomap synthesizes these blocks based on holes in the merkle tree. We + * won't be able to tell if something needs to be synthesized for the + * range in the fs block. For example, for 4k filesystem block + * + * [ 1k | zero hashes | zero hashes | 1k ] + * + * Iomap won't know about these empty blocks. 
+ */ + for (i = 0, p = buf; i < size; i += digest_size, p += digest_size) + if (memcmp(p, zero_digest, digest_size)) + break; + if (i == size && size == ip->i_mount->m_sb.sb_blocksize) + return 0; + + return iomap_fsverity_write(file, position, size, buf, + &xfs_buffered_write_iomap_ops, + &xfs_iomap_write_ops); +} + +const struct fsverity_operations xfs_fsverity_ops = { + .begin_enable_verity = xfs_fsverity_begin_enable, + .end_enable_verity = xfs_fsverity_end_enable, + .get_verity_descriptor = xfs_fsverity_get_descriptor, + .read_merkle_tree_page = xfs_fsverity_read_merkle, + .readahead_merkle_tree = xfs_fsverity_readahead_merkle_tree, + .write_merkle_tree_block = xfs_fsverity_write_merkle, +}; diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h index ec77ba571106..6a981e20a75b 100644 --- a/fs/xfs/xfs_fsverity.h +++ b/fs/xfs/xfs_fsverity.h @@ -6,8 +6,10 @@ #define __XFS_FSVERITY_H__ #include "xfs_platform.h" +#include #ifdef CONFIG_FS_VERITY +extern const struct fsverity_operations xfs_fsverity_ops; loff_t xfs_fsverity_metadata_offset(const struct xfs_inode *ip); bool xfs_fsverity_is_file_data(const struct xfs_inode *ip, loff_t offset); #else diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index fd297082aeb8..9818d8f8f239 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -153,6 +153,10 @@ xfs_warn_experimental( .opstate = XFS_OPSTATE_WARNED_ZONED, .name = "zoned RT device", }, + [XFS_EXPERIMENTAL_FSVERITY] = { + .opstate = XFS_OPSTATE_WARNED_FSVERITY, + .name = "fsverity", + }, }; ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 49b0ef40d299..083403944f11 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -94,6 +94,7 @@ enum xfs_experimental_feat { XFS_EXPERIMENTAL_SHRINK, XFS_EXPERIMENTAL_LARP, XFS_EXPERIMENTAL_ZONED, + XFS_EXPERIMENTAL_FSVERITY, XFS_EXPERIMENTAL_MAX, }; diff --git 
a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 07f6aa3c3f26..84d7cfb5e2c7 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -583,6 +583,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID) #define XFS_OPSTATE_WARNED_ZONED 19 /* (Zoned) GC is in progress */ #define XFS_OPSTATE_ZONEGC_RUNNING 20 +/* Kernel has logged a warning about fsverity support */ +#define XFS_OPSTATE_WARNED_FSVERITY 21 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f8de44443e81..d9d442009610 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -30,6 +30,7 @@ #include "xfs_filestream.h" #include "xfs_quota.h" #include "xfs_sysfs.h" +#include "xfs_fsverity.h" #include "xfs_ondisk.h" #include "xfs_rmap_item.h" #include "xfs_refcount_item.h" @@ -1686,6 +1687,9 @@ xfs_fs_fill_super( sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &xfs_super_operations; +#ifdef CONFIG_FS_VERITY + sb->s_vop = &xfs_fsverity_ops; +#endif /* * Delay mount work if the debug hook is set. This is debug @@ -1939,6 +1943,9 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; + if (xfs_has_verity(mp)) + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_FSVERITY); + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = -ENOENT; -- 2.51.2 XFS preallocates spaces during writes. In normal I/O this space, if unused, is removed by truncate. For files with fsverity XFS does not use truncate as fsverity metadata is stored past EOF. After we're done with writing fsverity metadata iterate over extents in that region and remove any unwritten ones. These would be left overs in the holes in the merkle tree and past fsverity descriptor. 
Reviewed-by: Christoph Hellwig
Signed-off-by: Andrey Albershteyn
---
 fs/xfs/xfs_fsverity.c | 79 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index 5a6a48fcf843..b193009a1bdb 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -21,6 +23,8 @@
 #include "xfs_iomap.h"
 #include "xfs_error.h"
 #include "xfs_health.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include
 #include
 #include
@@ -173,6 +175,75 @@ xfs_fsverity_delete_metadata(
 	return error;
 }
 
+/*
+ * Remove unwritten extents from the data fork, walking from byte offset
+ * @start towards byte offset @end (both converted to filesystem blocks with
+ * XFS_B_TO_FSB()).  Each unwritten extent is unmapped in its own transaction.
+ * The walk stops at the first error, which is returned to the caller.
+ */
+static int
+xfs_fsverity_cancel_unwritten(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start,
+	xfs_fileoff_t		end)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSB(mp, start);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end);
+	struct xfs_bmbt_irec	imap;
+	int			nimaps;
+	int			error = 0;
+	int			done;
+
+	while (offset_fsb < end_fsb) {
+		nimaps = 1;
+
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
+				0, &tp);
+		if (error)
+			return error;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+				&imap, &nimaps, 0);
+		if (error)
+			goto out_cancel;
+
+		if (nimaps == 0)
+			goto out_cancel;
+
+		if (imap.br_state == XFS_EXT_UNWRITTEN) {
+			xfs_trans_ijoin(tp, ip, 0);
+
+			error = xfs_bunmapi(tp, ip, imap.br_startoff,
+					imap.br_blockcount, 0, 1, &done);
+			if (error)
+				goto out_cancel;
+
+			error = xfs_trans_commit(tp);
+		} else {
+			xfs_trans_cancel(tp);
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		/*
+		 * Report a commit failure now; otherwise the next iteration's
+		 * xfs_trans_alloc() would overwrite it.
+		 */
+		if (error)
+			return error;
+
+		offset_fsb = imap.br_startoff + imap.br_blockcount;
+	}
+
+	return error;
+out_cancel:
+	xfs_trans_cancel(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
 
 /*
  * Prepare to enable fsverity by clearing old metadata.
@@ -248,6 +307,14 @@ xfs_fsverity_end_enable( if (error) goto out; + /* + * Remove unwritten extents left by COW preallocations and write + * preallocation in the merkle tree holes and past descriptor + */ + error = xfs_fsverity_cancel_unwritten(ip, range_start, LLONG_MAX); + if (error) + goto out; + /* * Proactively drop any delayed allocations in COW fork, the fsverity * files are read-only -- 2.51.2 Add fs-verity ioctls to enable, dump metadata (descriptor and Merkle tree pages) and obtain file's digest. [djwong: remove unnecessary casting] Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/xfs_ioctl.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index facffdc8dca8..e633d56cad00 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -46,6 +46,7 @@ #include #include +#include /* Return 0 on success or positive error */ int @@ -1426,6 +1427,19 @@ xfs_file_ioctl( case XFS_IOC_VERIFY_MEDIA: return xfs_ioc_verify_media(filp, arg); + case FS_IOC_ENABLE_VERITY: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_enable(filp, arg); + case FS_IOC_MEASURE_VERITY: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_measure(filp, arg); + case FS_IOC_READ_VERITY_METADATA: + if (!xfs_has_verity(mp)) + return -EOPNOTSUPP; + return fsverity_ioctl_read_metadata(filp, arg); + default: return -ENOTTY; } -- 2.51.2 From: "Darrick J. Wong" Advertise that this filesystem supports fsverity. Signed-off-by: Darrick J. 
Wong Reviewed-by: Andrey Albershteyn Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_sb.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index d165de607d17..ebf17a0b0722 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -250,6 +250,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ #define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */ #define XFS_FSOP_GEOM_FLAGS_ZONED (1 << 27) /* zoned rt device */ +#define XFS_FSOP_GEOM_FLAGS_VERITY (1 << 28) /* fs-verity */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a15510ebd2f1..222bbe5559df 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1590,6 +1590,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; if (xfs_has_zoned(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ZONED; + if (xfs_has_verity(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_VERITY; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); -- 2.51.2 From: "Darrick J. Wong" If an inode has the incore verity iflag set, make sure that we can actually activate fsverity on that inode. If activation fails due to a fsverity metadata validation error, clear the flag. The usage model for fsverity requires any program that cares about verity state to call statx/getflags after opening the file to check that the flag is set, so clearing the flag will not compromise that model. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/scrub/attr.c | 7 +++++ fs/xfs/scrub/common.c | 53 +++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.h | 2 ++ fs/xfs/scrub/inode.c | 7 +++++ fs/xfs/scrub/inode_repair.c | 36 +++++++++++++++++++++++++ 5 files changed, 105 insertions(+) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 390ac2e11ee0..daf7962c2374 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -649,6 +649,13 @@ xchk_xattr( if (!xfs_inode_hasattr(sc->ip)) return -ENOENT; + /* + * If this is a verity file that won't activate, we cannot check the + * merkle tree geometry. + */ + if (xchk_inode_verity_broken(sc->ip)) + xchk_set_incomplete(sc); + /* Allocate memory for xattr checking. */ error = xchk_setup_xattr_buf(sc, 0); if (error == -ENOMEM) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 20e63069088b..6cc6bea9c554 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -45,6 +45,8 @@ #include "scrub/health.h" #include "scrub/tempfile.h" +#include + /* Common code for the metadata scrubbers. */ /* @@ -1743,3 +1745,54 @@ xchk_inode_count_blocks( return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents, count); } + +/* + * If this inode has S_VERITY set on it, read the verity info. If the reading + * fails with anything other than ENOMEM, the file is corrupt, which we can + * detect later with fsverity_active. + * + * Callers must hold the IOLOCK and must not hold the ILOCK of sc->ip because + * activation reads inode data. 
+ */ +int +xchk_inode_setup_verity( + struct xfs_scrub *sc) +{ + int error; + + if (!fsverity_active(VFS_I(sc->ip))) + return 0; + + error = fsverity_ensure_verity_info(VFS_I(sc->ip)); + switch (error) { + case 0: + /* fsverity is active */ + break; + case -ENODATA: + case -EMSGSIZE: + case -EINVAL: + case -EFSCORRUPTED: + case -EFBIG: + /* + * The nonzero errno codes above are the error codes that can + * be returned from fsverity on metadata validation errors. + */ + return 0; + default: + /* runtime errors */ + return error; + } + + return 0; +} + +/* + * Is this a verity file that failed to activate? Callers must have tried to + * activate fsverity via xchk_inode_setup_verity. + */ +bool +xchk_inode_verity_broken( + struct xfs_inode *ip) +{ + return fsverity_active(VFS_I(ip)) && !fsverity_get_info(VFS_I(ip)); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index f2ecc68538f0..aa16d310bd6d 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -264,6 +264,8 @@ int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino, bool *inuse); int xchk_inode_count_blocks(struct xfs_scrub *sc, int whichfork, xfs_extnum_t *nextents, xfs_filblks_t *count); +int xchk_inode_setup_verity(struct xfs_scrub *sc); +bool xchk_inode_verity_broken(struct xfs_inode *ip); bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip); bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip); diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 948d04dcba2a..8ce6917e22b4 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -36,6 +36,10 @@ xchk_prepare_iscrub( xchk_ilock(sc, XFS_IOLOCK_EXCL); + error = xchk_inode_setup_verity(sc); + if (error) + return error; + error = xchk_trans_alloc(sc, 0); if (error) return error; @@ -833,6 +837,9 @@ xchk_inode( if (S_ISREG(VFS_I(sc->ip)->i_mode)) xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); + if (xchk_inode_verity_broken(sc->ip)) + xchk_ino_set_corrupt(sc, sc->sm->sm_ino); + 
xchk_inode_check_unlinked(sc); xchk_inode_xref(sc, sc->ip->i_ino, &di); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 9738b9ce3f2d..3761e3922466 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -573,6 +573,8 @@ xrep_dinode_flags( dip->di_nrext64_pad = 0; else if (dip->di_version >= 3) dip->di_v3_pad = 0; + if (!xfs_has_verity(mp) || !S_ISREG(mode)) + flags2 &= ~XFS_DIFLAG2_VERITY; if (flags2 & XFS_DIFLAG2_METADATA) { xfs_failaddr_t fa; @@ -1613,6 +1615,10 @@ xrep_dinode_core( if (iget_error) return iget_error; + error = xchk_inode_setup_verity(sc); + if (error) + return error; + error = xchk_trans_alloc(sc, 0); if (error) return error; @@ -2032,6 +2038,27 @@ xrep_inode_unlinked( return 0; } +/* + * If this file is a fsverity file, xchk_prepare_iscrub or xrep_dinode_core + * should have activated it. If it's still not active, then there's something + * wrong with the verity descriptor and we should turn it off. + */ +STATIC int +xrep_inode_verity( + struct xfs_scrub *sc) +{ + struct inode *inode = VFS_I(sc->ip); + + if (xchk_inode_verity_broken(sc->ip)) { + sc->ip->i_diflags2 &= ~XFS_DIFLAG2_VERITY; + inode->i_flags &= ~S_VERITY; + + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + } + + return 0; +} + /* Repair an inode's fields. */ int xrep_inode( @@ -2081,6 +2108,15 @@ xrep_inode( return error; } + /* + * Disable fsverity if it cannot be activated. Activation failure + * prohibits the file from being opened, so there cannot be another + * program with an open fd to what it thinks is a verity file. + */ + error = xrep_inode_verity(sc); + if (error) + return error; + /* Reconnect incore unlinked list */ error = xrep_inode_unlinked(sc); if (error) -- 2.51.2 Report corrupted fsverity descriptor through health system. Reviewed-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn --- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/libxfs/xfs_health.h | 4 +++- fs/xfs/xfs_fsverity.c | 13 ++++++++++--- fs/xfs/xfs_health.c | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index ebf17a0b0722..cece31ecee81 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -422,6 +422,7 @@ struct xfs_bulkstat { #define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */ #define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */ #define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */ +#define XFS_BS_SICK_FSVERITY (1 << 9) /* fsverity metadata */ /* * Project quota id helpers (previously projid was 16bit only diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 1d45cf5789e8..932b447190da 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -104,6 +104,7 @@ struct xfs_rtgroup; /* Don't propagate sick status to ag health summary during inactivation */ #define XFS_SICK_INO_FORGET (1 << 12) #define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */ +#define XFS_SICK_INO_FSVERITY (1 << 14) /* fsverity metadata */ /* Primary evidence of health problems in a given group. 
*/ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ @@ -140,7 +141,8 @@ struct xfs_rtgroup; XFS_SICK_INO_XATTR | \ XFS_SICK_INO_SYMLINK | \ XFS_SICK_INO_PARENT | \ - XFS_SICK_INO_DIRTREE) + XFS_SICK_INO_DIRTREE | \ + XFS_SICK_INO_FSVERITY) #define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ XFS_SICK_INO_BMBTA_ZAPPED | \ diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index b193009a1bdb..ecc66ee8bac5 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -84,16 +84,23 @@ xfs_fsverity_get_descriptor( return error; desc_size = be32_to_cpu(d_desc_size); - if (XFS_IS_CORRUPT(mp, desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) + if (XFS_IS_CORRUPT(mp, desc_size > FS_VERITY_MAX_DESCRIPTOR_SIZE)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; - if (XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) + } + + if (XFS_IS_CORRUPT(mp, desc_size > desc_size_pos)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; + } if (!buf_size) return desc_size; - if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) + if (XFS_IS_CORRUPT(mp, desc_size > buf_size)) { + xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_FSVERITY); return -ERANGE; + } desc_pos = round_down(desc_size_pos - desc_size, blocksize); error = fsverity_pagecache_read(inode, buf, desc_size, desc_pos); diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 239b843e83d4..be66760fb120 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -625,6 +625,7 @@ static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR }, { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE }, + { XFS_SICK_INO_FSVERITY, XFS_BS_SICK_FSVERITY }, }; /* Fill out bulkstat health info. */ -- 2.51.2 Even though fsverity has traces, debugging issues with varying block sizes could be a bit less transparent without read/write traces. Signed-off-by: Andrey Albershteyn Reviewed-by: "Darrick J. 
Wong" --- fs/xfs/xfs_fsverity.c | 6 ++++++ fs/xfs/xfs_trace.h | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c index ecc66ee8bac5..6db8113da52f 100644 --- a/fs/xfs/xfs_fsverity.c +++ b/fs/xfs/xfs_fsverity.c @@ -64,6 +64,8 @@ xfs_fsverity_get_descriptor( uint32_t blocksize = i_blocksize(VFS_I(ip)); xfs_fileoff_t last_block_offset; + trace_xfs_fsverity_get_descriptor(ip); + ASSERT(inode->i_flags & S_VERITY); error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &rec, &is_empty); if (error) @@ -377,6 +379,7 @@ xfs_fsverity_read_merkle( pgoff_t index) { index += xfs_fsverity_metadata_offset(XFS_I(inode)) >> PAGE_SHIFT; + trace_xfs_fsverity_read_merkle(XFS_I(inode), index, PAGE_SIZE); return generic_read_merkle_tree_page(inode, index); } @@ -391,6 +394,7 @@ xfs_fsverity_readahead_merkle_tree( unsigned long nr_pages) { index += xfs_fsverity_metadata_offset(XFS_I(inode)) >> PAGE_SHIFT; + trace_xfs_fsverity_read_merkle(XFS_I(inode), index, PAGE_SIZE); generic_readahead_merkle_tree(inode, index, nr_pages); } @@ -414,6 +418,8 @@ xfs_fsverity_write_merkle( const char *p; unsigned int i; + trace_xfs_fsverity_write_merkle(XFS_I(inode), position, size); + if (position + size > inode->i_sb->s_maxbytes) return -EFBIG; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5e8190fe2be9..d05f79ec92db 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6440,6 +6440,52 @@ TRACE_EVENT(xfs_verify_media_error, __entry->error) ); +TRACE_EVENT(xfs_fsverity_get_descriptor, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +); + +DECLARE_EVENT_CLASS(xfs_fsverity_class, + TP_PROTO(struct xfs_inode *ip, u64 pos, size_t length), + 
TP_ARGS(ip, pos, length), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, pos) + __field(size_t, length) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->pos = pos; + __entry->length = length; + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%zx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->length) +) + +#define DEFINE_FSVERITY_EVENT(name) \ +DEFINE_EVENT(xfs_fsverity_class, name, \ + TP_PROTO(struct xfs_inode *ip, u64 pos, size_t length), \ + TP_ARGS(ip, pos, length)) +DEFINE_FSVERITY_EVENT(xfs_fsverity_read_merkle); +DEFINE_FSVERITY_EVENT(xfs_fsverity_write_merkle); +DEFINE_FSVERITY_EVENT(xfs_fsverity_file_corrupt); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- 2.51.2 Finalize fs-verity integration in XFS by making the kernel fs-verity aware via the ro-compat flag. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: add spaces] Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_format.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 4dff29659e40..0ce46c234b9c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -378,8 +378,9 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ - XFS_SB_FEAT_RO_COMPAT_REFLINK| \ - XFS_SB_FEAT_RO_COMPAT_INOBTCNT) + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ + XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \ + XFS_SB_FEAT_RO_COMPAT_VERITY) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( -- 2.51.2