Extend statx system call to return additional info for buffered atomic write support for a file. Currently only direct IO is supported. New flags STATX_WRITE_ATOMIC_BUF and STATX_ATTR_WRITE_ATOMIC_BUF are for indicating whether the file knows and supports buffered atomic writes. Structure statx members stx_atomic_write_unit_{min, max, segments_max} will be reused for buffered atomic writes. Flags STATX_WRITE_ATOMIC_DIO and STATX_WRITE_ATOMIC_BUF are mutually exclusive. If both flags are set, statx will ignore the request and neither flag will be set in statx.result_mask. Also, make sure ext4 and xfs report atomic write unit min and max of 0 when the new flag is passed. Co-developed-by: John Garry Signed-off-by: John Garry Signed-off-by: Ojaswin Mujoo --- block/bdev.c | 3 +- fs/ext4/inode.c | 7 +- fs/stat.c | 33 +++-- fs/xfs/xfs_file.c | 9 +- fs/xfs/xfs_iops.c | 121 ++++++++++-------- fs/xfs/xfs_iops.h | 6 +- include/linux/fs.h | 3 +- include/trace/misc/fs.h | 1 + include/uapi/linux/stat.h | 2 + tools/include/uapi/linux/stat.h | 2 + .../trace/beauty/include/uapi/linux/stat.h | 2 + 11 files changed, 119 insertions(+), 70 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index 3bc90d5feb4c..8f0eab0a1ecf 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1335,8 +1335,7 @@ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask) generic_fill_statx_atomic_writes(stat, queue_atomic_write_unit_min_bytes(bd_queue), - queue_atomic_write_unit_max_bytes(bd_queue), - 0); + queue_atomic_write_unit_max_bytes(bd_queue), 0, true); } stat->blksize = bdev_io_min(bdev); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9555149a8ba6..0d5013993fba 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -6106,8 +6106,11 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, awu_max = sbi->s_awu_max; } - generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0); - } + generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0, + true); + } else if 
(request_mask & STATX_WRITE_ATOMIC_BUF) + /* Atomic writes for buferred IO not supported yet */ + generic_fill_statx_atomic_writes(stat, 0, 0, 0, false); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (flags & EXT4_APPEND_FL) diff --git a/fs/stat.c b/fs/stat.c index 7eb2a247ab67..8ba3993dcd09 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -137,20 +137,27 @@ EXPORT_SYMBOL(generic_fill_statx_attr); * @unit_min: Minimum supported atomic write length in bytes * @unit_max: Maximum supported atomic write length in bytes * @unit_max_opt: Optimised maximum supported atomic write length in bytes + * @is_dio: Is the stat request for dio * - * Fill in the STATX{_ATTR}_WRITE_ATOMIC_DIO flags in the kstat structure from - * atomic write unit_min and unit_max values. + * Fill in the STATX{_ATTR}_WRITE_ATOMIC_{DIO,BUF} flags in the kstat structure + * from atomic write unit_min and unit_max values. */ void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max, - unsigned int unit_max_opt) + unsigned int unit_max_opt, + bool is_dio) { - /* Confirm that the request type is known */ - stat->result_mask |= STATX_WRITE_ATOMIC_DIO; + if (is_dio) { + /* Confirm that the request type is known */ + stat->result_mask |= STATX_WRITE_ATOMIC_DIO; - /* Confirm that the file attribute type is known */ - stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO; + /* Confirm that the file attribute type is known */ + stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO; + } else { + stat->result_mask |= STATX_WRITE_ATOMIC_BUF; + stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_BUF; + } if (unit_min) { stat->atomic_write_unit_min = unit_min; @@ -160,7 +167,10 @@ void generic_fill_statx_atomic_writes(struct kstat *stat, stat->atomic_write_segments_max = 1; /* Confirm atomic writes are actually supported */ - stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO; + if (is_dio) + stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO; + else + stat->attributes |= 
STATX_ATTR_WRITE_ATOMIC_BUF; } } EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes); @@ -206,6 +216,13 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); + if (request_mask & STATX_WRITE_ATOMIC_BUF && + request_mask & STATX_WRITE_ATOMIC_DIO) { + /* Both are mutually exclusive, disable them */ + request_mask &= + ~(STATX_WRITE_ATOMIC_BUF | STATX_WRITE_ATOMIC_DIO); + } + idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) { int ret; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5b9864c8582e..3efa575570ed 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1087,6 +1087,7 @@ xfs_file_write_iter( struct xfs_inode *ip = XFS_I(inode); ssize_t ret; size_t ocount = iov_iter_count(from); + bool is_dio = iocb->ki_flags & IOCB_DIRECT; XFS_STATS_INC(ip->i_mount, xs_write_calls); @@ -1097,10 +1098,10 @@ xfs_file_write_iter( return -EIO; if (iocb->ki_flags & IOCB_ATOMIC) { - if (ocount < xfs_get_atomic_write_min(ip)) + if (ocount < xfs_get_atomic_write_min(ip, is_dio)) return -EINVAL; - if (ocount > xfs_get_atomic_write_max(ip)) + if (ocount > xfs_get_atomic_write_max(ip, is_dio)) return -EINVAL; ret = generic_atomic_write_valid(iocb, from); @@ -1111,7 +1112,7 @@ xfs_file_write_iter( if (IS_DAX(inode)) return xfs_file_dax_write(iocb, from); - if (iocb->ki_flags & IOCB_DIRECT) { + if (is_dio) { /* * Allow a directio write to fall back to a buffered * write *only* in the case that we're doing a reflink @@ -1568,7 +1569,7 @@ xfs_file_open( if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; - if (xfs_get_atomic_write_min(XFS_I(inode)) > 0) + if (xfs_get_atomic_write_min(XFS_I(inode), file->f_flags & O_DIRECT) > 0) file->f_mode |= FMODE_CAN_ATOMIC_WRITE; return generic_file_open(inode, file); } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f41fcdd3043b..f036c46b19c5 100644 --- a/fs/xfs/xfs_iops.c +++ 
b/fs/xfs/xfs_iops.c @@ -601,81 +601,99 @@ xfs_report_dioalign( unsigned int xfs_get_atomic_write_min( - struct xfs_inode *ip) + struct xfs_inode *ip, + bool is_dio) { - struct xfs_mount *mp = ip->i_mount; + if (is_dio) { + struct xfs_mount *mp = ip->i_mount; - /* - * If we can complete an atomic write via atomic out of place writes, - * then advertise a minimum size of one fsblock. Without this - * mechanism, we can only guarantee atomic writes up to a single LBA. - * - * If out of place writes are not available, we can guarantee an atomic - * write of exactly one single fsblock if the bdev will make that - * guarantee for us. - */ - if (xfs_inode_can_hw_atomic_write(ip) || - xfs_inode_can_sw_atomic_write(ip)) - return mp->m_sb.sb_blocksize; + /* + * If we can complete an atomic write via atomic out of place writes, + * then advertise a minimum size of one fsblock. Without this + * mechanism, we can only guarantee atomic writes up to a single LBA. + * + * If out of place writes are not available, we can guarantee an atomic + * write of exactly one single fsblock if the bdev will make that + * guarantee for us. + */ + if (xfs_inode_can_hw_atomic_write(ip) || + xfs_inode_can_sw_atomic_write(ip)) + return mp->m_sb.sb_blocksize; + } + /* buffered IO not supported yet so return 0 right away */ return 0; } unsigned int xfs_get_atomic_write_max( - struct xfs_inode *ip) + struct xfs_inode *ip, + bool is_dio) { struct xfs_mount *mp = ip->i_mount; - /* - * If out of place writes are not available, we can guarantee an atomic - * write of exactly one single fsblock if the bdev will make that - * guarantee for us. - */ - if (!xfs_inode_can_sw_atomic_write(ip)) { - if (xfs_inode_can_hw_atomic_write(ip)) - return mp->m_sb.sb_blocksize; - return 0; + if (is_dio) { + /* + * If out of place writes are not available, we can guarantee an atomic + * write of exactly one single fsblock if the bdev will make that + * guarantee for us. 
+ */ + if (!xfs_inode_can_sw_atomic_write(ip)) { + if (xfs_inode_can_hw_atomic_write(ip)) + return mp->m_sb.sb_blocksize; + return 0; + } + + /* + * If we can complete an atomic write via atomic out of place writes, + * then advertise a maximum size of whatever we can complete through + * that means. Hardware support is reported via max_opt, not here. + */ + if (XFS_IS_REALTIME_INODE(ip)) + return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max); + return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max); } - /* - * If we can complete an atomic write via atomic out of place writes, - * then advertise a maximum size of whatever we can complete through - * that means. Hardware support is reported via max_opt, not here. - */ - if (XFS_IS_REALTIME_INODE(ip)) - return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max); - return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max); + /* buffered IO not supported yet so return 0 right away */ + return 0; } unsigned int xfs_get_atomic_write_max_opt( - struct xfs_inode *ip) + struct xfs_inode *ip, + bool is_dio) { - unsigned int awu_max = xfs_get_atomic_write_max(ip); + if (is_dio) { + unsigned int awu_max = xfs_get_atomic_write_max(ip, is_dio); - /* if the max is 1x block, then just keep behaviour that opt is 0 */ - if (awu_max <= ip->i_mount->m_sb.sb_blocksize) - return 0; + /* if the max is 1x block, then just keep behaviour that opt is 0 */ + if (awu_max <= ip->i_mount->m_sb.sb_blocksize) + return 0; - /* - * Advertise the maximum size of an atomic write that we can tell the - * block device to perform for us. In general the bdev limit will be - * less than our out of place write limit, but we don't want to exceed - * the awu_max. - */ - return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max); + /* + * Advertise the maximum size of an atomic write that we can tell the + * block device to perform for us. 
In general the bdev limit will be + * less than our out of place write limit, but we don't want to exceed + * the awu_max. + */ + return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max); + } + + /* buffered IO not supported yet so return 0 right away */ + return 0; } static void xfs_report_atomic_write( struct xfs_inode *ip, - struct kstat *stat) + struct kstat *stat, + bool is_dio) { generic_fill_statx_atomic_writes(stat, - xfs_get_atomic_write_min(ip), - xfs_get_atomic_write_max(ip), - xfs_get_atomic_write_max_opt(ip)); + xfs_get_atomic_write_min(ip, is_dio), + xfs_get_atomic_write_max(ip, is_dio), + xfs_get_atomic_write_max_opt(ip, is_dio), + is_dio); } STATIC int @@ -741,8 +759,11 @@ xfs_vn_getattr( case S_IFREG: if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) xfs_report_dioalign(ip, stat); - if (request_mask & STATX_WRITE_ATOMIC_DIO) - xfs_report_atomic_write(ip, stat); + if (request_mask & + (STATX_WRITE_ATOMIC_DIO | STATX_WRITE_ATOMIC_BUF)) + xfs_report_atomic_write(ip, stat, + (request_mask & + STATX_WRITE_ATOMIC_DIO)); fallthrough; default: stat->blksize = xfs_stat_blksize(ip); diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 0896f6b8b3b8..09e79263add1 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -19,8 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode *dir, extern void xfs_setup_inode(struct xfs_inode *ip); extern void xfs_setup_iops(struct xfs_inode *ip); extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); -unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip); -unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip); -unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip); +unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip, bool is_dio); +unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip, bool is_dio); +unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip, bool is_dio); #endif /* __XFS_IOPS_H__ */ diff --git a/include/linux/fs.h 
b/include/linux/fs.h index c895146c1444..2dec66913e97 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3563,7 +3563,8 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max, - unsigned int unit_max_opt); + unsigned int unit_max_opt, + bool is_dio); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h index 19ea9339b9bd..3b69910a5998 100644 --- a/include/trace/misc/fs.h +++ b/include/trace/misc/fs.h @@ -162,4 +162,5 @@ { STATX_MNT_ID_UNIQUE, "MNT_ID_UNIQUE" }, \ { STATX_SUBVOL, "SUBVOL" }, \ { STATX_WRITE_ATOMIC_DIO, "WRITE_ATOMIC_DIO" }, \ + { STATX_WRITE_ATOMIC_BUF, "WRITE_ATOMIC_BUF" }, \ { STATX_DIO_READ_ALIGN, "DIO_READ_ALIGN" }) diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 57f558be933e..2d77da04df23 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -221,6 +221,7 @@ struct statx { /* Old name kept for backward compatibility */ #define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO #define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ +#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -259,6 +260,7 @@ struct statx { #define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */ /* Old name kept for backward compatibility */ #define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO +#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/include/uapi/linux/stat.h 
b/tools/include/uapi/linux/stat.h index 57f558be933e..a7e0036669c2 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -221,6 +221,7 @@ struct statx { /* Old name kept for backward compatibility */ #define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO #define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ +#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -259,6 +260,7 @@ struct statx { #define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */ /* Old name kept for backward compatibility */ #define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO +#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 57f558be933e..2d77da04df23 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -221,6 +221,7 @@ struct statx { /* Old name kept for backward compatibility */ #define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO #define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ +#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -259,6 +260,7 @@ struct statx { #define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */ /* Old name kept for backward compatibility */ #define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO +#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ -- 2.51.0