Add three new hooks in struct file_operations to allow filesystems to manage write streams at a per-file level. Signed-off-by: Kanchan Joshi --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2e4d1e8b0e71..ff9aa391eda7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1967,6 +1967,12 @@ struct file_operations { int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); int (*mmap_prepare)(struct vm_area_desc *); + /* To fetch number of streams that are available for a file */ + int (*get_max_write_streams)(struct file *); + /* To set write stream on a file */ + int (*set_write_stream)(struct file *, unsigned long); + /* To query the write stream on a file */ + int (*get_write_stream)(struct file *); } __randomize_layout; /* Supports async buffered reads */ -- 2.25.1 Wire up the userspace interface for write stream management via three new fcntls. F_GET_MAX_WRITE_STREAMS: Returns the number of available streams. F_SET_WRITE_STREAM: Assign a specific stream value to the file. F_GET_WRITE_STREAM: Query what stream value is set on the file. Applications should query the available streams by calling F_GET_MAX_WRITE_STREAMS first. If the returned value is N, valid stream values for the file are 0 to N. Stream value 0 implies that no stream is set on the file. Setting a larger value than available streams is rejected. 
Signed-off-by: Kanchan Joshi --- fs/fcntl.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/linux/fcntl.h | 4 ++++ 2 files changed, 37 insertions(+) diff --git a/fs/fcntl.c b/fs/fcntl.c index f93dbca08435..c982f0506a3f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -441,6 +441,30 @@ static int f_owner_sig(struct file *filp, int signum, bool setsig) return ret; } +static long fcntl_get_max_write_streams(struct file *filp) +{ + if (filp->f_op->get_max_write_streams) + return filp->f_op->get_max_write_streams(filp); + + return -EOPNOTSUPP; +} + +static long fcntl_set_write_stream(struct file *filp, unsigned long arg) +{ + if (filp->f_op->set_write_stream) + return filp->f_op->set_write_stream(filp, arg); + + return -EOPNOTSUPP; +} + +static long fcntl_get_write_stream(struct file *filp) +{ + if (filp->f_op->get_write_stream) + return filp->f_op->get_write_stream(filp); + + return -EOPNOTSUPP; +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -563,6 +587,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, return -EFAULT; err = fcntl_setdeleg(fd, filp, &deleg); break; + case F_GET_MAX_WRITE_STREAMS: + err = fcntl_get_max_write_streams(filp); + break; + case F_SET_WRITE_STREAM: + err = fcntl_set_write_stream(filp, arg); + break; + case F_GET_WRITE_STREAM: + err = fcntl_get_write_stream(filp); + break; default: break; } diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index aadfbf6e0cb3..4b75470fc07a 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -190,4 +190,8 @@ struct delegation { #define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution would be allowed. 
*/ +/* write stream management */ +#define F_GET_MAX_WRITE_STREAMS (F_LINUX_SPECIFIC_BASE + 17) +#define F_GET_WRITE_STREAM (F_LINUX_SPECIFIC_BASE + 18) +#define F_SET_WRITE_STREAM (F_LINUX_SPECIFIC_BASE + 19) #endif /* _UAPI_LINUX_FCNTL_H */ -- 2.25.1 Add a new write_stream field to struct iomap. Existing hole is used to place the new field. Propagate write_stream from iomap to bio in both direct I/O and buffered writeback paths. Signed-off-by: Kanchan Joshi --- fs/iomap/direct-io.c | 1 + fs/iomap/ioend.c | 3 +++ include/linux/iomap.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 8c1fd7573aee..1fc7e1831b1c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -333,6 +333,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter, pos >> iter->inode->i_blkbits, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); bio->bi_write_hint = iter->inode->i_write_hint; + bio->bi_write_stream = iter->iomap.write_stream; bio->bi_ioprio = dio->iocb->ki_ioprio; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index e4d57cb969f1..bb5886c1e5a0 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -113,6 +113,7 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); bio->bi_write_hint = wpc->inode->i_write_hint; + bio->bi_write_stream = wpc->iomap.write_stream; wbc_init_bio(wpc->wbc, bio); wpc->nr_folios = 0; return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags); @@ -133,6 +134,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) return false; + if (wpc->iomap.write_stream != ioend->io_bio.bi_write_stream) + return false; /* * Limit ioend bio chain lengths to 
minimise IO completion latency. This * also prevents long tight loops ending page writeback on all the diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 99b7209dabd7..e087818d11d4 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -113,6 +113,8 @@ struct iomap { u64 length; /* length of mapping, bytes */ u16 type; /* type of mapping */ u16 flags; /* flags for mapping */ + /* 4-byte padding hole here */ + u8 write_stream; /* write stream for I/O */ struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; -- 2.25.1 Implement support for userspace controlled write-streams. Add a new i_write_stream field in xfs inode (note: existing hole is used), and use that to implement write stream management file operations. Signed-off-by: Kanchan Joshi --- fs/xfs/xfs_file.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_icache.c | 1 + fs/xfs/xfs_inode.h | 3 +++ fs/xfs/xfs_iomap.c | 1 + 4 files changed, 59 insertions(+) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 43d088a3bceb..f3b137407a60 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -2021,6 +2021,57 @@ xfs_file_mmap_prepare( return 0; } +static struct block_device * +xfs_file_get_bdev( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_bdev; + + return mp->m_ddev_targp->bt_bdev; +} + +static int +xfs_file_get_max_write_streams( + struct file *file) +{ + struct block_device *bdev = xfs_file_get_bdev(file_inode(file)); + + if (bdev) + return bdev_max_write_streams(bdev); + + return 0; +} + +static int +xfs_file_get_write_stream( + struct file *file) +{ + struct xfs_inode *ip = XFS_I(file_inode(file)); + + return READ_ONCE(ip->i_write_stream); +} + +static int +xfs_file_set_write_stream( + struct file *file, + unsigned long stream) +{ + struct xfs_inode *ip = 
XFS_I(file_inode(file)); + int max_streams = xfs_file_get_max_write_streams(file); + + if (stream > max_streams) + return -EINVAL; + xfs_ilock(ip, XFS_ILOCK_EXCL); + WRITE_ONCE(ip->i_write_stream, stream); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return 0; +} + const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read_iter = xfs_file_read_iter, @@ -2040,6 +2091,9 @@ const struct file_operations xfs_file_operations = { .fallocate = xfs_file_fallocate, .fadvise = xfs_file_fadvise, .remap_file_range = xfs_file_remap_range, + .get_max_write_streams = xfs_file_get_max_write_streams, + .get_write_stream = xfs_file_get_write_stream, + .set_write_stream = xfs_file_set_write_stream, .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE | FOP_DONTCACHE, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index dbaab4ae709f..fc9c6794b7db 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -130,6 +130,7 @@ xfs_inode_alloc( spin_lock_init(&ip->i_ioend_lock); ip->i_next_unlinked = NULLAGINO; ip->i_prev_unlinked = 0; + ip->i_write_stream = 0; return ip; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bd6d33557194..be3580fec318 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -38,6 +38,9 @@ typedef struct xfs_inode { struct xfs_ifork i_df; /* data fork */ struct xfs_ifork i_af; /* attribute fork */ + /* Write stream information */ + uint8_t i_write_stream; /* for placement, 0 = none */ + /* Transaction and locking information. 
*/ struct xfs_inode_log_item *i_itemp; /* logging information */ struct rw_semaphore i_lock; /* inode lock */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index be86d43044df..7988c9e16635 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -148,6 +148,7 @@ xfs_bmbt_to_iomap( else iomap->bdev = target->bt_bdev; iomap->flags = iomap_flags; + iomap->write_stream = ip->i_write_stream; /* * If the inode is dirty for datasync purposes, let iomap know so it -- 2.25.1