This simplifies the code and makes the logic clearer. It also makes any swap device type added later easier to handle. Currently there are three types of swap devices: bdev_fs, bdev_sync and bdev_async, and only the read_folio and write_folio operations are included. In the future, more swap device types could be added and more operations adapted into swap_ops as appropriate. Suggested-by: Chris Li Signed-off-by: Baoquan He --- include/linux/swap.h | 3 ++ mm/page_io.c | 104 +++++++++++++++++++++++++------------------ mm/swap.h | 10 ++++- mm/swapfile.c | 9 ++++ mm/zswap.c | 2 +- 5 files changed, 83 insertions(+), 45 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7a09df6977a5..0d045fc8ec35 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -20,6 +20,8 @@ struct notifier_block; struct bio; +struct swap_ops; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ @@ -282,6 +284,7 @@ struct swap_info_struct { struct work_struct reclaim_work; /* reclaim worker */ struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_list; /* entry in swap_avail_head */ + const struct swap_ops *ops; }; static inline swp_entry_t page_swap_entry(struct page *page) diff --git a/mm/page_io.c b/mm/page_io.c index 70cea9e24d2f..2b520af88376 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -240,6 +240,9 @@ static void swap_zeromap_folio_clear(struct folio *folio) int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug) { int ret = 0; + struct swap_info_struct *sis = __swap_entry_to_info(folio->swap); + + VM_WARN_ON_FOLIO(!folio_test_swapcache(folio), folio); if (folio_free_swap(folio)) goto out_unlock; @@ -285,7 +288,7 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug) } rcu_read_unlock(); - __swap_writepage(folio, swap_plug); + sis->ops->write_folio(sis, folio, swap_plug);
return 0; out_unlock: folio_unlock(folio); @@ -375,10 +378,11 @@ static void sio_write_complete(struct kiocb *iocb, long ret) mempool_free(sio, sio_pool); } -static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug) +static void swap_writepage_fs(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **swap_plug) { struct swap_iocb *sio = swap_plug ? *swap_plug : NULL; - struct swap_info_struct *sis = __swap_entry_to_info(folio->swap); struct file *swap_file = sis->swap_file; loff_t pos = swap_dev_pos(folio->swap); @@ -411,8 +415,9 @@ static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug) *swap_plug = sio; } -static void swap_writepage_bdev_sync(struct folio *folio, - struct swap_info_struct *sis) +static void swap_writepage_bdev_sync(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug) { struct bio_vec bv; struct bio bio; @@ -431,8 +436,9 @@ static void swap_writepage_bdev_sync(struct folio *folio, __end_swap_bio_write(&bio); } -static void swap_writepage_bdev_async(struct folio *folio, - struct swap_info_struct *sis) +static void swap_writepage_bdev_async(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug) { struct bio *bio; @@ -448,29 +454,6 @@ static void swap_writepage_bdev_async(struct folio *folio, submit_bio(bio); } -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug) -{ - struct swap_info_struct *sis = __swap_entry_to_info(folio->swap); - - VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio); - /* - * ->flags can be updated non-atomically, - * but that will never affect SWP_FS_OPS, so the data_race - * is safe. - */ - if (data_race(sis->flags & SWP_FS_OPS)) - swap_writepage_fs(folio, swap_plug); - /* - * ->flags can be updated non-atomically, - * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race - * is safe. 
- */ - else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO)) - swap_writepage_bdev_sync(folio, sis); - else - swap_writepage_bdev_async(folio, sis); -} - void swap_write_unplug(struct swap_iocb *sio) { struct iov_iter from; @@ -539,9 +522,10 @@ static bool swap_read_folio_zeromap(struct folio *folio) return true; } -static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug) +static void swap_read_folio_fs(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug) { - struct swap_info_struct *sis = __swap_entry_to_info(folio->swap); struct swap_iocb *sio = NULL; loff_t pos = swap_dev_pos(folio->swap); @@ -573,8 +557,9 @@ static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug) *plug = sio; } -static void swap_read_folio_bdev_sync(struct folio *folio, - struct swap_info_struct *sis) +static void swap_read_folio_bdev_sync(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug) { struct bio_vec bv; struct bio bio; @@ -595,8 +580,9 @@ static void swap_read_folio_bdev_sync(struct folio *folio, put_task_struct(current); } -static void swap_read_folio_bdev_async(struct folio *folio, - struct swap_info_struct *sis) +static void swap_read_folio_bdev_async(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug) { struct bio *bio; @@ -610,6 +596,44 @@ static void swap_read_folio_bdev_async(struct folio *folio, submit_bio(bio); } +static const struct swap_ops bdev_fs_swap_ops = { + .read_folio = swap_read_folio_fs, + .write_folio = swap_writepage_fs, +}; + +static const struct swap_ops bdev_sync_swap_ops = { + .read_folio = swap_read_folio_bdev_sync, + .write_folio = swap_writepage_bdev_sync, +}; + +static const struct swap_ops bdev_async_swap_ops = { + .read_folio = swap_read_folio_bdev_async, + .write_folio = swap_writepage_bdev_async, +}; + +int init_swap_ops(struct swap_info_struct *sis) +{ + /* + * ->flags can be updated non-atomically, but that will + * never affect 
SWP_FS_OPS, so the data_race is safe. + */ + if (data_race(sis->flags & SWP_FS_OPS)) + sis->ops = &bdev_fs_swap_ops; + /* + * ->flags can be updated non-atomically, but that will + * never affect SWP_SYNCHRONOUS_IO, so the data_race is safe. + */ + else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO)) + sis->ops = &bdev_sync_swap_ops; + else + sis->ops = &bdev_async_swap_ops; + + if (!sis->ops || !sis->ops->read_folio || !sis->ops->write_folio) + return -EINVAL; + + return 0; +} + void swap_read_folio(struct folio *folio, struct swap_iocb **plug) { struct swap_info_struct *sis = __swap_entry_to_info(folio->swap); @@ -644,13 +668,7 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug) /* We have to read from slower devices. Increase zswap protection. */ zswap_folio_swapin(folio); - if (data_race(sis->flags & SWP_FS_OPS)) { - swap_read_folio_fs(folio, plug); - } else if (synchronous) { - swap_read_folio_bdev_sync(folio, sis); - } else { - swap_read_folio_bdev_async(folio, sis); - } + sis->ops->read_folio(sis, folio, plug); finish: if (workingset) { diff --git a/mm/swap.h b/mm/swap.h index a77016f2423b..8d4375f91632 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -217,6 +217,15 @@ extern void __swap_cluster_free_entries(struct swap_info_struct *si, /* linux/mm/page_io.c */ int sio_pool_init(void); struct swap_iocb; +struct swap_ops { + void (*read_folio)(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug); + void (*write_folio)(struct swap_info_struct *sis, + struct folio *folio, + struct swap_iocb **plug); +}; +int init_swap_ops(struct swap_info_struct *sis); void swap_read_folio(struct folio *folio, struct swap_iocb **plug); void __swap_read_unplug(struct swap_iocb *plug); static inline void swap_read_unplug(struct swap_iocb *plug) @@ -226,7 +235,6 @@ static inline void swap_read_unplug(struct swap_iocb *plug) } void swap_write_unplug(struct swap_iocb *sio); int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); 
-void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); /* linux/mm/swap_state.c */ extern struct address_space swap_space __read_mostly; diff --git a/mm/swapfile.c b/mm/swapfile.c index 9174f1eeffb0..29ae79d0fa2e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3597,6 +3597,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } } + /* + * init_swap_ops() sets si->ops based on flags. It does not need + * swapon_mutex, and must complete before enable_swap_info() + * exposes the device. + */ + error = init_swap_ops(si); + if (error) + goto bad_swap_unlock_inode; + error = zswap_swapon(si->type, maxpages); if (error) goto bad_swap_unlock_inode; diff --git a/mm/zswap.c b/mm/zswap.c index 4b5149173b0e..192401f46de4 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1054,7 +1054,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, folio_set_reclaim(folio); /* start writeback */ - __swap_writepage(folio, NULL); + si->ops->write_folio(si, folio, NULL); out: if (ret && ret != -EEXIST) { -- 2.52.0 Rename the swap I/O functions to use a consistent swap_<backend>_<op>_<object> naming scheme, so the backend type immediately follows the swap_ prefix. The new names align with swap_ops callbacks: .write_folio and .read_folio.
swap_writepage_fs -> swap_fs_write_folio swap_writepage_bdev_sync -> swap_bdev_sync_write_folio swap_writepage_bdev_async -> swap_bdev_async_write_folio swap_read_folio_fs -> swap_fs_read_folio swap_read_folio_bdev_sync -> swap_bdev_sync_read_folio swap_read_folio_bdev_async -> swap_bdev_async_read_folio Signed-off-by: Baoquan He --- mm/page_io.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 2b520af88376..38b94c560c37 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -378,7 +378,7 @@ static void sio_write_complete(struct kiocb *iocb, long ret) mempool_free(sio, sio_pool); } -static void swap_writepage_fs(struct swap_info_struct *sis, +static void swap_fs_write_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **swap_plug) { @@ -415,7 +415,7 @@ static void swap_writepage_fs(struct swap_info_struct *sis, *swap_plug = sio; } -static void swap_writepage_bdev_sync(struct swap_info_struct *sis, +static void swap_bdev_sync_write_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug) { @@ -436,7 +436,7 @@ static void swap_writepage_bdev_sync(struct swap_info_struct *sis, __end_swap_bio_write(&bio); } -static void swap_writepage_bdev_async(struct swap_info_struct *sis, +static void swap_bdev_async_write_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug) { @@ -522,7 +522,7 @@ static bool swap_read_folio_zeromap(struct folio *folio) return true; } -static void swap_read_folio_fs(struct swap_info_struct *sis, +static void swap_fs_read_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug) { @@ -557,7 +557,7 @@ static void swap_read_folio_fs(struct swap_info_struct *sis, *plug = sio; } -static void swap_read_folio_bdev_sync(struct swap_info_struct *sis, +static void swap_bdev_sync_read_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug) { @@ -580,7 +580,7 @@ 
static void swap_read_folio_bdev_sync(struct swap_info_struct *sis, put_task_struct(current); } -static void swap_read_folio_bdev_async(struct swap_info_struct *sis, +static void swap_bdev_async_read_folio(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug) { @@ -597,18 +597,18 @@ static void swap_read_folio_bdev_async(struct swap_info_struct *sis, } static const struct swap_ops bdev_fs_swap_ops = { - .read_folio = swap_read_folio_fs, - .write_folio = swap_writepage_fs, + .read_folio = swap_fs_read_folio, + .write_folio = swap_fs_write_folio, }; static const struct swap_ops bdev_sync_swap_ops = { - .read_folio = swap_read_folio_bdev_sync, - .write_folio = swap_writepage_bdev_sync, + .read_folio = swap_bdev_sync_read_folio, + .write_folio = swap_bdev_sync_write_folio, }; static const struct swap_ops bdev_async_swap_ops = { - .read_folio = swap_read_folio_bdev_async, - .write_folio = swap_writepage_bdev_async, + .read_folio = swap_bdev_async_read_folio, + .write_folio = swap_bdev_async_write_folio, }; int init_swap_ops(struct swap_info_struct *sis) -- 2.52.0 When swap_ops was introduced, the FS-swap batch submission remained as a standalone swap_write_unplug() that directly called mapping->a_ops->swap_rw(). This meant callers still had implicit knowledge of filesystem internals rather than going through the swap_ops abstraction. Fix this by adding an unplug callback to struct swap_ops. Each ops table provides its own implementation: - bdev_fs_swap_ops uses the existing FS batch-submission logic - bdev_sync/bdev_async_swap_ops leave it NULL since block-layer plugging handles their I/O The swap_iocb now carries a pointer to its ops table so that swap_write_unplug() can dispatch through the callback without the caller needing to know the swap device type. 
Signed-off-by: Baoquan He --- mm/page_io.c | 11 ++++++++++- mm/swap.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/page_io.c b/mm/page_io.c index 38b94c560c37..2c36d261ad98 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -329,6 +329,7 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio) struct swap_iocb { struct kiocb iocb; + const struct swap_ops *ops; struct bio_vec bvec[SWAP_CLUSTER_MAX]; int pages; int len; @@ -401,6 +402,7 @@ static void swap_fs_write_folio(struct swap_info_struct *sis, init_sync_kiocb(&sio->iocb, swap_file); sio->iocb.ki_complete = sio_write_complete; sio->iocb.ki_pos = pos; + sio->ops = sis->ops; sio->pages = 0; sio->len = 0; } @@ -454,7 +456,7 @@ static void swap_bdev_async_write_folio(struct swap_info_struct *sis, submit_bio(bio); } -void swap_write_unplug(struct swap_iocb *sio) +static void swap_fs_write_folio_unplug(struct swap_iocb *sio) { struct iov_iter from; struct address_space *mapping = sio->iocb.ki_filp->f_mapping; @@ -466,6 +468,12 @@ void swap_write_unplug(struct swap_iocb *sio) sio_write_complete(&sio->iocb, ret); } +void swap_write_unplug(struct swap_iocb *sio) +{ + if (sio->ops && sio->ops->unplug) + sio->ops->unplug(sio); +} + static void sio_read_complete(struct kiocb *iocb, long ret) { struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb); @@ -599,6 +607,7 @@ static void swap_bdev_async_read_folio(struct swap_info_struct *sis, static const struct swap_ops bdev_fs_swap_ops = { .read_folio = swap_fs_read_folio, .write_folio = swap_fs_write_folio, + .unplug = swap_fs_write_folio_unplug, }; static const struct swap_ops bdev_sync_swap_ops = { diff --git a/mm/swap.h b/mm/swap.h index 8d4375f91632..67e3b1617146 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -224,6 +224,7 @@ struct swap_ops { void (*write_folio)(struct swap_info_struct *sis, struct folio *folio, struct swap_iocb **plug); + void (*unplug)(struct swap_iocb *sio); }; int init_swap_ops(struct 
swap_info_struct *sis); void swap_read_folio(struct folio *folio, struct swap_iocb **plug); -- 2.52.0