This patch fixes the broken readahead flow for POSIX_FADV_WILLNEED. The
problem is that, in force_page_cache_ra(), nr_to_read is capped by the
code below:

        max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
        nr_to_read = min_t(unsigned long, nr_to_read, max_pages);

In other words, we are not able to read ahead more than max_pages, which
is most likely in the range of 2MB to 16MB. Note that it doesn't make
sense to set ra->ra_pages to the entire file size; instead, let's fix
this logic.
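For reference, a minimal userspace reproducer along the lines below
drives the traces that follow; the file path is a placeholder, and any
sufficiently large (here 4GB) file on the test mount works:

        #define _POSIX_C_SOURCE 200112L
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/stat.h>
        #include <unistd.h>

        int main(void)
        {
                struct stat st;
                /* Placeholder path: any large file on the f2fs mount. */
                int fd = open("/mnt/f2fs/big.file", O_RDONLY);

                if (fd < 0 || fstat(fd, &st) < 0) {
                        perror("open/fstat");
                        return 1;
                }

                /* Ask the kernel to read ahead the entire file. */
                posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED);
                close(fd);
                return 0;
        }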
Before:
f2fs_fadvise: dev = (252,16), ino = 14, i_size = 4294967296 offset:0, len:4294967296, advise:3
page_cache_ra_unbounded: dev=252:16 ino=e index=0 nr_to_read=512 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=512 nr_to_read=512 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=1024 nr_to_read=512 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=1536 nr_to_read=512 lookahead_size=0

After:
f2fs_fadvise: dev = (252,16), ino = 14, i_size = 4294967296 offset:0, len:4294967296, advise:3
page_cache_ra_unbounded: dev=252:16 ino=e index=0 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=2048 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=4096 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=6144 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=8192 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=10240 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=12288 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=14336 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=16384 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=18432 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=20480 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=22528 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=24576 nr_to_read=2048 lookahead_size=0
...
page_cache_ra_unbounded: dev=252:16 ino=e index=1042432 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=1044480 nr_to_read=2048 lookahead_size=0
page_cache_ra_unbounded: dev=252:16 ino=e index=1046528 nr_to_read=2048 lookahead_size=0

Cc: linux-mm@kvack.org
Cc: Matthew Wilcox (Oracle)
Signed-off-by: Jaegeuk Kim
---
 mm/readahead.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 3a4b5d58eeb6..c0db049a5b7b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -311,7 +311,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  */
-static void do_page_cache_ra(struct readahead_control *ractl,
+static int do_page_cache_ra(struct readahead_control *ractl,
 		unsigned long nr_to_read, unsigned long lookahead_size)
 {
 	struct inode *inode = ractl->mapping->host;
@@ -320,45 +320,42 @@ static void do_page_cache_ra(struct readahead_control *ractl,
 	pgoff_t end_index;	/* The last page we want to read */
 
 	if (isize == 0)
-		return;
+		return -EINVAL;
 
 	end_index = (isize - 1) >> PAGE_SHIFT;
 	if (index > end_index)
-		return;
+		return -EINVAL;
 	/* Don't read past the page containing the last byte of the file */
 	if (nr_to_read > end_index - index)
 		nr_to_read = end_index - index + 1;
 
 	page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
+	return 0;
 }
 
 /*
- * Chunk the readahead into 2 megabyte units, so that we don't pin too much
- * memory at once.
+ * Chunk the readahead per the block device capacity, and read all nr_to_read.
  */
 void force_page_cache_ra(struct readahead_control *ractl,
 		unsigned long nr_to_read)
 {
 	struct address_space *mapping = ractl->mapping;
-	struct file_ra_state *ra = ractl->ra;
 	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
-	unsigned long max_pages;
+	unsigned long this_chunk;
 
 	if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
 		return;
 
 	/*
-	 * If the request exceeds the readahead window, allow the read to
-	 * be up to the optimal hardware IO size
+	 * Consider the optimal hardware IO size for the readahead chunk.
 	 */
-	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
-	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
+	this_chunk = max_t(unsigned long, bdi->io_pages, ractl->ra->ra_pages);
+
 	while (nr_to_read) {
-		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
+		this_chunk = min_t(unsigned long, this_chunk, nr_to_read);
 
-		if (this_chunk > nr_to_read)
-			this_chunk = nr_to_read;
-		do_page_cache_ra(ractl, this_chunk, 0);
+		if (do_page_cache_ra(ractl, this_chunk, 0))
+			break;
 
 		nr_to_read -= this_chunk;
 	}
--
2.52.0.107.ga0afd4fd5b-goog