Change folio_zero_user() to clear contiguous page ranges instead of
clearing a page at a time. Exposing the largest feasible length allows
the processor to optimize based on the extent of the region being
cleared.

However, clearing in large chunks can have two problems:

 - cache locality when clearing small folios (< MAX_ORDER_NR_PAGES)
   (larger folios don't have any expectation of cache locality).

 - preemption latency when clearing large folios.

Handle the first by splitting the clearing into three parts: the
faulting page and its immediate neighbourhood, and the regions to its
left and right, with the local neighbourhood cleared last.

The second problem becomes relevant when running under cooperative
preemption models. Limit the worst-case preemption latency by clearing
in architecture-specified PAGE_CONTIG_NR units, using a default value
of 1 where the architecture does not specify one.

Signed-off-by: Ankur Arora
---
 include/linux/mm.h |  6 ++++
 mm/memory.c        | 82 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0cde9b01da5e..29b2a8bf7b4f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3768,6 +3768,12 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
 				unsigned int order) {}
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+#ifndef ARCH_PAGE_CONTIG_NR
+#define PAGE_CONTIG_NR 1
+#else
+#define PAGE_CONTIG_NR ARCH_PAGE_CONTIG_NR
+#endif
+
 #ifndef clear_pages
 /**
  * clear_pages() - clear a page range using a kernel virtual address.
diff --git a/mm/memory.c b/mm/memory.c
index 0ba4f6b71847..0f5b1900b480 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7021,40 +7021,80 @@ static inline int process_huge_page(
 	return 0;
 }
 
-static void clear_gigantic_page(struct folio *folio, unsigned long addr_hint,
-				unsigned int nr_pages)
+/*
+ * Clear contiguous pages, chunking them up when running under
+ * non-preemptible models.
+ */
+static void clear_contig_highpages(struct page *page, unsigned long addr,
+				   unsigned int npages)
 {
-	unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(folio));
-	int i;
+	unsigned int i, count, unit;
 
-	might_sleep();
-	for (i = 0; i < nr_pages; i++) {
+	unit = preempt_model_preemptible() ? npages : PAGE_CONTIG_NR;
+
+	for (i = 0; i < npages; ) {
+		count = min(unit, npages - i);
+		clear_user_highpages(nth_page(page, i),
+				     addr + i * PAGE_SIZE, count);
+		i += count;
 		cond_resched();
-		clear_user_highpage(folio_page(folio, i), addr + i * PAGE_SIZE);
 	}
 }
 
-static int clear_subpage(unsigned long addr, int idx, void *arg)
-{
-	struct folio *folio = arg;
-
-	clear_user_highpage(folio_page(folio, idx), addr);
-	return 0;
-}
-
 /**
  * folio_zero_user - Zero a folio which will be mapped to userspace.
  * @folio: The folio to zero.
- * @addr_hint: The address will be accessed or the base address if uncelar.
+ * @addr_hint: The address accessed by the user or the base address.
+ *
+ * Uses architectural support for clear_pages() to zero page extents
+ * instead of clearing page-at-a-time.
+ *
+ * Clearing of small folios (< MAX_ORDER_NR_PAGES) is split into three
+ * parts: pages in the immediate locality of the faulting page, and its
+ * left and right regions; the local neighbourhood is cleared last in
+ * order to keep cache lines of the target region hot.
+ *
+ * For larger folios we assume that there is no expectation of cache locality
+ * and just do a straight zero.
  */
 void folio_zero_user(struct folio *folio, unsigned long addr_hint)
 {
-	unsigned int nr_pages = folio_nr_pages(folio);
+	unsigned long base_addr = ALIGN_DOWN(addr_hint, folio_size(folio));
+	const long fault_idx = (addr_hint - base_addr) / PAGE_SIZE;
+	const struct range pg = DEFINE_RANGE(0, folio_nr_pages(folio) - 1);
+	const int width = 2;	/* number of pages cleared last on either side */
+	struct range r[3];
+	int i;
 
-	if (unlikely(nr_pages > MAX_ORDER_NR_PAGES))
-		clear_gigantic_page(folio, addr_hint, nr_pages);
-	else
-		process_huge_page(addr_hint, nr_pages, clear_subpage, folio);
+	if (folio_nr_pages(folio) > MAX_ORDER_NR_PAGES) {
+		clear_contig_highpages(folio_page(folio, 0),
+				       base_addr, folio_nr_pages(folio));
+		return;
+	}
+
+	/*
+	 * Faulting page and its immediate neighbourhood. Cleared at the end to
+	 * ensure it sticks around in the cache.
+	 */
+	r[2] = DEFINE_RANGE(clamp_t(s64, fault_idx - width, pg.start, pg.end),
+			    clamp_t(s64, fault_idx + width, pg.start, pg.end));
+
+	/* Region to the left of the fault */
+	r[1] = DEFINE_RANGE(pg.start,
+			    clamp_t(s64, r[2].start-1, pg.start-1, r[2].start));
+
+	/* Region to the right of the fault: always valid for the common fault_idx=0 case. */
+	r[0] = DEFINE_RANGE(clamp_t(s64, r[2].end+1, r[2].end, pg.end+1),
+			    pg.end);
+
+	for (i = 0; i <= 2; i++) {
+		unsigned int npages = range_len(&r[i]);
+		struct page *page = folio_page(folio, r[i].start);
+		unsigned long addr = base_addr + folio_page_idx(folio, page) * PAGE_SIZE;
+
+		if (npages > 0)
+			clear_contig_highpages(page, addr, npages);
+	}
 }
 
 static int copy_user_gigantic_page(struct folio *dst, struct folio *src,
-- 
2.43.5
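
A note on the region math in folio_zero_user() above: the following is a
minimal standalone sketch (not part of the patch) of the three-way split,
using plain C stand-ins for DEFINE_RANGE(), clamp_t() and range_len(). The
width of 2 pages and the empty-range convention mirror the code above; the
chosen folio size and fault index are purely illustrative.

#include <stdio.h>

struct range { long start, end; };

static long clamp(long v, long lo, long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static long range_len(struct range r)
{
	return r.end - r.start + 1;	/* empty ranges come out as 0 */
}

int main(void)
{
	const long nr_pages = 512;	/* e.g. a 2MB folio of 4K pages */
	const long fault_idx = 0;	/* page the user actually touched */
	const long width = 2;
	struct range pg = { 0, nr_pages - 1 }, r[3];
	int i;

	/* faulting page and its immediate neighbourhood, cleared last */
	r[2].start = clamp(fault_idx - width, pg.start, pg.end);
	r[2].end   = clamp(fault_idx + width, pg.start, pg.end);

	/* region to the left of the fault (may be empty) */
	r[1].start = pg.start;
	r[1].end   = clamp(r[2].start - 1, pg.start - 1, r[2].start);

	/* region to the right of the fault (may be empty) */
	r[0].start = clamp(r[2].end + 1, r[2].end, pg.end + 1);
	r[0].end   = pg.end;

	for (i = 0; i <= 2; i++)
		printf("r[%d]: [%ld, %ld] -> %ld pages\n",
		       i, r[i].start, r[i].end, range_len(r[i]));
	return 0;
}

For fault_idx = 0 this prints r[0] = [3, 511] (509 pages), an empty r[1],
and r[2] = [0, 2] (3 pages): the bulk of the folio is cleared first and the
faulting page's neighbourhood last, as described in the commit message.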