From: Barry Song

In many cases, the pages passed to vmap() may include high-order
pages; the system heap, for example, often allocates pages in
descending order: order 8, then 4, then 0.

Currently, vmap() iterates over every page individually, so even the
pages inside a high-order block are handled one by one. This patch
detects high-order pages and maps them as a single contiguous block
whenever possible.

Another possibility would be to implement a new API, vmap_sg();
however, that change seems to be quite large in scope.

When vmapping a 128MB dma-buf using the system heap, this RFC appears
to make system_heap_do_vmap() roughly 16x faster:

W/ patch:
[   51.363682] system_heap_do_vmap took 2474000 ns
[   53.307044] system_heap_do_vmap took 2469008 ns
[   55.061985] system_heap_do_vmap took 2519008 ns
[   56.653810] system_heap_do_vmap took 2674000 ns

W/o patch:
[    8.260880] system_heap_do_vmap took 39490000 ns
[   32.513292] system_heap_do_vmap took 38784000 ns
[   82.673374] system_heap_do_vmap took 40711008 ns
[   84.579062] system_heap_do_vmap took 40236000 ns

Cc: Uladzislau Rezki
Cc: Sumit Semwal
Cc: John Stultz
Cc: Maxime Ripard
Signed-off-by: Barry Song
---
 mm/vmalloc.c | 49 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0832f944544c..af2e3e8c052a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -642,6 +642,34 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
 	return err;
 }
 
+static inline int get_vmap_batch_order(struct page **pages,
+				       unsigned int stride,
+				       int max_steps,
+				       unsigned int idx)
+{
+	/*
+	 * Currently, batching is only supported in vmap_pages_range
+	 * when page_shift == PAGE_SHIFT.
+	 */
+	if (stride != 1)
+		return 0;
+
+	struct page *base = pages[idx];
+	if (!PageHead(base))
+		return 0;
+
+	int order = compound_order(base);
+	int nr_pages = 1 << order;
+
+	if (max_steps < nr_pages)
+		return 0;
+
+	for (int i = 0; i < nr_pages; i++)
+		if (pages[idx + i] != base + i)
+			return 0;
+	return order;
+}
+
 /*
  * vmap_pages_range_noflush is similar to vmap_pages_range, but does not
  * flush caches.
@@ -655,23 +683,32 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
 		pgprot_t prot, struct page **pages, unsigned int page_shift)
 {
 	unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
+	unsigned int stride;
 
 	WARN_ON(page_shift < PAGE_SHIFT);
 
+	/*
+	 * Some users may allocate pages from high-order down to order 0.
+	 * We roughly check if the first page is a compound page. If so,
+	 * there is a chance to batch multiple pages together.
+	 */
 	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
-			page_shift == PAGE_SHIFT)
+			(page_shift == PAGE_SHIFT && !PageCompound(pages[0])))
 		return vmap_small_pages_range_noflush(addr, end, prot, pages);
 
-	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
-		int err;
+	stride = 1U << (page_shift - PAGE_SHIFT);
+	for (i = 0; i < nr; ) {
+		int err, order;
 
-		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+		order = get_vmap_batch_order(pages, stride, nr - i, i);
+		err = vmap_range_noflush(addr, addr + (1UL << (page_shift + order)),
 					page_to_phys(pages[i]), prot,
-					page_shift);
+					page_shift + order);
 		if (err)
 			return err;
 
-		addr += 1UL << page_shift;
+		addr += 1UL << (page_shift + order);
+		i += 1U << (order + page_shift - PAGE_SHIFT);
 	}
 
 	return 0;
-- 
2.39.3 (Apple Git-146)
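
For context, the caller benchmarked above follows the usual dma-buf heap
pattern: it flattens the buffer's sg_table into a struct page *[] array and
hands the whole array to vmap(). Below is a simplified, illustrative sketch
of that path (loosely modelled on drivers/dma-buf/heaps/system_heap.c; the
function name heap_vmap_sketch and the exact details are illustrative, not
the in-tree code). Because the heap allocates from a descending order list
(e.g. 8, 4, 0), the page array contains long physically contiguous runs,
which is exactly what the batching above can exploit.

#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/err.h>

/*
 * Illustrative sketch only: flatten an sg_table into a page array and
 * vmap() it, as dma-buf heaps typically do for their vmap callback.
 */
static void *heap_vmap_sketch(struct sg_table *table, size_t len)
{
	unsigned int npages = PAGE_ALIGN(len) / PAGE_SIZE;
	struct page **pages = vmalloc(sizeof(*pages) * npages);
	struct page **tmp = pages;
	struct sg_page_iter piter;
	void *vaddr;

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Copy every page of the sg_table into a flat array for vmap(). */
	for_each_sgtable_page(table, &piter, 0)
		*tmp++ = sg_page_iter_page(&piter);

	vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);
	vfree(pages);

	return vaddr ? vaddr : ERR_PTR(-ENOMEM);
}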