Gem-shmem operates on pages instead of I/O memory ranges, so use them
for mmap. This will allow for tracking page dirty/accessed flags.

If hugepage support is available, insert the page's folio if possible.
Otherwise fall back to mapping individual pages. As the PFN is no
longer required for hugepage mappings, simplify the related code and
make it depend on CONFIG_TRANSPARENT_HUGEPAGE.

Prepare for tracking folio status.

Signed-off-by: Thomas Zimmermann
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 58 ++++++++++++++++----------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 3871a6d92f77..b6ddabbfcc52 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -553,17 +553,18 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create);
 static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr,
				      struct page *page)
 {
-#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
-	unsigned long pfn = page_to_pfn(page);
-	unsigned long paddr = pfn << PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	phys_addr_t paddr = page_to_phys(page);
 	bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK);
 
-	if (aligned &&
-	    pmd_none(*vmf->pmd) &&
-	    folio_test_pmd_mappable(page_folio(page))) {
-		pfn &= PMD_MASK >> PAGE_SHIFT;
-		if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE)
-			return true;
+	if (aligned && pmd_none(*vmf->pmd)) {
+		struct folio *folio = page_folio(page);
+
+		if (folio_test_pmd_mappable(folio)) {
+			/* Read-only mapping; split upon write fault */
+			if (vmf_insert_folio_pmd(vmf, folio, false) == VM_FAULT_NOPAGE)
+				return true;
+		}
 	}
 #endif
 
@@ -576,13 +577,10 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 	loff_t num_pages = obj->size >> PAGE_SHIFT;
-	vm_fault_t ret;
 	struct page **pages = shmem->pages;
-	pgoff_t page_offset;
-	unsigned long pfn;
-
-	/* Offset to faulty address in the VMA. */
-	page_offset = vmf->pgoff - vma->vm_pgoff;
+	pgoff_t page_offset = vmf->pgoff - vma->vm_pgoff; /* page offset within VMA */
+	struct page *page = pages[page_offset];
+	vm_fault_t ret;
 
 	dma_resv_lock(shmem->base.resv, NULL);
 
@@ -590,21 +588,35 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	    drm_WARN_ON_ONCE(obj->dev, !shmem->pages) ||
 	    shmem->madv < 0) {
 		ret = VM_FAULT_SIGBUS;
-		goto out;
+		goto err_dma_resv_unlock;
 	}
 
-	if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) {
-		ret = VM_FAULT_NOPAGE;
-		goto out;
+	page = pages[page_offset];
+	if (!page) {
+		ret = VM_FAULT_SIGBUS;
+		goto err_dma_resv_unlock;
 	}
 
-	pfn = page_to_pfn(pages[page_offset]);
-	ret = vmf_insert_pfn(vma, vmf->address, pfn);
+	if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, page)) {
+		ret = VM_FAULT_NOPAGE;
+	} else {
+		struct folio *folio = page_folio(page);
+
+		get_page(page);
+
+		folio_lock(folio);
+
+		vmf->page = page;
+		ret = VM_FAULT_LOCKED;
+	}
 
- out:
 	dma_resv_unlock(shmem->base.resv);
 
 	return ret;
+
+err_dma_resv_unlock:
+	dma_resv_unlock(shmem->base.resv);
+
+	return ret;
 }
 
 static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
@@ -691,7 +703,7 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
 	if (ret)
 		return ret;
 
-	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+	vm_flags_mod(vma, VM_DONTEXPAND | VM_DONTDUMP, VM_PFNMAP);
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	if (shmem->map_wc)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-- 
2.52.0
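
A hedged userspace sketch of the mmap path that the patch above reworks.
The device path, buffer size and missing error handling are assumptions
made for brevity, and it is built against the libdrm UAPI headers. On a
driver that uses the gem-shmem helpers, every page fault triggered by the
memset() below is now served by drm_gem_shmem_fault() inserting shmem
pages (or, with CONFIG_TRANSPARENT_HUGEPAGE and suitably aligned folios,
whole PMD-sized mappings) instead of raw PFNs.

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <drm/drm.h>
#include <drm/drm_mode.h>

int main(void)
{
	struct drm_mode_create_dumb create = {
		.width = 1920, .height = 1080, .bpp = 32,
	};
	struct drm_mode_map_dumb map = { 0 };
	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
	uint32_t *fb;

	ioctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);

	map.handle = create.handle;
	ioctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);

	/* mmap of the fake offset ends up in drm_gem_shmem_mmap() */
	fb = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		  fd, map.offset);

	/* software rendering; each fault inserts pages/folios into the VMA */
	memset(fb, 0, create.size);

	munmap(fb, create.size);
	close(fd);

	return 0;
}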
Invoke folio_mark_accessed() in mmap page faults to add the folio to the
memory manager's LRU list. Userspace invokes mmap to get the memory for
software rendering, and compositors later do the same to create the final
on-screen image, so keeping the pages on the LRU makes sense. This avoids
paging out graphics buffers prematurely under memory pressure.

In page_mkwrite, further invoke folio_mark_dirty() to mark the folio for
writeback, should the underlying memory be paged out of system memory.
This rarely happens in practice, but without the dirty state it would
corrupt the buffer content.

This has little effect on a system's hardware-accelerated rendering,
which only mmaps buffers for an initial setup of textures, meshes,
shaders, etc.

Signed-off-by: Thomas Zimmermann
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index b6ddabbfcc52..30cd34d3a111 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -562,8 +562,10 @@ static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr,
 
 		if (folio_test_pmd_mappable(folio)) {
 			/* Read-only mapping; split upon write fault */
-			if (vmf_insert_folio_pmd(vmf, folio, false) == VM_FAULT_NOPAGE)
+			if (vmf_insert_folio_pmd(vmf, folio, false) == VM_FAULT_NOPAGE) {
+				folio_mark_accessed(folio);
 				return true;
+			}
 		}
 	}
 #endif
@@ -605,6 +607,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 		get_page(page);
 
 		folio_lock(folio);
+		folio_mark_accessed(folio);
 
 		vmf->page = page;
 		ret = VM_FAULT_LOCKED;
@@ -653,10 +656,23 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
 	drm_gem_vm_close(vma);
 }
 
+static vm_fault_t drm_gem_shmem_page_mkwrite(struct vm_fault *vmf)
+{
+	struct folio *folio = page_folio(vmf->page);
+
+	file_update_time(vmf->vma->vm_file);
+
+	folio_lock(folio);
+	folio_mark_dirty(folio);
+
+	return VM_FAULT_LOCKED;
+}
+
 const struct vm_operations_struct drm_gem_shmem_vm_ops = {
 	.fault = drm_gem_shmem_fault,
 	.open = drm_gem_shmem_vm_open,
 	.close = drm_gem_shmem_vm_close,
+	.page_mkwrite = drm_gem_shmem_page_mkwrite,
 };
 EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_ops);
-- 
2.52.0
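
The corruption scenario described above is hard to hit without memory
pressure. The following hedged sketch, which builds on the dumb-buffer
mapping from the previous example, is one way to exercise it:
exercise_writeback() is a hypothetical helper, and MADV_PAGEOUT is only a
best-effort hint that needs a recent kernel and available swap to have
any effect.

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21		/* not present in older libc headers */
#endif

/*
 * Fill the buffer through the mmap'ed pointer, ask the kernel to reclaim
 * the pages, then verify the content. The first write to each page goes
 * through drm_gem_shmem_page_mkwrite(), which marks the folio dirty so
 * that reclaim writes it back instead of dropping the data.
 */
static int exercise_writeback(uint8_t *fb, size_t size)
{
	size_t i;

	memset(fb, 0xa5, size);

	madvise(fb, size, MADV_PAGEOUT);	/* best-effort reclaim hint */

	for (i = 0; i < size; i++) {
		if (fb[i] != 0xa5)
			return -1;		/* buffer content was lost */
	}

	return 0;
}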
On successful vmap, set the pages_mark_accessed_on_put and _dirty_on_put
flags in the gem-shmem object. This signals that the contained pages
require LRU and dirty tracking when they are released back to shmem.
Clear these flags on put, so that the buffer remains quiet until the next
call to vmap. There is no way of tracking dirty status within vmap
itself, as the mapping is always writable.

Both flags, _accessed_on_put and _dirty_on_put, have always been part of
the gem-shmem object, but were never used much, so most drivers did not
track the page status correctly. Only the v3d and imagination drivers
make limited use of _dirty_on_put. In the case of imagination, move the
flag setting from init to cleanup. This ensures writeback of modified
pages but does not interfere with the internal vmap/vunmap calls. V3d
already implements this behaviour.

Signed-off-by: Thomas Zimmermann
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 4 ++++
 drivers/gpu/drm/imagination/pvr_gem.c  | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 30cd34d3a111..8c07a8f81322 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -265,6 +265,8 @@ void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
				  shmem->pages_mark_dirty_on_put,
				  shmem->pages_mark_accessed_on_put);
 		shmem->pages = NULL;
+		shmem->pages_mark_accessed_on_put = false;
+		shmem->pages_mark_dirty_on_put = false;
 	}
 }
 EXPORT_SYMBOL_GPL(drm_gem_shmem_put_pages_locked);
@@ -397,6 +399,8 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
 		} else {
 			iosys_map_set_vaddr(map, shmem->vaddr);
 			refcount_set(&shmem->vmap_use_count, 1);
+			shmem->pages_mark_accessed_on_put = true;
+			shmem->pages_mark_dirty_on_put = true;
 		}
 	}
 
diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c
index c07c9a915190..307b02c916d4 100644
--- a/drivers/gpu/drm/imagination/pvr_gem.c
+++ b/drivers/gpu/drm/imagination/pvr_gem.c
@@ -25,7 +25,10 @@
 
 static void pvr_gem_object_free(struct drm_gem_object *obj)
 {
-	drm_gem_shmem_object_free(obj);
+	struct drm_gem_shmem_object *shmem_obj = to_drm_gem_shmem_obj(obj);
+
+	shmem_obj->pages_mark_dirty_on_put = true;
+	drm_gem_shmem_free(shmem_obj);
 }
 
 static struct dma_buf *pvr_gem_export(struct drm_gem_object *obj, int flags)
@@ -363,7 +366,6 @@ pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
 	if (IS_ERR(shmem_obj))
		return ERR_CAST(shmem_obj);
 
-	shmem_obj->pages_mark_dirty_on_put = true;
 	shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED);
 	pvr_obj = shmem_gem_to_pvr_gem(shmem_obj);
 	pvr_obj->flags = flags;
-- 
2.52.0
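
For reference, a hedged kernel-side sketch of the kind of internal
vmap/vunmap cycle this patch covers. my_upload() is a hypothetical driver
helper with error handling reduced to a minimum, and it assumes the
caller already holds a reference to the gem-shmem object. With this
patch, the successful vmap below sets pages_mark_accessed_on_put and
pages_mark_dirty_on_put, so the CPU writes are accounted for once the
pages go back to shmem.

#include <drm/drm_gem_shmem_helper.h>
#include <linux/dma-resv.h>
#include <linux/iosys-map.h>

/* hypothetical driver helper: copy a blob into a gem-shmem object */
static int my_upload(struct drm_gem_shmem_object *shmem,
		     const void *src, size_t size)
{
	struct iosys_map map;
	int ret;

	dma_resv_lock(shmem->base.resv, NULL);

	/*
	 * A successful vmap now also sets pages_mark_accessed_on_put and
	 * pages_mark_dirty_on_put, so the writes below are reflected in
	 * the page state when the pages are released back to shmem.
	 */
	ret = drm_gem_shmem_vmap_locked(shmem, &map);
	if (ret)
		goto out_unlock;

	iosys_map_memcpy_to(&map, 0, src, size);

	drm_gem_shmem_vunmap_locked(shmem, &map);

out_unlock:
	dma_resv_unlock(shmem->base.resv);

	return ret;
}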