xe_vm_range_tilemask_tlb_inval() submits TLB invalidation requests to all GTs in a tile mask and then immediately waits for them to complete before returning. This is fine for the existing callers, but a subsequent patch will need to defer the wait in order to overlap TLB invalidations across multiple VMAs. Introduce xe_tlb_inval_range_tilemask_submit() and xe_tlb_inval_batch_wait() in xe_tlb_inval.c as the submit and wait halves respectively. The batch of fences is carried in the new xe_tlb_inval_batch structure. Remove xe_vm_range_tilemask_tlb_inval() and convert all three call sites to the new API. Assisted-by: GitHub Copilot:claude-sonnet-4.6 Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_svm.c | 6 +- drivers/gpu/drm/xe/xe_tlb_inval.c | 82 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tlb_inval.h | 6 ++ drivers/gpu/drm/xe/xe_tlb_inval_types.h | 14 +++++ drivers/gpu/drm/xe/xe_vm.c | 69 +++------------------ drivers/gpu/drm/xe/xe_vm.h | 3 - drivers/gpu/drm/xe/xe_vm_madvise.c | 9 ++- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 8 files changed, 123 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 002b6c22ad3f..6ea4972c2791 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -19,6 +19,7 @@ #include "xe_pt.h" #include "xe_svm.h" #include "xe_tile.h" +#include "xe_tlb_inval.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" @@ -225,6 +226,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, const struct mmu_notifier_range *mmu_range) { struct xe_vm *vm = gpusvm_to_vm(gpusvm); + struct xe_tlb_inval_batch _batch; struct xe_device *xe = vm->xe; struct drm_gpusvm_range *r, *first; struct xe_tile *tile; @@ -276,7 +278,9 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask); + err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, 
adj_end, + tile_mask, &_batch); + xe_tlb_inval_batch_wait(&_batch); WARN_ON_ONCE(err); range_notifier_event_end: diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index 933f30fb617d..343e37cfe715 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -486,3 +486,85 @@ bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval) guard(spinlock_irq)(&tlb_inval->pending_lock); return list_is_singular(&tlb_inval->pending_fences); } + +/** + * xe_tlb_inval_batch_wait() - Wait for all fences in a TLB invalidation batch + * @batch: Batch of TLB invalidation fences to wait on + * + * Waits for every fence in @batch to signal, then resets @batch so it can be + * reused for a subsequent invalidation. + */ +void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch) +{ + struct xe_tlb_inval_fence *fence = &batch->fence[0]; + unsigned int i; + + for (i = 0; i < batch->num_fences; ++i) + xe_tlb_inval_fence_wait(fence++); + + batch->num_fences = 0; +} + +/** + * xe_tlb_inval_range_tilemask_submit() - Submit TLB invalidations for an + * address range on a tile mask + * @xe: The xe device + * @asid: Address space ID + * @start: start address + * @end: end address + * @tile_mask: mask of GTs on which to issue the TLB invalidation + * @batch: Batch of TLB invalidation fences + * + * Issue a range-based TLB invalidation for the GTs in @tile_mask + * + * Return: 0 on success, negative error code on failure. 
+ */ +int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid, + u64 start, u64 end, u8 tile_mask, + struct xe_tlb_inval_batch *batch) +{ + struct xe_tlb_inval_fence *fence = &batch->fence[0]; + struct xe_tile *tile; + u32 fence_id = 0; + u8 id; + int err = 0; + + batch->num_fences = 0; + if (!tile_mask) + return 0; + + for_each_tile(tile, xe, id) { + if (!(tile_mask & BIT(id))) + continue; + + xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, + &fence[fence_id], start, end, + asid, NULL); + if (err) + goto wait; + ++fence_id; + + if (!tile->media_gt) + continue; + + xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, + &fence[fence_id], start, end, + asid, NULL); + if (err) + goto wait; + ++fence_id; + } + +wait: + batch->num_fences = fence_id; + if (err) + xe_tlb_inval_batch_wait(batch); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h index 62089254fa23..a76b7823a5f2 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -45,4 +45,10 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval); +int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid, + u64 start, u64 end, u8 tile_mask, + struct xe_tlb_inval_batch *batch); + +void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch); + #endif /* _XE_TLB_INVAL_ */ diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h index 3b089f90f002..3d1797d186fd 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -9,6 +9,8 @@ #include #include +#include "xe_device_types.h" + struct drm_suballoc; struct xe_tlb_inval; @@ -132,4 +134,16 @@ struct xe_tlb_inval_fence { ktime_t 
inval_time; }; +/** + * struct xe_tlb_inval_batch - Batch of TLB invalidation fences + * + * Holds one fence per GT covered by a TLB invalidation request. + */ +struct xe_tlb_inval_batch { + /** @fence: per-GT TLB invalidation fences */ + struct xe_tlb_inval_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + /** @num_fences: number of valid entries in @fence */ + unsigned int num_fences; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 548b0769b3ef..7f29d2b2972d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3966,66 +3966,6 @@ void xe_vm_unlock(struct xe_vm *vm) dma_resv_unlock(xe_vm_resv(vm)); } -/** - * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an - * address range - * @vm: The VM - * @start: start address - * @end: end address - * @tile_mask: mask for which gt's issue tlb invalidation - * - * Issue a range based TLB invalidation for gt's in tilemask - * - * Returns 0 for success, negative error code otherwise. 
- */ -int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask) -{ - struct xe_tlb_inval_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - struct xe_tile *tile; - u32 fence_id = 0; - u8 id; - int err; - - if (!tile_mask) - return 0; - - for_each_tile(tile, vm->xe, id) { - if (!(tile_mask & BIT(id))) - continue; - - xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, - &fence[fence_id], true); - - err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, - &fence[fence_id], start, end, - vm->usm.asid, NULL); - if (err) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, - &fence[fence_id], true); - - err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, - &fence[fence_id], start, end, - vm->usm.asid, NULL); - if (err) - goto wait; - ++fence_id; - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_tlb_inval_fence_wait(&fence[id]); - - return err; -} - /** * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock * @vma: VMA to invalidate @@ -4040,6 +3980,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_vm *vm = xe_vma_vm(vma); + struct xe_tlb_inval_batch _batch; struct xe_tile *tile; u8 tile_mask = 0; int ret = 0; @@ -4080,12 +4021,16 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) xe_device_wmb(xe); - ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), - xe_vma_end(vma), tile_mask); + ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid, + xe_vma_start(vma), xe_vma_end(vma), + tile_mask, &_batch); /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); + if (!ret) + xe_tlb_inval_batch_wait(&_batch); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index f849e369432b..62f4b6fec0bc 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h 
@@ -240,9 +240,6 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); -int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask); - int xe_vm_invalidate_vma(struct xe_vma *vma); int xe_vm_validate_protected(struct xe_vm *vm); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 95bf53cc29e3..39717026e84f 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -12,6 +12,7 @@ #include "xe_pat.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tlb_inval.h" struct xe_vmas_in_madvise_range { u64 addr; @@ -235,13 +236,19 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) { u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end); + struct xe_tlb_inval_batch batch; + int err; if (!tile_mask) return 0; xe_device_wmb(vm->xe); - return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask); + err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end, + tile_mask, &batch); + xe_tlb_inval_batch_wait(&batch); + + return err; } static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1f6f7e30e751..de6544165cfa 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -18,6 +18,7 @@ #include "xe_device_types.h" #include "xe_pt_types.h" #include "xe_range_fence.h" +#include "xe_tlb_inval_types.h" #include "xe_userptr.h" struct drm_pagemap; -- 2.53.0