From: Honglei Huang

Introduce new ioctl AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH to support
allocating multiple non-contiguous CPU virtual address ranges that map
to a single contiguous GPU virtual address. This allows userspace to
efficiently manage scattered memory buffers by presenting them as a
unified GPU address space, useful for applications dealing with
fragmented host memory.

Signed-off-by: Honglei Huang
---
 include/uapi/linux/kfd_ioctl.h | 37 +++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 84aa24c02..c364aa971 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -442,6 +442,38 @@ struct kfd_ioctl_alloc_memory_of_gpu_args {
 	__u32 flags;
 };
 
+/* Userptr range for batch allocation
+ *
+ * @start: start address of user virtual memory range
+ * @size: size of this user virtual memory range in bytes
+ */
+struct kfd_ioctl_userptr_range {
+	__u64 start;		/* to KFD */
+	__u64 size;		/* to KFD */
+};
+
+/* Allocate memory for batch of non-contiguous userptr ranges
+ * that map to a contiguous GPU virtual address
+ *
+ * @va_addr: contiguous GPU virtual address where all ranges will be mapped
+ * @total_size: total size in bytes (sum of all range sizes)
+ * @handle: buffer handle returned to user mode
+ * @ranges_ptr: pointer to array of kfd_ioctl_userptr_range structures
+ * @num_ranges: number of ranges in the array
+ * @gpu_id: device identifier
+ * @flags: memory type and attributes (must include KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
+ */
+struct kfd_ioctl_alloc_memory_of_gpu_batch_args {
+	__u64 va_addr;		/* to KFD */
+	__u64 total_size;	/* to KFD */
+	__u64 handle;		/* from KFD */
+	__u64 ranges_ptr;	/* to KFD */
+	__u32 num_ranges;	/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 flags;		/* to KFD */
+	__u32 pad;
+};
+
 /* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
  *
  * @handle: memory handle returned by alloc
@@ -1675,7 +1707,10 @@ struct kfd_ioctl_dbg_trap_args {
 #define AMDKFD_IOC_CREATE_PROCESS		\
 		AMDKFD_IO(0x27)
 
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH	\
+		AMDKFD_IOWR(0x28, struct kfd_ioctl_alloc_memory_of_gpu_batch_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x28
+#define AMDKFD_COMMAND_END		0x29
 
 #endif
-- 
2.34.1

From: Honglei Huang

Add necessary data structures to support tracking multiple user address
ranges within a single memory object. The user_range_info structure
tracks individual ranges with their own HMM ranges and MMU notifiers,
allowing per-range invalidation tracking. The kgd_mem structure is
extended to hold an array of these ranges.
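
For illustration, a minimal userspace sketch of how the ioctl added in the
previous patch is expected to populate these structures: each
kfd_ioctl_userptr_range passed in is tracked by one user_range_info entry
behind a single kgd_mem. The helper name, buffer sizes and the kfd_fd,
gpu_id and gpu_va parameters are illustrative only; the caller is assumed
to have opened /dev/kfd, acquired the GPU VM and picked a free GPU VA
range through its usual allocator. Error handling is trimmed.

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kfd_ioctl.h>

  /* buf_a/buf_b: page-aligned, non-contiguous host buffers of 2 and 4
   * pages; both end up behind one contiguous GPU VA starting at gpu_va.
   */
  static int alloc_batch_userptr(int kfd_fd, uint32_t gpu_id, uint64_t gpu_va,
                                 void *buf_a, void *buf_b, uint64_t page_sz,
                                 uint64_t *handle)
  {
          struct kfd_ioctl_userptr_range ranges[2] = {
                  { .start = (uint64_t)(uintptr_t)buf_a, .size = 2 * page_sz },
                  { .start = (uint64_t)(uintptr_t)buf_b, .size = 4 * page_sz },
          };
          struct kfd_ioctl_alloc_memory_of_gpu_batch_args args = {
                  .va_addr    = gpu_va,
                  .total_size = 6 * page_sz,  /* must equal the sum of range sizes */
                  .ranges_ptr = (uint64_t)(uintptr_t)ranges,
                  .num_ranges = 2,
                  .gpu_id     = gpu_id,
                  .flags      = KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
                                KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE,
          };

          if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH, &args))
                  return -1;

          *handle = args.handle;  /* usable with the existing map/free ioctls */
          return 0;
  }
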
Signed-off-by: Honglei Huang
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 321cbf9a1..1883833a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -48,6 +48,7 @@ enum TLB_FLUSH_TYPE {
 
 struct amdgpu_device;
 struct kfd_process_device;
+struct kfd_ioctl_userptr_range;
 struct amdgpu_reset_context;
 
 enum kfd_mem_attachment_type {
@@ -67,6 +68,15 @@ struct kfd_mem_attachment {
 	uint64_t pte_flags;
 };
 
+struct user_range_info {
+	uint64_t start;			/* CPU virtual address start */
+	uint64_t size;			/* Size in bytes */
+	struct hmm_range *range;	/* HMM range for this userptr */
+	uint32_t invalid;		/* Invalidation counter */
+	struct mmu_interval_notifier notifier;	/* MMU notifier for this range */
+	struct kgd_mem *mem;		/* Back pointer to parent kgd_mem */
+};
+
 struct kgd_mem {
 	struct mutex lock;
 	struct amdgpu_bo *bo;
@@ -89,6 +99,12 @@ struct kgd_mem {
 	uint32_t gem_handle;
 	bool aql_queue;
 	bool is_imported;
+
+	/* For batch userptr allocation: multiple non-contiguous CPU VA ranges
+	 * mapped to a single contiguous GPU VA range
+	 */
+	uint32_t num_user_ranges;
+	struct user_range_info *user_ranges;
 };
 
 /* KFD Memory Eviction */
@@ -313,6 +329,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
 		uint64_t *offset, uint32_t flags, bool criu_resume);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch(
+		struct amdgpu_device *adev, uint64_t va, uint64_t size,
+		void *drm_priv, struct kgd_mem **mem,
+		uint64_t *offset, struct kfd_ioctl_userptr_range *ranges,
+		uint32_t num_ranges, uint32_t flags, bool criu_resume);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size);
-- 
2.34.1

From: Honglei Huang

Implement the core functionality for batch userptr allocation including:

1. HMM range management and MMU notifiers for multiple ranges
2. Per-range eviction and invalidation tracking
3. Batch allocation function amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch()
4. Helper functions for batch page operations
5. Support for eviction and restore of batch userptr allocations
6. Unified cleanup path for both single and batch userptr

Each range in a batch allocation gets its own HMM notifier and
invalidation tracking, while all ranges are validated together and
mapped to a contiguous GPU virtual address space.

Signed-off-by: Honglei Huang
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 543 +++++++++++++++++-
 1 file changed, 524 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a32b46355..67a6519dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1122,6 +1122,256 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 	return ret;
 }
 
+/* Evict a single range from batch userptr BO
+ *
+ * Called when one of the non-contiguous VA ranges is invalidated.
+ * Similar to amdgpu_amdkfd_evict_userptr but for individual ranges.
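+ *
+ * Both the per-range invalid counter and the BO-wide mem->invalid counter
+ * are bumped under the notifier lock; the restore worker later re-faults
+ * only the ranges whose counter is set and then revalidates the BO as a
+ * whole.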
+ */ +static int amdgpu_amdkfd_evict_userptr_range(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct user_range_info *range_info; + struct kgd_mem *mem; + struct amdkfd_process_info *process_info; + int r = 0; + + range_info = container_of(mni, struct user_range_info, notifier); + mem = range_info->mem; + process_info = mem->process_info; + + if (READ_ONCE(process_info->block_mmu_notifications)) + return 0; + + mutex_lock(&process_info->notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + range_info->invalid++; + mem->invalid++; + + if (++process_info->evicted_bos == 1) { + r = kgd2kfd_quiesce_mm(mni->mm, + KFD_QUEUE_EVICTION_TRIGGER_USERPTR); + + if (r && r != -ESRCH) + pr_err("Failed to quiesce KFD\n"); + + if (r != -ESRCH) + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + } + mutex_unlock(&process_info->notifier_lock); + + pr_debug("Batch userptr range evicted: range %d, addr 0x%llx, size 0x%llx\n", + (int)(range_info - mem->user_ranges), range_info->start, range_info->size); + + return r; +} + +static bool amdgpu_amdkfd_invalidate_userptr_range(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + amdgpu_amdkfd_evict_userptr_range(mni, range, cur_seq); + return true; +} + +static const struct mmu_interval_notifier_ops amdgpu_amdkfd_hsa_range_ops = { + .invalidate = amdgpu_amdkfd_invalidate_userptr_range, +}; + +static int get_user_pages_batch(struct mm_struct *mm, + struct user_range_info *range, + struct hmm_range **range_hmm, bool readonly) +{ + struct vm_area_struct *vma; + int r = 0; + + *range_hmm = NULL; + + if (!mmget_not_zero(mm)) + return -ESRCH; + + mmap_read_lock(mm); + vma = vma_lookup(mm, range->start); + if (unlikely(!vma)) { + r = -EFAULT; + goto out_unlock; + } + + r = amdgpu_hmm_range_get_pages(&range->notifier, range->start, + range->size >> PAGE_SHIFT, readonly, + NULL, range_hmm); + +out_unlock: + mmap_read_unlock(mm); + mmput(mm); + return r; +} + +static int set_user_pages_batch(struct ttm_tt *ttm, + struct user_range_info *ranges, + uint32_t nranges) +{ + uint32_t i, j, k = 0, range_npfns; + + for (i = 0; i < nranges; ++i) { + if (!ranges[i].range || !ranges[i].range->hmm_pfns) + return -EINVAL; + + range_npfns = (ranges[i].range->end - ranges[i].range->start) >> + PAGE_SHIFT; + + if (k + range_npfns > ttm->num_pages) + return -EOVERFLOW; + + for (j = 0; j < range_npfns; ++j) + ttm->pages[k++] = + hmm_pfn_to_page(ranges[i].range->hmm_pfns[j]); + } + + return 0; +} + +/* Initialize batch of non-contiguous userptr ranges. + * Each range gets its own HMM notifier and page tracking. + * All ranges will be validated together and mapped to a contiguous GPU VA. + * + * Similar to init_user_pages but handles multiple ranges. + * Returns 0 for success, negative errno for errors. 
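+ *
+ * Pages from the individual ranges are packed into the BO's ttm_tt in
+ * array order, so the order of the ranges supplied by userspace defines
+ * their layout inside the contiguous GPU virtual address range.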
+ */ +static int init_user_pages_batch(struct kgd_mem *mem, + struct kfd_ioctl_userptr_range *ranges, + uint32_t num_ranges, bool criu_resume, + uint64_t user_addr, uint32_t size) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + struct hmm_range *range; + int ret = 0; + uint32_t i; + + if (!num_ranges || !ranges) + return -EINVAL; + + mutex_lock(&process_info->lock); + + mem->user_ranges = kvcalloc(num_ranges, sizeof(struct user_range_info), + GFP_KERNEL); + + if (!mem->user_ranges) { + ret = -ENOMEM; + goto out; + } + mem->num_user_ranges = num_ranges; + + ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + /* Process each range: register HMM and get pages */ + for (i = 0; i < num_ranges; i++) { + mem->user_ranges[i].start = ranges[i].start; + mem->user_ranges[i].size = ranges[i].size; + + pr_debug("Initializing userptr range %u: addr=0x%llx size=0x%llx\n", + i, mem->user_ranges[i].start, mem->user_ranges[i].size); + + mem->user_ranges[i].mem = mem; + + ret = mmu_interval_notifier_insert(&mem->user_ranges[i].notifier, + current->mm, mem->user_ranges[i].start, mem->user_ranges[i].size, + &amdgpu_amdkfd_hsa_range_ops); + if (ret) { + pr_err("%s: Failed to register range MMU notifier for range %u: %d\n", + __func__, i, ret); + while (i > 0) { + i--; + if (mem->user_ranges[i].notifier.mm) { + mmu_interval_notifier_remove( + &mem->user_ranges[i].notifier); + mem->user_ranges[i].notifier.mm = NULL; + } + } + goto err_cleanup_ranges; + } + + if (criu_resume) { + mutex_lock(&process_info->notifier_lock); + mem->user_ranges[i].invalid++; + mutex_unlock(&process_info->notifier_lock); + continue; + } + + ret = get_user_pages_batch( + current->mm, &mem->user_ranges[i], &range, + amdgpu_ttm_tt_is_readonly(bo->tbo.ttm)); + if (ret) { + if (ret == -EAGAIN) + pr_debug("Failed to get user pages for range %u, try again\n", i); + else + pr_err("%s: Failed to get user pages for range %u: %d\n", + __func__, i, ret); + goto err_unregister; + } + + mem->user_ranges[i].range = range; + } + + if (criu_resume) { + mutex_unlock(&process_info->lock); + return 0; + } + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_pages; + } + + if (bo->tbo.ttm->pages) { + set_user_pages_batch(bo->tbo.ttm, + mem->user_ranges, + num_ranges); + } else { + pr_err("%s: TTM pages array is NULL\n", __func__); + ret = -EINVAL; + amdgpu_bo_unreserve(bo); + goto release_pages; + } + + amdgpu_bo_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + + amdgpu_bo_unreserve(bo); + +release_pages: + for (i = 0; i < num_ranges; i++) { + if (mem->user_ranges[i].range) { + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, + mem->user_ranges[i].range); + } + } + +err_unregister: +err_cleanup_ranges: + if (ret) { + for (i = 0; i < num_ranges; i++) { + mem->user_ranges[i].range = NULL; + } + } + +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. 
Optionally a sync @@ -1880,6 +2130,177 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return ret; } +/* Allocate memory for batch of non-contiguous userptr ranges. + * All ranges will be mapped to a single contiguous GPU VA. + * + * @adev: amdgpu device pointer + * @va: contiguous GPU virtual address for mapping + * @size: total size (sum of all range sizes) + * @drm_priv: drm private data + * @mem: resulting kgd_mem pointer + * @offset: not used for batch userptr + * @ranges: array of userptr ranges + * @num_ranges: number of ranges + * @flags: allocation flags (must include KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) + * @criu_resume: whether this is for CRIU restore + */ +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch( + struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, + struct kgd_mem **mem, uint64_t *offset, + struct kfd_ioctl_userptr_range *ranges, uint32_t num_ranges, + uint32_t flags, bool criu_resume) +{ + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdgpu_bo *bo; + struct drm_gem_object *gobj = NULL; + u32 domain, alloc_domain; + uint64_t aligned_size; + int8_t xcp_id = -1; + u64 alloc_flags; + int ret; + + if (!(flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { + pr_err("Batch allocation requires USERPTR flag\n"); + return -EINVAL; + } + + /* Batch userptr does not support AQL queue */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) { + pr_err("Batch userptr does not support AQL queue\n"); + return -EINVAL; + } + + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE; + + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) + alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) { + ret = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&(*mem)->attachments); + mutex_init(&(*mem)->lock); + (*mem)->aql_queue = false; + + aligned_size = PAGE_ALIGN(size); + + (*mem)->alloc_flags = flags; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags, + xcp_id); + if (ret) { + pr_debug("Insufficient memory\n"); + goto err_reserve_limit; + } + + pr_debug("\tcreate BO VA 0x%llx size 0x%llx for batch userptr (ranges=%u)\n", + va, size, num_ranges); + + ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, + ttm_bo_type_device, NULL, &gobj, xcp_id + 1); + if (ret) { + pr_debug("Failed to create BO on domain %s. ret %d\n", + domain_string(alloc_domain), ret); + goto err_bo_create; + } + + ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); + if (ret) { + pr_debug("Failed to allow vma node access. 
ret %d\n", ret); + goto err_node_allow; + } + + ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); + if (ret) + goto err_gem_handle_create; + + bo = gem_to_amdgpu_bo(gobj); + bo->kfd_bo = *mem; + bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; + + (*mem)->bo = bo; + (*mem)->va = va; + (*mem)->domain = domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, ranges[0].start); + + pr_debug("Initializing batch userptr with %u ranges\n", num_ranges); + ret = init_user_pages_batch(*mem, ranges, num_ranges, criu_resume, va, aligned_size); + if (ret) { + pr_err("Failed to initialize batch user pages: %d\n", ret); + goto allocate_init_user_pages_failed; + } + + return 0; + +allocate_init_user_pages_failed: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle); +err_gem_handle_create: + drm_vma_node_revoke(&gobj->vma_node, drm_priv); +err_node_allow: + /* Don't unreserve system mem limit twice */ + goto err_reserve_limit; +err_bo_create: + amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); +err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); + mutex_destroy(&(*mem)->lock); + if (gobj) + drm_gem_object_put(gobj); + else + kfree(*mem); +err: + return ret; +} + +/* Cleanup userptr resources during BO free + * + * Handles both single and batch userptr: unregisters MMU notifiers, + * discards user pages, and frees userptr-related structures. + */ +static void cleanup_userptr_resources(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + if (!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) + return; + + if (mem->num_user_ranges > 0 && mem->user_ranges) { + /* Batch userptr: cleanup all ranges */ + uint32_t i; + + for (i = 0; i < mem->num_user_ranges; i++) { + if (mem->user_ranges[i].notifier.mm) { + mmu_interval_notifier_remove( + &mem->user_ranges[i].notifier); + mem->user_ranges[i].notifier.mm = NULL; + } + } + + kvfree(mem->user_ranges); + mem->user_ranges = NULL; + mem->num_user_ranges = 0; + } else { + /* Single userptr: cleanup single range */ + amdgpu_hmm_unregister(mem->bo); + mutex_lock(&process_info->notifier_lock); + amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); + mutex_unlock(&process_info->notifier_lock); + } +} + int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size) @@ -1920,13 +2341,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&mem->validate_list); mutex_unlock(&process_info->lock); - /* Cleanup user pages and MMU notifiers */ - if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { - amdgpu_hmm_unregister(mem->bo); - mutex_lock(&process_info->notifier_lock); - amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); - mutex_unlock(&process_info->notifier_lock); - } + /* Cleanup userptr resources */ + cleanup_userptr_resources(mem, process_info); ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) @@ -2513,6 +2929,50 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, return r; } +static void discard_user_pages_batch(struct amdgpu_bo *bo, struct kgd_mem *mem) +{ + uint32_t i; + + for (i = 0; i < mem->num_user_ranges; i++) { + if (mem->user_ranges[i].range) { + amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, + mem->user_ranges[i].range); + mem->user_ranges[i].range = NULL; + } + } +} + +static int 
amdgpu_amdkfd_update_user_pages_batch(struct mm_struct *mm, + struct amdgpu_bo *bo, + struct kgd_mem *mem) +{ + uint32_t i; + int ret = 0; + bool all_success = true; + + for (i = 0; i < mem->num_user_ranges; i++) { + if (!mem->user_ranges[i].invalid) + continue; + + ret = get_user_pages_batch( + mm, &mem->user_ranges[i], + &mem->user_ranges[i].range, + amdgpu_ttm_tt_is_readonly(bo->tbo.ttm)); + if (ret) { + pr_debug("Failed %d to get user pages for range %u\n", + ret, i); + all_success = false; + break; + } + mem->user_ranges[i].invalid = 0; + } + + if (!all_success) + ret = ret ? ret : -EFAULT; + + return ret; +} + /* Update invalid userptr BOs * * Moves invalidated (evicted) userptr BOs from userptr_valid_list to @@ -2550,8 +3010,13 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); - mem->range = NULL; + /* Discard old user pages */ + if (mem->num_user_ranges > 0 && mem->user_ranges) + discard_user_pages_batch(bo, mem); + else { + amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); + mem->range = NULL; + } /* BO reservations and getting user pages (hmm_range_fault) * must happen outside the notifier lock @@ -2574,8 +3039,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } } - /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); + if (mem->num_user_ranges > 0 && mem->user_ranges) + ret = amdgpu_amdkfd_update_user_pages_batch(mm, bo, mem); + else + ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); + if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2609,7 +3077,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = 0; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range); + if (mem->num_user_ranges == 0) + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range); + else + set_user_pages_batch(bo->tbo.ttm, mem->user_ranges, mem->num_user_ranges); mutex_lock(&process_info->notifier_lock); @@ -2730,6 +3201,34 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) return ret; } +/* Confirm that batch userptr ranges are valid + * + * Checks all ranges in a batch userptr BO and releases hmm_range structures. + * Returns true if all ranges are valid, false otherwise. + */ +static bool valid_user_pages_batch(struct kgd_mem *mem) +{ + uint32_t i; + bool all_valid = true; + + if (!mem->user_ranges || mem->num_user_ranges == 0) + return true; + + for (i = 0; i < mem->num_user_ranges; i++) { + if (!mem->user_ranges[i].range) + continue; + + if (!amdgpu_ttm_tt_get_user_pages_done(mem->bo->tbo.ttm, + mem->user_ranges[i].range)) { + all_valid = false; + } + + mem->user_ranges[i].range = NULL; + } + + return all_valid; +} + /* Confirm that all user pages are valid while holding the notifier lock * * Moves valid BOs from the userptr_inval_list back to userptr_val_list. 
@@ -2744,15 +3243,21 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i validate_list) { bool valid; - /* keep mem without hmm range at userptr_inval_list */ - if (!mem->range) - continue; + /* Handle batch userptr: check all ranges */ + if (mem->num_user_ranges > 0 && mem->user_ranges) + valid = valid_user_pages_batch(mem); + else { + /* keep mem without hmm range at userptr_inval_list */ + if (!mem->range) + continue; - /* Only check mem with hmm range associated */ - valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + /* Only check mem with hmm range associated */ + valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); + + mem->range = NULL; + } - mem->range = NULL; if (!valid) { WARN(!mem->invalid, "Invalid BO not marked invalid"); ret = -EAGAIN; -- 2.34.1 From: Honglei Huang Add the ioctl handler for AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH that processes userspace requests for batch userptr allocation. The handler performs validation of input parameters including: - Checking all ranges are page-aligned and non-zero - Verifying total size matches sum of range sizes - Ensuring no conflicts with existing SVM allocations - Validating that USERPTR flag is set This completes the batch userptr feature by connecting the UAPI to the implementation. Signed-off-by: Honglei Huang --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 159 +++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a72cc980a..d8cfd8697 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1196,6 +1196,162 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return err; } +static int kfd_ioctl_alloc_memory_of_gpu_batch(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_alloc_memory_of_gpu_batch_args *args = data; + struct kfd_ioctl_userptr_range *ranges = NULL; + struct kfd_process_device *pdd; + void *mem; + struct kfd_node *dev; + int idr_handle; + long err; + uint32_t flags = args->flags; + uint32_t i; + uint64_t total_size = 0; + + if (args->total_size == 0) { + pr_err("Batch allocation: total size cannot be zero\n"); + return -EINVAL; + } + + if (args->num_ranges == 0) { + pr_err("Batch allocation: invalid number of ranges %u\n", + args->num_ranges); + return -EINVAL; + } + + if (!args->ranges_ptr) { + pr_err("Batch allocation: ranges pointer is NULL\n"); + return -EINVAL; + } + + if (!(flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { + pr_err("Batch allocation requires USERPTR flag\n"); + return -EOPNOTSUPP; + } + + if (p->context_id != KFD_CONTEXT_ID_PRIMARY) { + pr_debug("Batch USERPTR is not supported on non-primary kfd_process\n"); + return -EOPNOTSUPP; + } + + ranges = kvmalloc_array(args->num_ranges, sizeof(*ranges), GFP_KERNEL); + if (!ranges) { + err = -ENOMEM; + goto err_alloc_ranges; + } + + if (copy_from_user(ranges, (void __user *)args->ranges_ptr, + args->num_ranges * sizeof(*ranges))) { + pr_err("Failed to copy ranges from user space\n"); + err = -EFAULT; + goto err_copy_ranges; + } + + for (i = 0; i < args->num_ranges; i++) { + if (!ranges[i].start || !ranges[i].size || + (ranges[i].start & ~PAGE_MASK) || + (ranges[i].size & ~PAGE_MASK)) { + pr_err("Invalid range %u: start=0x%llx size=0x%llx\n", + i, ranges[i].start, ranges[i].size); + err = -EINVAL; + goto err_copy_ranges; + } + total_size += ranges[i].size; + } + + if (total_size != 
args->total_size) { + pr_err("Size mismatch: provided %llu != calculated %llu\n", + args->total_size, total_size); + err = -EINVAL; + goto err_copy_ranges; + } + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + /* Check for conflicts with SVM */ + svm_range_list_lock_and_flush_work(&p->svms, current->mm); + mutex_lock(&p->svms.lock); + mmap_write_unlock(current->mm); + + /* Check GPU VA for SVM conflicts */ + if (args->va_addr && + interval_tree_iter_first(&p->svms.objects, + args->va_addr >> PAGE_SHIFT, + (args->va_addr + args->total_size - 1) >> PAGE_SHIFT)) { + pr_err("GPU VA 0x%llx already allocated by SVM\n", args->va_addr); + mutex_unlock(&p->svms.lock); + err = -EADDRINUSE; + goto err_copy_ranges; + } + + /* Check each userptr range for SVM conflicts */ + for (i = 0; i < args->num_ranges; i++) { + if (interval_tree_iter_first(&p->svms.objects, + ranges[i].start >> PAGE_SHIFT, + (ranges[i].start + ranges[i].size - 1) >> PAGE_SHIFT)) { + pr_err("Userptr range %u (0x%llx) already allocated by SVM\n", + i, ranges[i].start); + mutex_unlock(&p->svms.lock); + err = -EADDRINUSE; + goto err_copy_ranges; + } + } + + mutex_unlock(&p->svms.lock); +#endif + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + if (!pdd) { + err = -EINVAL; + goto err_pdd; + } + + dev = pdd->dev; + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto err_unlock; + } + + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch( + dev->adev, args->va_addr, args->total_size, pdd->drm_priv, + (struct kgd_mem **)&mem, NULL, ranges, args->num_ranges, + flags, false); + + if (err) + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + if (idr_handle < 0) { + err = -EFAULT; + goto err_free; + } + + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); + + mutex_unlock(&p->mutex); + kvfree(ranges); + + pr_debug("Batch userptr allocated: va=0x%llx size=0x%llx ranges=%u handle=0x%llx\n", + args->va_addr, args->total_size, args->num_ranges, args->handle); + + return 0; + +err_free: + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, + pdd->drm_priv, NULL); +err_unlock: +err_pdd: + mutex_unlock(&p->mutex); +err_copy_ranges: + kvfree(ranges); +err_alloc_ranges: + return err; +} + static int kfd_ioctl_free_memory_of_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -3309,6 +3465,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS, kfd_ioctl_create_process, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH, + kfd_ioctl_alloc_memory_of_gpu_batch, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) -- 2.34.1
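
For completeness, a sketch of the remaining userspace lifecycle. The series
does not add a dedicated map path, so the handle returned by the batch
ioctl is expected to go through the existing map/unmap/free ioctls like any
other allocation (the free path is explicitly taught about batch userptr
via cleanup_userptr_resources() in patch 3). kfd_fd, gpu_id and handle are
assumed to come from the allocation sketch in patch 2; error handling and
the actual work submission are omitted.

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kfd_ioctl.h>

  static void map_and_free_batch(int kfd_fd, uint32_t gpu_id, uint64_t handle)
  {
          uint32_t device_id = gpu_id;
          struct kfd_ioctl_map_memory_to_gpu_args map_args = {
                  .handle = handle,
                  .device_ids_array_ptr = (uint64_t)(uintptr_t)&device_id,
                  .n_devices = 1,
          };
          struct kfd_ioctl_unmap_memory_from_gpu_args unmap_args = {
                  .handle = handle,
                  .device_ids_array_ptr = (uint64_t)(uintptr_t)&device_id,
                  .n_devices = 1,
          };
          struct kfd_ioctl_free_memory_of_gpu_args free_args = {
                  .handle = handle,
          };

          ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map_args);
          /* ... dispatch work referencing the contiguous GPU VA ... */
          ioctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &unmap_args);
          ioctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
  }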