Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next
Major points for this pull request:

- Add dGPU support for amdkfd initialization code and queue handling. It's not
  complete support since the GPUVM part is missing (the under-debate stuff).
- Enable PCIe atomics for dGPUs if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes

* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
  uapi: Fix type used in ioctl parameter structures
  drm/amdkfd: Implement KFD process eviction/restore
  drm/amdkfd: Add GPUVM virtual address space to PDD
  drm/amdkfd: Remove unaligned memory access
  drm/amdkfd: Centralize IOMMUv2 code and make it conditional
  drm/amdgpu: Add submit IB function for KFD
  drm/amdgpu: Add GPUVM memory management functions for KFD
  drm/amdgpu: add amdgpu_sync_clone
  drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
  drm/amdgpu: Add KFD eviction fence
  drm/amdgpu: Remove unused kfd2kgd interface
  drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
  drm/amdgpu: Fix header file dependencies
  drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
  drm/amdgpu: remove useless BUG_ONs
  drm/amdgpu: Enable KFD initialization on dGPUs
  drm/amdkfd: Add dGPU device IDs and device info
  drm/amdkfd: Add dGPU support to kernel_queue_init
  drm/amdkfd: Add dGPU support to the MQD manager
  drm/amdkfd: Add dGPU support to the device queue manager
  ...
commit 6fa7324ac5
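One of the points above is enabling PCIe atomics for dGPUs before exposing them to KFD. The sketch below is a hedged illustration of that gating, not the driver's actual code: only pci_enable_atomic_ops_to_root() and the PCI_EXP_DEVCAP2_ATOMIC_COMP32/64 flags are taken from the diff further down; the example_* struct and function names are placeholders.

/*
 * Minimal sketch of the PCIe-atomics gating described above. Assumptions:
 * the example_* names are placeholders, not the driver's real symbols; only
 * pci_enable_atomic_ops_to_root() and the DEVCAP2 completer flags come from
 * the series itself.
 */
#include <linux/pci.h>

struct example_kfd_device_info {
	bool needs_pci_atomics;	/* set for dGPUs such as Tonga/Fiji/Polaris */
};

static bool example_kfd_atomics_ok(struct pci_dev *pdev,
				   const struct example_kfd_device_info *info)
{
	if (!info->needs_pci_atomics)
		return true;		/* APUs don't need PCIe atomics */

	/* Ask the PCIe root port to complete 32-bit and 64-bit AtomicOps */
	if (pci_enable_atomic_ops_to_root(pdev,
					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
					  PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
		dev_info(&pdev->dev, "PCIe atomics not supported, skipping KFD\n");
		return false;
	}

	return true;
}

In the actual series the equivalent check lives in kgd2kfd_probe(), keyed off the new needs_pci_atomics field in kfd_device_info.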
@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
 F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 F: drivers/gpu/drm/amd/amdkfd/
 F: drivers/gpu/drm/amd/include/cik_structs.h
 F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h

@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)

	trace_dma_fence_destroy(fence);

	/* Failed to signal before release, could be a refcounting issue */
	WARN_ON(!list_empty(&fence->cb_list));

	if (fence->ops->release)

@@ -129,6 +129,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
	amdgpu_amdkfd.o \
+	amdgpu_amdkfd_fence.o \
+	amdgpu_amdkfd_gpuvm.o \
	amdgpu_amdkfd_gfx_v8.o

# add cgs

@@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

+static const unsigned int compute_vmid_bitmap = 0xFF00;

int amdgpu_amdkfd_init(void)
{
	int ret;

@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
	ret = -ENOENT;
#endif
+	amdgpu_amdkfd_gpuvm_init_mem_limits();

	return ret;
}

@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
+	case CHIP_HAWAII:
		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
		break;
#endif
	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	default:

@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
	int last_valid_bit;
	if (adev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
-			.compute_vmid_bitmap = 0xFF00,
+			.compute_vmid_bitmap = compute_vmid_bitmap,
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
-			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+			.gpuvm_size = min(adev->vm_manager.max_pfn
+					  << AMDGPU_GPU_PAGE_SHIFT,
+					  AMDGPU_VA_HOLE_START),
+			.drm_render_minor = adev->ddev->render->index
		};

		/* this is going to have a few of the MSBs set that we need to

@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **cpu_ptr)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	struct amdgpu_bo *bo = NULL;
	int r;

-	BUG_ON(kgd == NULL);
-	BUG_ON(gpu_addr == NULL);
-	BUG_ON(cpu_ptr == NULL);
-
-	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if ((*mem) == NULL)
-		return -ENOMEM;
+	uint64_t gpu_addr_tmp = 0;
+	void *cpu_ptr_tmp = NULL;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
-			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
+			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);

@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
	}

	/* map the buffer */
-	r = amdgpu_bo_reserve((*mem)->bo, true);
+	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

-	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
-			&(*mem)->gpu_addr);
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
+			&gpu_addr_tmp);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}
-	*gpu_addr = (*mem)->gpu_addr;

-	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}
-	*cpu_ptr = (*mem)->cpu_ptr;

-	amdgpu_bo_unreserve((*mem)->bo);
+	*mem_obj = bo;
+	*gpu_addr = gpu_addr_tmp;
+	*cpu_ptr = cpu_ptr_tmp;

+	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
-	amdgpu_bo_unpin((*mem)->bo);
+	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
-	amdgpu_bo_unreserve((*mem)->bo);
+	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
-	amdgpu_bo_unref(&(*mem)->bo);
+	amdgpu_bo_unref(&bo);

	return r;
}

void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
-	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
+	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

-	BUG_ON(mem == NULL);
-
-	amdgpu_bo_reserve(mem->bo, true);
-	amdgpu_bo_kunmap(mem->bo);
-	amdgpu_bo_unpin(mem->bo);
-	amdgpu_bo_unreserve(mem->bo);
-	amdgpu_bo_unref(&(mem->bo));
-	kfree(mem);
+	amdgpu_bo_reserve(bo, true);
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&(bo));
}

void get_local_mem_info(struct kgd_dev *kgd,

@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
|
|||
|
||||
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
||||
uint32_t vmid, uint64_t gpu_addr,
|
||||
uint32_t *ib_cmd, uint32_t ib_len)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_job *job;
|
||||
struct amdgpu_ib *ib;
|
||||
struct amdgpu_ring *ring;
|
||||
struct dma_fence *f = NULL;
|
||||
int ret;
|
||||
|
||||
switch (engine) {
|
||||
case KGD_ENGINE_MEC1:
|
||||
ring = &adev->gfx.compute_ring[0];
|
||||
break;
|
||||
case KGD_ENGINE_SDMA1:
|
||||
ring = &adev->sdma.instance[0].ring;
|
||||
break;
|
||||
case KGD_ENGINE_SDMA2:
|
||||
ring = &adev->sdma.instance[1].ring;
|
||||
break;
|
||||
default:
|
||||
pr_err("Invalid engine in IB submission: %d\n", engine);
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = amdgpu_job_alloc(adev, 1, &job, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ib = &job->ibs[0];
|
||||
memset(ib, 0, sizeof(struct amdgpu_ib));
|
||||
|
||||
ib->gpu_addr = gpu_addr;
|
||||
ib->ptr = ib_cmd;
|
||||
ib->length_dw = ib_len;
|
||||
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
|
||||
job->vmid = vmid;
|
||||
|
||||
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
|
||||
if (ret) {
|
||||
DRM_ERROR("amdgpu: failed to schedule IB.\n");
|
||||
goto err_ib_sched;
|
||||
}
|
||||
|
||||
ret = dma_fence_wait(f, false);
|
||||
|
||||
err_ib_sched:
|
||||
dma_fence_put(f);
|
||||
amdgpu_job_free(job);
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
|
||||
{
|
||||
if (adev->kfd) {
|
||||
if ((1 << vmid) & compute_vmid_bitmap)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -28,13 +28,89 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
#include <drm/ttm/ttm_execbuf_util.h>
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_vm.h"
|
||||
|
||||
extern const struct kgd2kfd_calls *kgd2kfd;
|
||||
|
||||
struct amdgpu_device;
|
||||
|
||||
struct kfd_bo_va_list {
|
||||
struct list_head bo_list;
|
||||
struct amdgpu_bo_va *bo_va;
|
||||
void *kgd_dev;
|
||||
bool is_mapped;
|
||||
uint64_t va;
|
||||
uint64_t pte_flags;
|
||||
};
|
||||
|
||||
struct kgd_mem {
|
||||
struct mutex lock;
|
||||
struct amdgpu_bo *bo;
|
||||
uint64_t gpu_addr;
|
||||
void *cpu_ptr;
|
||||
struct list_head bo_va_list;
|
||||
/* protected by amdkfd_process_info.lock */
|
||||
struct ttm_validate_buffer validate_list;
|
||||
struct ttm_validate_buffer resv_list;
|
||||
uint32_t domain;
|
||||
unsigned int mapped_to_gpu_memory;
|
||||
uint64_t va;
|
||||
|
||||
uint32_t mapping_flags;
|
||||
|
||||
struct amdkfd_process_info *process_info;
|
||||
|
||||
struct amdgpu_sync sync;
|
||||
|
||||
bool aql_queue;
|
||||
};
|
||||
|
||||
/* KFD Memory Eviction */
|
||||
struct amdgpu_amdkfd_fence {
|
||||
struct dma_fence base;
|
||||
struct mm_struct *mm;
|
||||
spinlock_t lock;
|
||||
char timeline_name[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm);
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
|
||||
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
|
||||
|
||||
struct amdkfd_process_info {
|
||||
/* List head of all VMs that belong to a KFD process */
|
||||
struct list_head vm_list_head;
|
||||
/* List head for all KFD BOs that belong to a KFD process. */
|
||||
struct list_head kfd_bo_list;
|
||||
/* Lock to protect kfd_bo_list */
|
||||
struct mutex lock;
|
||||
|
||||
/* Number of VMs */
|
||||
unsigned int n_vms;
|
||||
/* Eviction Fence */
|
||||
struct amdgpu_amdkfd_fence *eviction_fence;
|
||||
};
|
||||
|
||||
/* struct amdkfd_vm -
|
||||
* For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
|
||||
* belonging to a KFD process. All the VMs belonging to the same process point
|
||||
* to the same amdkfd_process_info.
|
||||
*/
|
||||
struct amdkfd_vm {
|
||||
/* Keep base as the first parameter for pointer compatibility between
|
||||
* amdkfd_vm and amdgpu_vm.
|
||||
*/
|
||||
struct amdgpu_vm base;
|
||||
|
||||
/* List node in amdkfd_process_info.vm_list_head*/
|
||||
struct list_head vm_list_node;
|
||||
|
||||
struct amdgpu_device *adev;
|
||||
/* Points to the KFD process VM info*/
|
||||
struct amdkfd_process_info *process_info;
|
||||
|
||||
uint64_t pd_phys_addr;
|
||||
};
|
||||
|
||||
int amdgpu_amdkfd_init(void);
|
||||
|
@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
|
|||
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
||||
uint32_t vmid, uint64_t gpu_addr,
|
||||
uint32_t *ib_cmd, uint32_t ib_len);
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
|
||||
|
||||
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
|
||||
|
||||
/* Shared API */
|
||||
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
|
@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
|||
valid; \
|
||||
})
|
||||
|
||||
/* GPUVM API */
|
||||
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
|
||||
void **process_info,
|
||||
struct dma_fence **ef);
|
||||
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
|
||||
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct kgd_dev *kgd, uint64_t va, uint64_t size,
|
||||
void *vm, struct kgd_mem **mem,
|
||||
uint64_t *offset, uint32_t flags);
|
||||
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem);
|
||||
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
|
||||
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
|
||||
int amdgpu_amdkfd_gpuvm_sync_memory(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
|
||||
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
|
||||
struct kgd_mem *mem, void **kptr, uint64_t *size);
|
||||
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
|
||||
struct dma_fence **ef);
|
||||
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
|
||||
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
|
|
@ -0,0 +1,179 @@
|
|||
/*
|
||||
* Copyright 2016-2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/dma-fence.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
static const struct dma_fence_ops amdkfd_fence_ops;
|
||||
static atomic_t fence_seq = ATOMIC_INIT(0);
|
||||
|
||||
/* Eviction Fence
|
||||
* Fence helper functions to deal with KFD memory eviction.
|
||||
* Big Idea - Since KFD submissions are done by user queues, a BO cannot be
|
||||
* evicted unless all the user queues for that process are evicted.
|
||||
*
|
||||
* All the BOs in a process share an eviction fence. When process X wants
|
||||
* to map VRAM memory but TTM can't find enough space, TTM will attempt to
|
||||
* evict BOs from its LRU list. TTM checks if the BO is valuable to evict
|
||||
* by calling ttm_bo_driver->eviction_valuable().
|
||||
*
|
||||
* ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
|
||||
* to process X. Otherwise, it will return true to indicate BO can be
|
||||
* evicted by TTM.
|
||||
*
|
||||
* If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
|
||||
the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
|
||||
* --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
|
||||
*
|
||||
* GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
|
||||
notify when the BO is free to move. fence_add_callback --> enable_signaling
|
||||
* --> amdgpu_amdkfd_fence.enable_signaling
|
||||
*
|
||||
* amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
|
||||
* user queues and signal fence. The work item will also start another delayed
|
||||
* work item to restore BOs
|
||||
*/
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence;
|
||||
|
||||
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
|
||||
if (fence == NULL)
|
||||
return NULL;
|
||||
|
||||
/* This reference gets released in amdkfd_fence_release */
|
||||
mmgrab(mm);
|
||||
fence->mm = mm;
|
||||
get_task_comm(fence->timeline_name, current);
|
||||
spin_lock_init(&fence->lock);
|
||||
|
||||
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
|
||||
context, atomic_inc_return(&fence_seq));
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence;
|
||||
|
||||
if (!f)
|
||||
return NULL;
|
||||
|
||||
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
|
||||
if (fence && f->ops == &amdkfd_fence_ops)
|
||||
return fence;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
|
||||
{
|
||||
return "amdgpu_amdkfd_fence";
|
||||
}
|
||||
|
||||
static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
return fence->timeline_name;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
|
||||
* a KFD BO and schedules a job to move the BO.
|
||||
* If fence is already signaled return true.
|
||||
* If fence is not signaled, schedule an evict KFD process work item.
|
||||
*/
|
||||
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
if (!fence)
|
||||
return false;
|
||||
|
||||
if (dma_fence_is_signaled(f))
|
||||
return true;
|
||||
|
||||
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_release - callback that fence can be freed
|
||||
*
|
||||
* @fence: fence
|
||||
*
|
||||
* This function is called when the reference count becomes zero.
|
||||
* Drops the mm_struct reference and RCU schedules freeing up the fence.
|
||||
*/
|
||||
static void amdkfd_fence_release(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
/* Unconditionally signal the fence. The process is getting
|
||||
* terminated.
|
||||
*/
|
||||
if (WARN_ON(!fence))
|
||||
return; /* Not an amdgpu_amdkfd_fence */
|
||||
|
||||
mmdrop(fence->mm);
|
||||
kfree_rcu(f, rcu);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
|
||||
* if same return TRUE else return FALSE.
|
||||
*
|
||||
* @f: [IN] fence
|
||||
* @mm: [IN] mm that needs to be verified
|
||||
*/
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
if (!fence)
|
||||
return false;
|
||||
else if (fence->mm == mm)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops amdkfd_fence_ops = {
|
||||
.get_driver_name = amdkfd_fence_get_driver_name,
|
||||
.get_timeline_name = amdkfd_fence_get_timeline_name,
|
||||
.enable_signaling = amdkfd_fence_enable_signaling,
|
||||
.signaled = NULL,
|
||||
.wait = dma_fence_default_wait,
|
||||
.release = amdkfd_fence_release,
|
||||
};
|
|
@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
|
|||
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
|
||||
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
|
||||
|
||||
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
|
||||
static void set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
uint64_t va, uint32_t vmid);
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t page_table_base);
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||
|
||||
/* Because of REG_GET_FIELD() being used, we put this function in the
|
||||
* asic specific file.
|
||||
|
@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||
.address_watch_get_offset = kgd_address_watch_get_offset,
|
||||
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
|
||||
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
|
||||
.write_vmid_invalidate_request = write_vmid_invalidate_request,
|
||||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||
|
@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,

@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
const union amdgpu_firmware_header *hdr;
|
||||
|
||||
BUG_ON(kgd == NULL);
|
||||
|
||||
switch (type) {
|
||||
case KGD_ENGINE_PFP:
|
||||
hdr = (const union amdgpu_firmware_header *)
|
||||
|
@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
|
|||
return hdr->common.ucode_version;
|
||||
}
|
||||
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t page_table_base)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("trying to set page table base for wrong VMID\n");
|
||||
return;
|
||||
}
|
||||
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
|
||||
}
|
||||
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
int vmid;
|
||||
unsigned int tmp;
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
continue;
|
||||
|
||||
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
|
||||
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
|
||||
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("non kfd vmid\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
|||
uint32_t queue_id);
|
||||
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
unsigned int utimeout);
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
|
||||
static int kgd_address_watch_disable(struct kgd_dev *kgd);
|
||||
static int kgd_address_watch_execute(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
|
@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
|||
uint8_t vmid);
|
||||
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
|
||||
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
|
||||
static void set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
uint64_t va, uint32_t vmid);
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t page_table_base);
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||
|
||||
/* Because of REG_GET_FIELD() being used, we put this function in the
|
||||
* asic specific file.
|
||||
|
@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||
get_atc_vmid_pasid_mapping_pasid,
|
||||
.get_atc_vmid_pasid_mapping_valid =
|
||||
get_atc_vmid_pasid_mapping_valid,
|
||||
.write_vmid_invalidate_request = write_vmid_invalidate_request,
|
||||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||
|
@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
|
||||
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
|
||||
}
|
||||
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
|
||||
}
|
||||
|
||||
static int kgd_address_watch_disable(struct kgd_dev *kgd)
|
||||
|
@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
const union amdgpu_firmware_header *hdr;
|
||||
|
||||
BUG_ON(kgd == NULL);
|
||||
|
||||
switch (type) {
|
||||
case KGD_ENGINE_PFP:
|
||||
hdr = (const union amdgpu_firmware_header *)
|
||||
|
@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
|
|||
/* Only 12 bit in use*/
|
||||
return hdr->common.ucode_version;
|
||||
}
|
||||
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t page_table_base)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("trying to set page table base for wrong VMID\n");
|
||||
return;
|
||||
}
|
||||
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
|
||||
}
|
||||
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
int vmid;
|
||||
unsigned int tmp;
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
continue;
|
||||
|
||||
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
|
||||
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
|
||||
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("non kfd vmid %d\n", vmid);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||
return 0;
|
||||
}
|
||||
|
|
[File diff suppressed because it is too large]
@ -36,6 +36,7 @@
|
|||
#include <drm/drm_cache.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_trace.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
static bool amdgpu_need_backup(struct amdgpu_device *adev)
|
||||
{
|
||||
|
@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
|
|||
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
|
||||
|
||||
if (bo->kfd_bo)
|
||||
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
|
||||
|
||||
amdgpu_bo_kunmap(bo);
|
||||
|
||||
drm_gem_object_release(&bo->gem_base);
|
||||
|
|
|
@ -92,6 +92,8 @@ struct amdgpu_bo {
|
|||
struct list_head mn_list;
|
||||
struct list_head shadow_list;
|
||||
};
|
||||
|
||||
struct kgd_mem *kfd_bo;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
#include <drm/amdgpu_drm.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <drm/drm_print.h>
|
||||
|
||||
/* max number of rings */
|
||||
#define AMDGPU_MAX_RINGS 18
|
||||
|
@@ -35,8 +36,9 @@
#define AMDGPU_MAX_UVD_ENC_RINGS	2

/* some special values for the owner field */
-#define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
-#define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
+#define AMDGPU_FENCE_OWNER_UNDEFINED	((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM		((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD		((void *)2ul)

#define AMDGPU_FENCE_FLAG_64BIT		(1 << 0)
#define AMDGPU_FENCE_FLAG_INT		(1 << 1)

@ -31,6 +31,7 @@
|
|||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_trace.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
struct amdgpu_sync_entry {
|
||||
struct hlist_node node;
|
||||
|
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
-	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+	struct drm_sched_fence *s_fence;
+	struct amdgpu_amdkfd_fence *kfd_fence;
+
+	if (!f)
+		return AMDGPU_FENCE_OWNER_UNDEFINED;
+
+	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

+	kfd_fence = to_amdgpu_amdkfd_fence(f);
+	if (kfd_fence)
+		return AMDGPU_FENCE_OWNER_KFD;
+
	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
|
|||
for (i = 0; i < flist->shared_count; ++i) {
|
||||
f = rcu_dereference_protected(flist->shared[i],
|
||||
reservation_object_held(resv));
|
||||
/* We only want to trigger KFD eviction fences on
|
||||
* evict or move jobs. Skip KFD fences otherwise.
|
||||
*/
|
||||
fence_owner = amdgpu_sync_get_owner(f);
|
||||
if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
|
||||
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
|
||||
continue;
|
||||
|
||||
if (amdgpu_sync_same_dev(adev, f)) {
|
||||
/* VM updates are only interesting
|
||||
* for other VM updates and moves.
|
||||
*/
|
||||
fence_owner = amdgpu_sync_get_owner(f);
|
||||
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
((owner == AMDGPU_FENCE_OWNER_VM) !=
|
||||
|
@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_sync_clone - clone a sync object
|
||||
*
|
||||
* @source: sync object to clone
|
||||
* @clone: pointer to destination sync object
|
||||
*
|
||||
* Adds references to all unsignaled fences in @source to @clone. Also
|
||||
* removes signaled fences from @source while at it.
|
||||
*/
|
||||
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
struct hlist_node *tmp;
|
||||
struct dma_fence *f;
|
||||
int i, r;
|
||||
|
||||
hash_for_each_safe(source->fences, i, tmp, e, node) {
|
||||
f = e->fence;
|
||||
if (!dma_fence_is_signaled(f)) {
|
||||
r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
hash_del(&e->node);
|
||||
dma_fence_put(f);
|
||||
kmem_cache_free(amdgpu_sync_slab, e);
|
||||
}
|
||||
}
|
||||
|
||||
dma_fence_put(clone->last_vm_update);
|
||||
clone->last_vm_update = dma_fence_get(source->last_vm_update);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
|
|
|
@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
|
|||
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
|
||||
struct amdgpu_ring *ring);
|
||||
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
|
||||
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
|
||||
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
|
||||
void amdgpu_sync_free(struct amdgpu_sync *sync);
|
||||
int amdgpu_sync_init(void);
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "amdgpu.h"
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_trace.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "bif/bif_4_1_d.h"
|
||||
|
||||
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
|
||||
|
@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
|
|||
{
|
||||
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
|
||||
|
||||
/*
|
||||
* Don't verify access for KFD BOs. They don't have a GEM
|
||||
* object associated with them.
|
||||
*/
|
||||
if (abo->kfd_bo)
|
||||
return 0;
|
||||
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
|
||||
return -EPERM;
|
||||
return drm_vma_node_verify_access(&abo->gem_base.vma_node,
|
||||
|
@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
|
|||
{
|
||||
unsigned long num_pages = bo->mem.num_pages;
|
||||
struct drm_mm_node *node = bo->mem.mm_node;
|
||||
struct reservation_object_list *flist;
|
||||
struct dma_fence *f;
|
||||
int i;
|
||||
|
||||
/* If bo is a KFD BO, check if the bo belongs to the current process.
|
||||
* If true, then return false as any KFD process needs all its BOs to
|
||||
* be resident to run successfully
|
||||
*/
|
||||
flist = reservation_object_get_list(bo->resv);
|
||||
if (flist) {
|
||||
for (i = 0; i < flist->shared_count; ++i) {
|
||||
f = rcu_dereference_protected(flist->shared[i],
|
||||
reservation_object_held(bo->resv));
|
||||
if (amdkfd_fence_check_mm(f, current->mm))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
switch (bo->mem.mem_type) {
|
||||
case TTM_PL_TT:
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <linux/kfifo.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <drm/drm_file.h>
|
||||
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_ring.h"
|
||||
|
|
|
@@ -4,6 +4,7 @@

config HSA_AMD
	tristate "HSA kernel driver for AMD GPU devices"
-	depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
+	depends on DRM_AMDGPU && X86_64
+	imply AMD_IOMMU_V2
	help
	  Enable this if you want to use HSA features on AMD GPU devices.

@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
|
|||
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
|
||||
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
|
||||
|
||||
ifneq ($(CONFIG_AMD_IOMMU_V2),)
|
||||
amdkfd-y += kfd_iommu.o
|
||||
endif
|
||||
|
||||
amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
|
||||
|
||||
obj-$(CONFIG_HSA_AMD) += amdkfd.o
|
||||
|
|
|
@@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,

	mutex_unlock(&p->mutex);

-	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
+	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

@ -22,10 +22,10 @@
|
|||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/amd-iommu.h>
|
||||
#include "kfd_crat.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_topology.h"
|
||||
#include "kfd_iommu.h"
|
||||
|
||||
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
|
||||
* GPU processor ID are expressed with Bit[31]=1.
|
||||
|
@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
|||
struct crat_subtype_generic *sub_type_hdr;
|
||||
struct crat_subtype_computeunit *cu;
|
||||
struct kfd_cu_info cu_info;
|
||||
struct amd_iommu_device_info iommu_info;
|
||||
int avail_size = *size;
|
||||
uint32_t total_num_of_cu;
|
||||
int num_of_cache_entries = 0;
|
||||
int cache_mem_filled = 0;
|
||||
int ret = 0;
|
||||
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
|
||||
struct kfd_local_mem_info local_mem_info;
|
||||
|
||||
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
|
||||
|
@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
|||
/* Check if this node supports IOMMU. During parsing this flag will
|
||||
* translate to HSA_CAP_ATS_PRESENT
|
||||
*/
|
||||
iommu_info.flags = 0;
|
||||
if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
|
||||
if ((iommu_info.flags & required_iommu_flags) ==
|
||||
required_iommu_flags)
|
||||
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
|
||||
}
|
||||
if (!kfd_iommu_check_device(kdev))
|
||||
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
|
||||
|
||||
crat_table->length += sub_type_hdr->length;
|
||||
crat_table->total_entries++;
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "kfd_pm4_headers_diq.h"
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_dbgdev.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
|
||||
static DEFINE_MUTEX(kfd_dbgmgr_mutex);
|
||||
|
||||
|
@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
|
|||
}
|
||||
|
||||
/* get actual type of DBGDevice cpsch or not */
|
||||
if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||
if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||
type = DBGDEV_TYPE_NODIQ;
|
||||
|
||||
kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
|
||||
#include <linux/amd-iommu.h>
|
||||
#endif
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -28,9 +30,12 @@
|
|||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_pm4_headers_vi.h"
|
||||
#include "cwsr_trap_handler_gfx8.asm"
|
||||
#include "kfd_iommu.h"
|
||||
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
|
||||
|
||||
#ifdef KFD_SUPPORT_IOMMU_V2
|
||||
static const struct kfd_device_info kaveri_device_info = {
|
||||
.asic_family = CHIP_KAVERI,
|
||||
.max_pasid_bits = 16,
|
||||
|
@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = {
|
|||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info carrizo_device_info = {
|
||||
|
@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = {
|
|||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct kfd_device_info hawaii_device_info = {
|
||||
.asic_family = CHIP_HAWAII,
|
||||
.max_pasid_bits = 16,
|
||||
/* max num of queues for KV.TODO should be a dynamic value */
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info tonga_device_info = {
|
||||
.asic_family = CHIP_TONGA,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info tonga_vf_device_info = {
|
||||
.asic_family = CHIP_TONGA,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info fiji_device_info = {
|
||||
.asic_family = CHIP_FIJI,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info fiji_vf_device_info = {
|
||||
.asic_family = CHIP_FIJI,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
|
||||
|
||||
static const struct kfd_device_info polaris10_device_info = {
|
||||
.asic_family = CHIP_POLARIS10,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info polaris10_vf_device_info = {
|
||||
.asic_family = CHIP_POLARIS10,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info polaris11_device_info = {
|
||||
.asic_family = CHIP_POLARIS11,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
};
|
||||
|
||||
|
||||
struct kfd_deviceid {
|
||||
unsigned short did;
|
||||
const struct kfd_device_info *device_info;
|
||||
};
|
||||
|
||||
/* Please keep this sorted by increasing device id. */
|
||||
static const struct kfd_deviceid supported_devices[] = {
|
||||
#ifdef KFD_SUPPORT_IOMMU_V2
|
||||
{ 0x1304, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x1305, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x1306, &kaveri_device_info }, /* Kaveri */
|
||||
|
@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = {
|
|||
{ 0x9874, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9875, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9876, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9877, &carrizo_device_info } /* Carrizo */
|
||||
{ 0x9877, &carrizo_device_info }, /* Carrizo */
|
||||
#endif
|
||||
{ 0x67A0, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67A1, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67A2, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67A8, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67A9, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67AA, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67B0, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67B1, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67B8, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67B9, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67BA, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67BE, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x6920, &tonga_device_info }, /* Tonga */
|
||||
{ 0x6921, &tonga_device_info }, /* Tonga */
|
||||
{ 0x6928, &tonga_device_info }, /* Tonga */
|
||||
{ 0x6929, &tonga_device_info }, /* Tonga */
|
||||
{ 0x692B, &tonga_device_info }, /* Tonga */
|
||||
{ 0x692F, &tonga_vf_device_info }, /* Tonga vf */
|
||||
{ 0x6938, &tonga_device_info }, /* Tonga */
|
||||
{ 0x6939, &tonga_device_info }, /* Tonga */
|
||||
{ 0x7300, &fiji_device_info }, /* Fiji */
|
||||
{ 0x730F, &fiji_vf_device_info }, /* Fiji vf*/
|
||||
{ 0x67C0, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C1, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C2, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C4, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C7, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C8, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67C9, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67CA, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67CC, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67CF, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/
|
||||
{ 0x67DF, &polaris10_device_info }, /* Polaris10 */
|
||||
{ 0x67E0, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67E1, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67E3, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67E7, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67E8, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67E9, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
|
||||
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
|
||||
};
|
||||
|
||||
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
|
||||
|
@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (device_info->needs_pci_atomics) {
|
||||
/* Allow BIF to recode atomics to PCIe 3.0
|
||||
* AtomicOps. 32 and 64-bit requests are possible and
|
||||
* must be supported.
|
||||
*/
|
||||
if (pci_enable_atomic_ops_to_root(pdev,
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
|
||||
dev_info(kfd_device,
|
||||
"skipped device %x:%x, PCI rejects atomics",
|
||||
pdev->vendor, pdev->device);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
|
||||
if (!kfd)
|
||||
return NULL;
|
||||
|
@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
|
|||
return kfd;
|
||||
}
|
||||
|
||||
static bool device_iommu_pasid_init(struct kfd_dev *kfd)
|
||||
{
|
||||
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
|
||||
|
||||
struct amd_iommu_device_info iommu_info;
|
||||
unsigned int pasid_limit;
|
||||
int err;
|
||||
|
||||
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
|
||||
if (err < 0) {
|
||||
dev_err(kfd_device,
|
||||
"error getting iommu info. is the iommu enabled?\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
|
||||
dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
|
||||
!= 0);
|
||||
return false;
|
||||
}
|
||||
|
||||
pasid_limit = min_t(unsigned int,
|
||||
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
|
||||
iommu_info.max_pasids);
|
||||
|
||||
if (!kfd_set_pasid_limit(pasid_limit)) {
|
||||
dev_err(kfd_device, "error setting pasid limit\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
|
||||
{
|
||||
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
|
||||
|
||||
if (dev)
|
||||
kfd_process_iommu_unbind_callback(dev, pasid);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function called by IOMMU driver on PPR failure
|
||||
*/
|
||||
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
|
||||
unsigned long address, u16 flags)
|
||||
{
|
||||
struct kfd_dev *dev;
|
||||
|
||||
dev_warn(kfd_device,
|
||||
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
|
||||
PCI_BUS_NUM(pdev->devfn),
|
||||
PCI_SLOT(pdev->devfn),
|
||||
PCI_FUNC(pdev->devfn),
|
||||
pasid,
|
||||
address,
|
||||
flags);
|
||||
|
||||
dev = kfd_device_by_pci_dev(pdev);
|
||||
if (!WARN_ON(!dev))
|
||||
kfd_signal_iommu_event(dev, pasid, address,
|
||||
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
|
||||
|
||||
return AMD_IOMMU_INV_PRI_RSP_INVALID;
|
||||
}
|
||||
|
||||
static void kfd_cwsr_init(struct kfd_dev *kfd)
|
||||
{
|
||||
if (cwsr_enable && kfd->device_info->supports_cwsr) {
|
||||
|
@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
|||
goto device_queue_manager_error;
|
||||
}
|
||||
|
||||
if (!device_iommu_pasid_init(kfd)) {
|
||||
dev_err(kfd_device,
|
||||
"Error initializing iommuv2 for device %x:%x\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
goto device_iommu_pasid_error;
|
||||
if (kfd_iommu_device_init(kfd)) {
|
||||
dev_err(kfd_device, "Error initializing iommuv2\n");
|
||||
goto device_iommu_error;
|
||||
}
|
||||
|
||||
kfd_cwsr_init(kfd);
|
||||
|
@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
|||
kfd->pdev->device);
|
||||
|
||||
pr_debug("Starting kfd with the following scheduling policy %d\n",
|
||||
sched_policy);
|
||||
kfd->dqm->sched_policy);
|
||||
|
||||
goto out;
|
||||
|
||||
kfd_resume_error:
|
||||
device_iommu_pasid_error:
|
||||
device_iommu_error:
|
||||
device_queue_manager_uninit(kfd->dqm);
|
||||
device_queue_manager_error:
|
||||
kfd_interrupt_exit(kfd);
|
||||
|
@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
|
|||
if (!kfd->init_complete)
|
||||
return;
|
||||
|
||||
/* For first KFD device suspend all the KFD processes */
|
||||
if (atomic_inc_return(&kfd_device_suspended) == 1)
|
||||
kfd_suspend_all_processes();
|
||||
|
||||
kfd->dqm->ops.stop(kfd->dqm);
|
||||
|
||||
kfd_unbind_processes_from_device(kfd);
|
||||
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
|
||||
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
kfd_iommu_suspend(kfd);
|
||||
}
|
||||
|
||||
int kgd2kfd_resume(struct kfd_dev *kfd)
|
||||
{
|
||||
int ret, count;
|
||||
|
||||
if (!kfd->init_complete)
|
||||
return 0;
|
||||
|
||||
return kfd_resume(kfd);
|
||||
ret = kfd_resume(kfd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
count = atomic_dec_return(&kfd_device_suspended);
|
||||
WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
|
||||
if (count == 0)
|
||||
ret = kfd_resume_all_processes();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kfd_resume(struct kfd_dev *kfd)
|
||||
{
|
||||
int err = 0;
|
||||
unsigned int pasid_limit = kfd_get_pasid_limit();
|
||||
|
||||
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
|
||||
if (err)
|
||||
return -ENXIO;
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
|
||||
iommu_pasid_shutdown_callback);
|
||||
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
|
||||
iommu_invalid_ppr_cb);
|
||||
|
||||
err = kfd_bind_processes_to_device(kfd);
|
||||
if (err)
|
||||
goto processes_bind_error;
|
||||
err = kfd_iommu_resume(kfd);
|
||||
if (err) {
|
||||
dev_err(kfd_device,
|
||||
"Failed to resume IOMMU for device %x:%x\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = kfd->dqm->ops.start(kfd->dqm);
|
||||
if (err) {
|
||||
|
@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd)
|
|||
return err;
|
||||
|
||||
dqm_start_error:
|
||||
processes_bind_error:
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
|
||||
kfd_iommu_suspend(kfd);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
	spin_unlock(&kfd->interrupt_lock);
}

/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 * prepare for safe eviction of KFD BOs that belong to the specified
 * process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/* Avoid KFD process starvation. Wait for at least
	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	/* During process initialization eviction_work.dwork is initialized
	 * to kfd_evict_bo_worker
	 */
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}
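
kgd2kfd_schedule_evict_and_restore_process() throttles evictions so a process gets at least PROCESS_ACTIVE_TIME_MS of runtime between them: the longer the process has already been active since its last restore, the shorter the delay before the eviction work runs, clamped at zero. Below is a small self-contained sketch of that arithmetic in plain milliseconds rather than jiffies; the function name eviction_delay_ms is made up for illustration.

#include <stdio.h>

#define PROCESS_ACTIVE_TIME_MS 10

/*
 * Return how long to delay the eviction work, given how long the
 * process has already been active since its last restore.  This is
 * an illustrative model of the jiffies math above, not driver code.
 */
static unsigned long eviction_delay_ms(unsigned long active_time_ms)
{
	unsigned long delay = PROCESS_ACTIVE_TIME_MS;

	if (delay > active_time_ms)
		delay -= active_time_ms;	/* let it run a bit longer first */
	else
		delay = 0;			/* already had its share, evict now */
	return delay;
}

int main(void)
{
	printf("active 3 ms  -> delay %lu ms\n", eviction_delay_ms(3));	/* 7 */
	printf("active 25 ms -> delay %lu ms\n", eviction_delay_ms(25));	/* 0 */
	return 0;
}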
|
||||
|
||||
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
|
||||
unsigned int chunk_size)
|
||||
{
|
||||
|
|
|
@ -21,10 +21,11 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/sched.h>
|
||||
#include "kfd_priv.h"
|
||||
|
@@ -118,9 +119,8 @@ static int allocate_vmid(struct device_queue_manager *dqm,
	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap,
				dqm->dev->vm_info.vmid_num_kfd);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);

@@ -130,6 +130,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}
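
The hunk above drops the find_first_bit()/clear_bit() calls on a cast pointer in favour of plain integer bit operations: ffs() picks the lowest set bit of the small VMID bitmap and the allocator clears it, while deallocate_vmid() sets it again. A standalone sketch of that allocate/free pairing, with illustrative names, is shown here.

#include <assert.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

static unsigned int vmid_bitmap = 0xff;	/* 8 VMIDs, set bit == free */

/* Returns an index in [0, 7], or -1 if no VMID is free. */
static int alloc_vmid(void)
{
	int bit;

	if (vmid_bitmap == 0)
		return -1;
	bit = ffs(vmid_bitmap) - 1;		/* lowest free VMID */
	vmid_bitmap &= ~(1u << bit);		/* mark it in use */
	return bit;
}

static void free_vmid(int bit)
{
	assert(!(vmid_bitmap & (1u << bit)));	/* must currently be allocated */
	vmid_bitmap |= 1u << bit;
}

int main(void)
{
	int a = alloc_vmid(), b = alloc_vmid();

	printf("allocated %d and %d\n", a, b);	/* 0 and 1 */
	free_vmid(a);
	printf("next: %d\n", alloc_vmid());	/* 0 again */
	return 0;
}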
|
||||
|
||||
|
@ -139,10 +148,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
|
|||
{
|
||||
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
|
||||
|
||||
kfd_flush_tlb(qpd_to_pdd(qpd));
|
||||
|
||||
/* Release the vmid mapping */
|
||||
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
|
||||
|
||||
set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
|
||||
dqm->vmid_bitmap |= (1 << bit);
|
||||
qpd->vmid = 0;
|
||||
q->properties.vmid = 0;
|
||||
}
|
||||
|
@ -170,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
goto out_unlock;
|
||||
}
|
||||
q->properties.vmid = qpd->vmid;
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
*/
|
||||
if (qpd->evicted)
|
||||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||
q->properties.queue_percent > 0 &&
|
||||
q->properties.queue_address != 0);
|
||||
|
||||
q->properties.tba_addr = qpd->tba_addr;
|
||||
q->properties.tma_addr = qpd->tma_addr;
|
||||
|
@ -223,12 +242,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
|
|||
continue;
|
||||
|
||||
if (dqm->allocated_queues[pipe] != 0) {
|
||||
bit = find_first_bit(
|
||||
(unsigned long *)&dqm->allocated_queues[pipe],
|
||||
get_queues_per_pipe(dqm));
|
||||
|
||||
clear_bit(bit,
|
||||
(unsigned long *)&dqm->allocated_queues[pipe]);
|
||||
bit = ffs(dqm->allocated_queues[pipe]) - 1;
|
||||
dqm->allocated_queues[pipe] &= ~(1 << bit);
|
||||
q->pipe = pipe;
|
||||
q->queue = bit;
|
||||
set = true;
|
||||
|
@ -249,7 +264,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
|
|||
static inline void deallocate_hqd(struct device_queue_manager *dqm,
|
||||
struct queue *q)
|
||||
{
|
||||
set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
|
||||
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
|
||||
}
|
||||
|
||||
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
@ -371,21 +386,35 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct kfd_process_device *pdd;
|
||||
bool prev_active = false;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
pdd = kfd_get_process_device_data(q->device, q->process);
|
||||
if (!pdd) {
|
||||
retval = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) {
|
||||
retval = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
*/
|
||||
if (pdd->qpd.evicted)
|
||||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||
q->properties.queue_percent > 0 &&
|
||||
q->properties.queue_address != 0);
|
||||
|
||||
/* Save previous activity state for counters */
|
||||
prev_active = q->properties.is_active;
|
||||
|
||||
/* Make sure the queue is unmapped before updating the MQD */
|
||||
if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
|
||||
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
|
||||
retval = unmap_queues_cpsch(dqm,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
if (retval) {
|
||||
|
@ -417,7 +446,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
else if (!q->properties.is_active && prev_active)
|
||||
dqm->queue_count--;
|
||||
|
||||
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
|
||||
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
|
||||
retval = map_queues_cpsch(dqm);
|
||||
else if (q->properties.is_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
|
@ -451,10 +480,193 @@ static struct mqd_manager *get_mqd_manager(
|
|||
return mqd;
|
||||
}
|
||||
|
||||
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct queue *q;
|
||||
struct mqd_manager *mqd;
|
||||
struct kfd_process_device *pdd;
|
||||
int retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
if (qpd->evicted++ > 0) /* already evicted, do nothing */
|
||||
goto out;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
pr_info_ratelimited("Evicting PASID %u queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* unactivate all active queues on the qpd */
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_active)
|
||||
continue;
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) { /* should not be here */
|
||||
pr_err("Cannot evict queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
q->properties.is_evicted = true;
|
||||
q->properties.is_active = false;
|
||||
retval = mqd->destroy_mqd(mqd, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||
if (retval)
|
||||
goto out;
|
||||
dqm->queue_count--;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* unactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
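
Both eviction paths treat qpd->evicted as a reference count: only the 0 -> 1 transition actually deactivates the queues, and the matching restore paths reactivate them only when the count drops back to 0, so nested evict/restore requests compose safely. A compact standalone model of that counter logic follows; struct qpd_model, evict() and restore() are hypothetical names.

#include <stdio.h>

struct qpd_model {
	unsigned int evicted;	/* eviction counter, 0 = active */
};

/* Returns 1 when this call actually has to deactivate the queues. */
static int evict(struct qpd_model *qpd)
{
	return qpd->evicted++ == 0;
}

/* Returns 1 when this call actually has to reactivate the queues. */
static int restore(struct qpd_model *qpd)
{
	if (qpd->evicted == 0)
		return 0;		/* nothing to restore (would WARN above) */
	if (--qpd->evicted > 0)
		return 0;		/* still evicted on behalf of another caller */
	return 1;
}

int main(void)
{
	struct qpd_model qpd = { 0 };

	printf("evict   -> deactivate: %d\n", evict(&qpd));	/* 1 */
	printf("evict   -> deactivate: %d\n", evict(&qpd));	/* 0, nested */
	printf("restore -> reactivate: %d\n", restore(&qpd));	/* 0 */
	printf("restore -> reactivate: %d\n", restore(&qpd));	/* 1 */
	return 0;
}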
|
||||
|
||||
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct queue *q;
|
||||
struct mqd_manager *mqd;
|
||||
struct kfd_process_device *pdd;
|
||||
uint32_t pd_base;
|
||||
int retval = 0;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
goto out;
|
||||
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
|
||||
qpd->evicted--;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info_ratelimited("Restoring PASID %u queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
qpd->page_table_base = pd_base;
|
||||
pr_debug("Updated PD address to 0x%08x\n", pd_base);
|
||||
|
||||
if (!list_empty(&qpd->queues_list)) {
|
||||
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
|
||||
dqm->dev->kgd,
|
||||
qpd->vmid,
|
||||
qpd->page_table_base);
|
||||
kfd_flush_tlb(pdd);
|
||||
}
|
||||
|
||||
/* activate all active queues on the qpd */
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_evicted)
|
||||
continue;
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) { /* should not be here */
|
||||
pr_err("Cannot restore queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
q->properties.is_evicted = false;
|
||||
q->properties.is_active = true;
|
||||
retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
|
||||
q->queue, &q->properties,
|
||||
q->process->mm);
|
||||
if (retval)
|
||||
goto out;
|
||||
dqm->queue_count++;
|
||||
}
|
||||
qpd->evicted = 0;
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct queue *q;
|
||||
struct kfd_process_device *pdd;
|
||||
uint32_t pd_base;
|
||||
int retval = 0;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
goto out;
|
||||
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
|
||||
qpd->evicted--;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info_ratelimited("Restoring PASID %u queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
qpd->page_table_base = pd_base;
|
||||
pr_debug("Updated PD address to 0x%08x\n", pd_base);
|
||||
|
||||
/* activate all active queues on the qpd */
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_evicted)
|
||||
continue;
|
||||
q->properties.is_evicted = false;
|
||||
q->properties.is_active = true;
|
||||
dqm->queue_count++;
|
||||
}
|
||||
retval = execute_queues_cpsch(dqm,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
if (!retval)
|
||||
qpd->evicted = 0;
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int register_process(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct device_process_node *n;
|
||||
struct kfd_process_device *pdd;
|
||||
uint32_t pd_base;
|
||||
int retval;
|
||||
|
||||
n = kzalloc(sizeof(*n), GFP_KERNEL);
|
||||
|
@ -463,9 +675,16 @@ static int register_process(struct device_queue_manager *dqm,
|
|||
|
||||
n->qpd = qpd;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
list_add(&n->list, &dqm->queues);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
qpd->page_table_base = pd_base;
|
||||
|
||||
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
||||
|
||||
dqm->processes_count++;
|
||||
|
@ -589,10 +808,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
|||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
|
||||
bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
|
||||
CIK_SDMA_QUEUES);
|
||||
|
||||
clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
|
||||
bit = ffs(dqm->sdma_bitmap) - 1;
|
||||
dqm->sdma_bitmap &= ~(1 << bit);
|
||||
*sdma_queue_id = bit;
|
||||
|
||||
return 0;
|
||||
|
@ -603,7 +820,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
|||
{
|
||||
if (sdma_queue_id >= CIK_SDMA_QUEUES)
|
||||
return;
|
||||
set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
|
||||
dqm->sdma_bitmap |= (1 << sdma_queue_id);
|
||||
}
|
||||
|
||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
@ -840,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
*/
|
||||
if (qpd->evicted)
|
||||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||
q->properties.queue_percent > 0 &&
|
||||
q->properties.queue_address != 0);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
|
||||
|
@ -1097,7 +1322,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
|||
alternate_aperture_base,
|
||||
alternate_aperture_size);
|
||||
|
||||
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
|
||||
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
|
||||
program_sh_mem_settings(dqm, qpd);
|
||||
|
||||
pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
|
||||
|
@ -1242,8 +1467,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
if (!dqm)
|
||||
return NULL;
|
||||
|
||||
switch (dev->device_info->asic_family) {
|
||||
/* HWS is not available on Hawaii. */
|
||||
case CHIP_HAWAII:
|
||||
/* HWS depends on CWSR for timely dequeue. CWSR is not
|
||||
* available on Tonga.
|
||||
*
|
||||
* FIXME: This argument also applies to Kaveri.
|
||||
*/
|
||||
case CHIP_TONGA:
|
||||
dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
|
||||
break;
|
||||
default:
|
||||
dqm->sched_policy = sched_policy;
|
||||
break;
|
||||
}
|
||||
|
||||
dqm->dev = dev;
|
||||
switch (sched_policy) {
|
||||
switch (dqm->sched_policy) {
|
||||
case KFD_SCHED_POLICY_HWS:
|
||||
case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
|
||||
/* initialize dqm for cp scheduling */
|
||||
|
@ -1262,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
|
||||
dqm->ops.set_trap_handler = set_trap_handler;
|
||||
dqm->ops.process_termination = process_termination_cpsch;
|
||||
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
|
||||
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
|
||||
break;
|
||||
case KFD_SCHED_POLICY_NO_HWS:
|
||||
/* initialize dqm for no cp scheduling */
|
||||
|
@ -1278,9 +1521,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
|
||||
dqm->ops.set_trap_handler = set_trap_handler;
|
||||
dqm->ops.process_termination = process_termination_nocpsch;
|
||||
dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
|
||||
dqm->ops.restore_process_queues =
|
||||
restore_process_queues_nocpsch;
|
||||
break;
|
||||
default:
|
||||
pr_err("Invalid scheduling policy %d\n", sched_policy);
|
||||
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
|
@ -1292,6 +1538,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
case CHIP_KAVERI:
|
||||
device_queue_manager_init_cik(&dqm->asic_ops);
|
||||
break;
|
||||
|
||||
case CHIP_HAWAII:
|
||||
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
|
||||
break;
|
||||
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
|
||||
break;
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dev->device_info->asic_family);
|
||||
|
|
|
@ -79,6 +79,10 @@ struct device_process_node {
|
|||
*
|
||||
* @process_termination: Clears all process queues belongs to that device.
|
||||
*
|
||||
* @evict_process_queues: Evict all active queues of a process
|
||||
*
|
||||
* @restore_process_queues: Restore all evicted queues of a process
|
||||
*
|
||||
*/
|
||||
|
||||
struct device_queue_manager_ops {
|
||||
|
@ -129,6 +133,11 @@ struct device_queue_manager_ops {
|
|||
|
||||
int (*process_termination)(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
int (*evict_process_queues)(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
int (*restore_process_queues)(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
};
|
||||
|
||||
struct device_queue_manager_asic_ops {
|
||||
|
@ -180,12 +189,17 @@ struct device_queue_manager {
|
|||
unsigned int *fence_addr;
|
||||
struct kfd_mem_obj *fence_mem;
|
||||
bool active_runlist;
|
||||
int sched_policy;
|
||||
};
|
||||
|
||||
void device_queue_manager_init_cik(
|
||||
struct device_queue_manager_asic_ops *asic_ops);
|
||||
void device_queue_manager_init_cik_hawaii(
|
||||
struct device_queue_manager_asic_ops *asic_ops);
|
||||
void device_queue_manager_init_vi(
|
||||
struct device_queue_manager_asic_ops *asic_ops);
|
||||
void device_queue_manager_init_vi_tonga(
|
||||
struct device_queue_manager_asic_ops *asic_ops);
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
unsigned int get_queues_num(struct device_queue_manager *dqm);
|
||||
|
|
|
@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
|
|||
uint64_t alternate_aperture_size);
|
||||
static int update_qpd_cik(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
void device_queue_manager_init_cik(
|
||||
struct device_queue_manager_asic_ops *asic_ops)
|
||||
|
@ -45,6 +50,14 @@ void device_queue_manager_init_cik(
|
|||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_cik_hawaii(
|
||||
struct device_queue_manager_asic_ops *asic_ops)
|
||||
{
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
asic_ops->update_qpd = update_qpd_cik_hawaii;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
{
|
||||
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
|
||||
|
@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
unsigned int temp;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
|
||||
/* check if sh_mem_config register already configured */
|
||||
if (qpd->sh_mem_config == 0) {
|
||||
qpd->sh_mem_config =
|
||||
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
|
||||
DEFAULT_MTYPE(MTYPE_NONCACHED) |
|
||||
APE1_MTYPE(MTYPE_NONCACHED);
|
||||
qpd->sh_mem_ape1_limit = 0;
|
||||
qpd->sh_mem_ape1_base = 0;
|
||||
}
|
||||
|
||||
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
|
||||
* aperture addresses.
|
||||
*/
|
||||
temp = get_sh_mem_bases_nybble_64(pdd);
|
||||
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
|
||||
|
||||
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
|
||||
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
|
@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
|||
|
||||
q->properties.sdma_vm_addr = value;
|
||||
}
|
||||
|
||||
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
|
||||
* aperture addresses.
|
||||
*/
|
||||
q->properties.sdma_vm_addr =
|
||||
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
|
||||
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
|
||||
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
|
||||
}
|
||||
|
|
|
@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
|
|||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size);
|
||||
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size);
|
||||
static int update_qpd_vi(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
void device_queue_manager_init_vi(
|
||||
struct device_queue_manager_asic_ops *asic_ops)
|
||||
|
@ -46,6 +57,14 @@ void device_queue_manager_init_vi(
|
|||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_vi_tonga(
|
||||
struct device_queue_manager_asic_ops *asic_ops)
|
||||
{
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
|
||||
asic_ops->update_qpd = update_qpd_vi_tonga;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
{
|
||||
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
|
||||
|
@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size)
|
||||
{
|
||||
uint32_t default_mtype;
|
||||
uint32_t ape1_mtype;
|
||||
|
||||
default_mtype = (default_policy == cache_policy_coherent) ?
|
||||
MTYPE_UC :
|
||||
MTYPE_NC;
|
||||
|
||||
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
|
||||
MTYPE_UC :
|
||||
MTYPE_NC;
|
||||
|
||||
qpd->sh_mem_config =
|
||||
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
|
||||
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
|
||||
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
|
||||
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int update_qpd_vi(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
|
@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
unsigned int temp;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
|
||||
/* check if sh_mem_config register already configured */
|
||||
if (qpd->sh_mem_config == 0) {
|
||||
qpd->sh_mem_config =
|
||||
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
|
||||
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
|
||||
MTYPE_UC <<
|
||||
SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
|
||||
MTYPE_UC <<
|
||||
SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
|
||||
|
||||
qpd->sh_mem_ape1_limit = 0;
|
||||
qpd->sh_mem_ape1_base = 0;
|
||||
}
|
||||
|
||||
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
|
||||
* aperture addresses.
|
||||
*/
|
||||
temp = get_sh_mem_bases_nybble_64(pdd);
|
||||
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
|
||||
|
||||
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
|
||||
temp, qpd->sh_mem_bases);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
|
@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
|||
|
||||
q->properties.sdma_vm_addr = value;
|
||||
}
|
||||
|
||||
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
|
||||
* aperture addresses.
|
||||
*/
|
||||
q->properties.sdma_vm_addr =
|
||||
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
|
||||
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
|
||||
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <linux/memory.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_events.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include <linux/device.h>
|
||||
|
||||
/*
|
||||
|
@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef KFD_SUPPORT_IOMMU_V2
|
||||
void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
|
||||
unsigned long address, bool is_write_requested,
|
||||
bool is_execute_requested)
|
||||
|
@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
|
|||
mutex_unlock(&p->event_mutex);
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
#endif /* KFD_SUPPORT_IOMMU_V2 */
|
||||
|
||||
void kfd_signal_hw_exception_event(unsigned int pasid)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,357 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/printk.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/amd-iommu.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_topology.h"
|
||||
#include "kfd_iommu.h"
|
||||
|
||||
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
|
||||
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
|
||||
|
||||
/** kfd_iommu_check_device - Check whether IOMMU is available for device
|
||||
*/
|
||||
int kfd_iommu_check_device(struct kfd_dev *kfd)
|
||||
{
|
||||
struct amd_iommu_device_info iommu_info;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
return -ENODEV;
|
||||
|
||||
iommu_info.flags = 0;
|
||||
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
|
||||
return -ENODEV;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** kfd_iommu_device_init - Initialize IOMMU for device
|
||||
*/
|
||||
int kfd_iommu_device_init(struct kfd_dev *kfd)
|
||||
{
|
||||
struct amd_iommu_device_info iommu_info;
|
||||
unsigned int pasid_limit;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
return 0;
|
||||
|
||||
iommu_info.flags = 0;
|
||||
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
|
||||
if (err < 0) {
|
||||
dev_err(kfd_device,
|
||||
"error getting iommu info. is the iommu enabled?\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
|
||||
dev_err(kfd_device,
|
||||
"error required iommu flags ats %i, pri %i, pasid %i\n",
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
|
||||
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
|
||||
!= 0);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
pasid_limit = min_t(unsigned int,
|
||||
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
|
||||
iommu_info.max_pasids);
|
||||
|
||||
if (!kfd_set_pasid_limit(pasid_limit)) {
|
||||
dev_err(kfd_device, "error setting pasid limit\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
|
||||
*
|
||||
* Binds the given process to the given device using its PASID. This
|
||||
* enables IOMMUv2 address translation for the process on the device.
|
||||
*
|
||||
* This function assumes that the process mutex is held.
|
||||
*/
|
||||
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
|
||||
{
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
struct kfd_process *p = pdd->process;
|
||||
int err;
|
||||
|
||||
if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
|
||||
return 0;
|
||||
|
||||
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
|
||||
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
|
||||
if (!err)
|
||||
pdd->bound = PDD_BOUND;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/** kfd_iommu_unbind_process - Unbind process from all devices
|
||||
*
|
||||
* This removes all IOMMU device bindings of the process. To be used
|
||||
* before process termination.
|
||||
*/
|
||||
void kfd_iommu_unbind_process(struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
|
||||
if (pdd->bound == PDD_BOUND)
|
||||
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
|
||||
}
|
||||
|
||||
/* Callback for process shutdown invoked by the IOMMU driver */
|
||||
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
|
||||
{
|
||||
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
|
||||
struct kfd_process *p;
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
if (!dev)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Look for the process that matches the pasid. If there is no such
|
||||
* process, we either released it in amdkfd's own notifier, or there
|
||||
* is a bug. Unfortunately, there is no way to tell...
|
||||
*/
|
||||
p = kfd_lookup_process_by_pasid(pasid);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
pr_debug("Unbinding process %d from IOMMU\n", pasid);
|
||||
|
||||
mutex_lock(kfd_get_dbgmgr_mutex());
|
||||
|
||||
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
|
||||
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
|
||||
kfd_dbgmgr_destroy(dev->dbgmgr);
|
||||
dev->dbgmgr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(kfd_get_dbgmgr_mutex());
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
if (pdd)
|
||||
/* For GPU relying on IOMMU, we need to dequeue here
|
||||
* when PASID is still bound.
|
||||
*/
|
||||
kfd_process_dequeue_from_device(pdd);
|
||||
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
|
||||
/* This function is called by the IOMMU driver on PPR failure */
|
||||
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
|
||||
unsigned long address, u16 flags)
|
||||
{
|
||||
struct kfd_dev *dev;
|
||||
|
||||
dev_warn(kfd_device,
|
||||
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
|
||||
PCI_BUS_NUM(pdev->devfn),
|
||||
PCI_SLOT(pdev->devfn),
|
||||
PCI_FUNC(pdev->devfn),
|
||||
pasid,
|
||||
address,
|
||||
flags);
|
||||
|
||||
dev = kfd_device_by_pci_dev(pdev);
|
||||
if (!WARN_ON(!dev))
|
||||
kfd_signal_iommu_event(dev, pasid, address,
|
||||
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
|
||||
|
||||
return AMD_IOMMU_INV_PRI_RSP_INVALID;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bind processes to the device that have been temporarily unbound
|
||||
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
|
||||
*/
|
||||
static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_process *p;
|
||||
unsigned int temp;
|
||||
int err = 0;
|
||||
|
||||
int idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
mutex_lock(&p->mutex);
|
||||
pdd = kfd_get_process_device_data(kfd, p);
|
||||
|
||||
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
|
||||
mutex_unlock(&p->mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
|
||||
p->lead_thread);
|
||||
if (err < 0) {
|
||||
pr_err("Unexpected pasid %d binding failure\n",
|
||||
p->pasid);
|
||||
mutex_unlock(&p->mutex);
|
||||
break;
|
||||
}
|
||||
|
||||
pdd->bound = PDD_BOUND;
|
||||
mutex_unlock(&p->mutex);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kfd_processes_srcu, idx);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
|
||||
* processes will be restored to PDD_BOUND state in
|
||||
* kfd_bind_processes_to_device.
|
||||
*/
|
||||
static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_process *p;
|
||||
unsigned int temp;
|
||||
|
||||
int idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
mutex_lock(&p->mutex);
|
||||
pdd = kfd_get_process_device_data(kfd, p);
|
||||
|
||||
if (WARN_ON(!pdd)) {
|
||||
mutex_unlock(&p->mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pdd->bound == PDD_BOUND)
|
||||
pdd->bound = PDD_BOUND_SUSPENDED;
|
||||
mutex_unlock(&p->mutex);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kfd_processes_srcu, idx);
|
||||
}
|
||||
|
||||
/** kfd_iommu_suspend - Prepare IOMMU for suspend
|
||||
*
|
||||
* This unbinds processes from the device and disables the IOMMU for
|
||||
* the device.
|
||||
*/
|
||||
void kfd_iommu_suspend(struct kfd_dev *kfd)
|
||||
{
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
return;
|
||||
|
||||
kfd_unbind_processes_from_device(kfd);
|
||||
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
|
||||
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
}
|
||||
|
||||
/** kfd_iommu_resume - Restore IOMMU after resume
|
||||
*
|
||||
* This reinitializes the IOMMU for the device and re-binds previously
|
||||
* suspended processes to the device.
|
||||
*/
|
||||
int kfd_iommu_resume(struct kfd_dev *kfd)
|
||||
{
|
||||
unsigned int pasid_limit;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
return 0;
|
||||
|
||||
pasid_limit = kfd_get_pasid_limit();
|
||||
|
||||
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
|
||||
if (err)
|
||||
return -ENXIO;
|
||||
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
|
||||
iommu_pasid_shutdown_callback);
|
||||
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
|
||||
iommu_invalid_ppr_cb);
|
||||
|
||||
err = kfd_bind_processes_to_device(kfd);
|
||||
if (err) {
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
|
||||
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
|
||||
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
|
||||
*/
|
||||
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
|
||||
{
|
||||
struct kfd_perf_properties *props;
|
||||
|
||||
if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
|
||||
return 0;
|
||||
|
||||
if (!amd_iommu_pc_supported())
|
||||
return 0;
|
||||
|
||||
props = kfd_alloc_struct(props);
|
||||
if (!props)
|
||||
return -ENOMEM;
|
||||
strcpy(props->block_name, "iommu");
|
||||
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
|
||||
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
|
||||
list_add_tail(&props->list, &kdev->perf_props);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __KFD_IOMMU_H__
|
||||
#define __KFD_IOMMU_H__
|
||||
|
||||
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
|
||||
|
||||
#define KFD_SUPPORT_IOMMU_V2
|
||||
|
||||
int kfd_iommu_check_device(struct kfd_dev *kfd);
|
||||
int kfd_iommu_device_init(struct kfd_dev *kfd);
|
||||
|
||||
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
|
||||
void kfd_iommu_unbind_process(struct kfd_process *p);
|
||||
|
||||
void kfd_iommu_suspend(struct kfd_dev *kfd);
|
||||
int kfd_iommu_resume(struct kfd_dev *kfd);
|
||||
|
||||
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
|
||||
|
||||
#else

static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
	return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
	return 0;
}

static inline int kfd_iommu_bind_process_to_device(
	struct kfd_process_device *pdd)
{
	return 0;
}
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
	/* empty */
}

static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
	/* empty */
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
	return 0;
}

static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
	return 0;
}

#endif /* defined(CONFIG_AMD_IOMMU_V2) */

#endif /* __KFD_IOMMU_H__ */
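
This header is what lets the rest of amdkfd call the IOMMU helpers unconditionally: with IOMMUv2 support compiled in, the real implementations in kfd_iommu.c are linked, otherwise the static inline stubs above turn the calls into no-ops (or -ENODEV for the probe check). A tiny self-contained illustration of the same conditional-stub pattern, using made-up names, is sketched below.

#include <errno.h>
#include <stdio.h>

/* Defining HAVE_FANCY_IOMMU would switch in a real implementation. */

#ifdef HAVE_FANCY_IOMMU
int fancy_iommu_check(void);		/* real version would live elsewhere */
#else
static inline int fancy_iommu_check(void)
{
	return -ENODEV;			/* feature compiled out */
}
#endif

int main(void)
{
	/* Callers never need their own #ifdefs; they just handle the error. */
	if (fancy_iommu_check() == -ENODEV)
		puts("running without IOMMU support");
	return 0;
}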
|
|
@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
|||
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_CARRIZO:
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
case CHIP_KAVERI:
|
||||
case CHIP_HAWAII:
|
||||
kernel_queue_init_cik(&kq->ops_asic_specific);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
|
|||
.interrupt = kgd2kfd_interrupt,
|
||||
.suspend = kgd2kfd_suspend,
|
||||
.resume = kgd2kfd_resume,
|
||||
.schedule_evict_and_restore_process =
|
||||
kgd2kfd_schedule_evict_and_restore_process,
|
||||
};
|
||||
|
||||
int sched_policy = KFD_SCHED_POLICY_HWS;
|
||||
|
|
|
@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
|||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_KAVERI:
|
||||
return mqd_manager_init_cik(type, dev);
|
||||
case CHIP_HAWAII:
|
||||
return mqd_manager_init_cik_hawaii(type, dev);
|
||||
case CHIP_CARRIZO:
|
||||
return mqd_manager_init_vi(type, dev);
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
return mqd_manager_init_vi_tonga(type, dev);
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dev->device_info->asic_family);
|
||||
|
|
|
@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|||
mms);
|
||||
}
|
||||
|
||||
static int update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
static int __update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q, unsigned int atc_bit)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
|
||||
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
|
||||
DEFAULT_MIN_AVAIL_SIZE;
|
||||
m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
|
||||
if (atc_bit) {
|
||||
m->cp_hqd_pq_control |= PQ_ATC_EN;
|
||||
m->cp_hqd_ib_control |= IB_ATC_EN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculating queue size which is log base 2 of actual queue size -1
|
||||
|
@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0);
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
return __update_mqd(mm, mqd, q, 1);
|
||||
}
|
||||
|
||||
static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
return __update_mqd(mm, mqd, q, 0);
|
||||
}
|
||||
|
||||
static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
|
@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0);
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0);
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
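
Each update_mqd variant above now recomputes q->is_active from the same four conditions: non-zero queue size, address and percent, and not evicted. That repeated expression could be factored into one small predicate; the following standalone sketch shows such a helper under hypothetical names.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct queue_props {
	uint64_t queue_address;
	uint32_t queue_size;
	uint32_t queue_percent;
	bool is_evicted;
};

/* Mirrors the condition repeated in the update_mqd*() functions above. */
static bool queue_is_active(const struct queue_props *q)
{
	return q->queue_size > 0 &&
	       q->queue_address != 0 &&
	       q->queue_percent > 0 &&
	       !q->is_evicted;
}

int main(void)
{
	struct queue_props q = { .queue_address = 0x1000,
				 .queue_size = 256, .queue_percent = 100 };

	printf("active: %d\n", queue_is_active(&q));	/* 1 */
	q.is_evicted = true;
	printf("active: %d\n", queue_is_active(&q));	/* 0 */
	return 0;
}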
|
||||
|
@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
|||
return mqd;
|
||||
}
|
||||
|
||||
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
|
||||
mqd = mqd_manager_init_cik(type, dev);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
|
||||
mqd->update_mqd = update_mqd_hawaii;
|
||||
return mqd;
|
||||
}
|
||||
|
|
|
@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
|
||||
m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
|
||||
|
||||
m->cp_hqd_pq_doorbell_control =
|
||||
q->doorbell_off <<
|
||||
|
@ -196,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0);
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
|
|||
return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
|
||||
}
|
||||
|
||||
static int update_mqd_tonga(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
return __update_mqd(mm, mqd, q, MTYPE_UC, 0);
|
||||
}
|
||||
|
||||
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
|
||||
enum kfd_preempt_type type,
|
||||
unsigned int timeout, uint32_t pipe_id,
|
||||
|
@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0);
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
|||
|
||||
return mqd;
|
||||
}
|
||||
|
||||
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
|
||||
mqd = mqd_manager_init_vi(type, dev);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
|
||||
mqd->update_mqd = update_mqd_tonga;
|
||||
return mqd;
|
||||
}
|
||||
|
|
|
@ -158,6 +158,8 @@ struct kfd_device_info {
|
|||
uint8_t num_of_watch_points;
|
||||
uint16_t mqd_size_aligned;
|
||||
bool supports_cwsr;
|
||||
bool needs_iommu_device;
|
||||
bool needs_pci_atomics;
|
||||
};
|
||||
|
||||
struct kfd_mem_obj {
|
||||
|
@ -333,7 +335,11 @@ enum kfd_queue_format {
|
|||
* @is_interop: Defines if this is a interop queue. Interop queue means that
|
||||
* the queue can access both graphics and compute resources.
|
||||
*
|
||||
* @is_active: Defines if the queue is active or not.
|
||||
* @is_evicted: Defines if the queue is evicted. Only active queues
|
||||
* are evicted, rendering them inactive.
|
||||
*
|
||||
* @is_active: Defines if the queue is active or not. @is_active and
|
||||
* @is_evicted are protected by the DQM lock.
|
||||
*
|
||||
* @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
|
||||
* of the queue.
|
||||
|
@ -355,6 +361,7 @@ struct queue_properties {
|
|||
uint32_t __iomem *doorbell_ptr;
|
||||
uint32_t doorbell_off;
|
||||
bool is_interop;
|
||||
bool is_evicted;
|
||||
bool is_active;
|
||||
/* Not relevant for user mode queues in cp scheduling */
|
||||
unsigned int vmid;
|
||||
|
@ -458,6 +465,7 @@ struct qcm_process_device {
|
|||
unsigned int queue_count;
|
||||
unsigned int vmid;
|
||||
bool is_debug;
|
||||
unsigned int evicted; /* eviction counter, 0=active */
|
||||
|
||||
/* This flag tells if we should reset all wavefronts on
|
||||
* process termination
|
||||
|
@ -484,6 +492,17 @@ struct qcm_process_device {
|
|||
uint64_t tma_addr;
|
||||
};
|
||||
|
||||
/* KFD Memory Eviction */
|
||||
|
||||
/* Approx. wait time before attempting to restore evicted BOs */
|
||||
#define PROCESS_RESTORE_TIME_MS 100
|
||||
/* Approx. back off time if restore fails due to lack of memory */
|
||||
#define PROCESS_BACK_OFF_TIME_MS 100
|
||||
/* Approx. time before evicting the process again */
|
||||
#define PROCESS_ACTIVE_TIME_MS 10
|
||||
|
||||
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
|
||||
struct dma_fence *fence);
|
||||
|
||||
enum kfd_pdd_bound {
|
||||
PDD_UNBOUND = 0,
|
||||
|
@ -516,8 +535,8 @@ struct kfd_process_device {
|
|||
uint64_t scratch_base;
|
||||
uint64_t scratch_limit;
|
||||
|
||||
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
|
||||
enum kfd_pdd_bound bound;
|
||||
/* VM context for GPUVM allocations */
|
||||
void *vm;
|
||||
|
||||
/* Flag used to tell the pdd has dequeued from the dqm.
|
||||
* This is used to prevent dev->dqm->ops.process_termination() from
|
||||
|
@ -525,6 +544,9 @@ struct kfd_process_device {
|
|||
* function.
|
||||
*/
|
||||
bool already_dequeued;
|
||||
|
||||
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
|
||||
enum kfd_pdd_bound bound;
|
||||
};
|
||||
|
||||
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
|
||||
|
@ -587,8 +609,30 @@ struct kfd_process {
|
|||
size_t signal_mapped_size;
|
||||
size_t signal_event_count;
|
||||
bool signal_event_limit_reached;
|
||||
|
||||
/* Information used for memory eviction */
|
||||
void *kgd_process_info;
|
||||
/* Eviction fence that is attached to all the BOs of this process. The
|
||||
* fence will be triggered during eviction and new one will be created
|
||||
* during restore
|
||||
*/
|
||||
struct dma_fence *ef;
|
||||
|
||||
/* Work items for evicting and restoring BOs */
|
||||
struct delayed_work eviction_work;
|
||||
struct delayed_work restore_work;
|
||||
/* seqno of the last scheduled eviction */
|
||||
unsigned int last_eviction_seqno;
|
||||
/* Approx. the last timestamp (in jiffies) when the process was
|
||||
* restored after an eviction
|
||||
*/
|
||||
unsigned long last_restore_timestamp;
|
||||
};
|
||||
|
||||
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
|
||||
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
|
||||
extern struct srcu_struct kfd_processes_srcu;
|
||||
|
||||
/**
|
||||
* Ioctl function type.
|
||||
*
|
||||
|
@ -612,13 +656,13 @@ void kfd_process_destroy_wq(void);
|
|||
struct kfd_process *kfd_create_process(struct file *filep);
|
||||
struct kfd_process *kfd_get_process(const struct task_struct *);
|
||||
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
|
||||
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
|
||||
void kfd_unref_process(struct kfd_process *p);
|
||||
void kfd_suspend_all_processes(void);
|
||||
int kfd_resume_all_processes(void);
|
||||
|
||||
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
||||
struct kfd_process *p);
|
||||
int kfd_bind_processes_to_device(struct kfd_dev *dev);
|
||||
void kfd_unbind_processes_from_device(struct kfd_dev *dev);
|
||||
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
|
||||
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p);
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
|
@ -705,8 +749,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
|||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
|
||||
void device_queue_manager_uninit(struct device_queue_manager *dqm);
|
||||
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||
|
@ -795,6 +843,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
|
|||
uint64_t *event_page_offset, uint32_t *event_slot_index);
|
||||
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
|
||||
|
||||
void kfd_flush_tlb(struct kfd_process_device *pdd);
|
||||
|
||||
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
|
||||
|
||||
/* Debugfs */
|
||||
|
|
|
@ -34,17 +34,18 @@
|
|||
struct mm_struct;
|
||||
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_iommu.h"
|
||||
|
||||
/*
|
||||
* List of struct kfd_process (field kfd_process).
|
||||
* Unique/indexed by mm_struct*
|
||||
*/
|
||||
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
|
||||
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
|
||||
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
|
||||
static DEFINE_MUTEX(kfd_processes_mutex);
|
||||
|
||||
DEFINE_STATIC_SRCU(kfd_processes_srcu);
|
||||
DEFINE_SRCU(kfd_processes_srcu);
|
||||
|
||||
static struct workqueue_struct *kfd_process_wq;
|
||||
|
||||
|
@ -54,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
|
|||
struct file *filep);
|
||||
static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
|
||||
|
||||
static void evict_process_worker(struct work_struct *work);
|
||||
static void restore_process_worker(struct work_struct *work);
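
These two delayed-work items drive the new eviction flow. Their bodies are not part of this excerpt; presumably the eviction worker quiesces the process's queues through the evict_process_queues DQM op added earlier in this series and then schedules the restore work after roughly PROCESS_RESTORE_TIME_MS. The sketch below is only a rough standalone model of that hand-off, and every name in it is invented for illustration.

#include <stdio.h>

#define PROCESS_RESTORE_TIME_MS 100	/* wait before trying to restore */

/* Stand-ins for the per-process state and the DQM callbacks. */
static void evict_queues(void)   { puts("queues evicted"); }
static void restore_queues(void) { puts("queues restored"); }

static void schedule_after_ms(unsigned int ms, void (*fn)(void))
{
	printf("scheduling work in %u ms\n", ms);
	fn();				/* a real driver defers this on a workqueue */
}

static void restore_worker(void)
{
	restore_queues();
}

static void evict_worker(void)
{
	evict_queues();
	/* Give memory pressure time to ease before restoring. */
	schedule_after_ms(PROCESS_RESTORE_TIME_MS, restore_worker);
}

int main(void)
{
	evict_worker();			/* as if triggered by an eviction fence */
	return 0;
}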
|
||||
|
||||
|
||||
void kfd_process_create_wq(void)
|
||||
{
|
||||
|
@ -154,6 +158,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
|
|||
pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
|
||||
pdd->dev->id, p->pasid);
|
||||
|
||||
if (pdd->vm)
|
||||
pdd->dev->kfd2kgd->destroy_process_vm(
|
||||
pdd->dev->kgd, pdd->vm);
|
||||
|
||||
list_del(&pdd->per_device_list);
|
||||
|
||||
if (pdd->qpd.cwsr_kaddr)
|
||||
|
@ -173,16 +181,11 @@ static void kfd_process_wq_release(struct work_struct *work)
|
|||
{
|
||||
struct kfd_process *p = container_of(work, struct kfd_process,
|
||||
release_work);
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
|
||||
|
||||
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
|
||||
if (pdd->bound == PDD_BOUND)
|
||||
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
|
||||
}
|
||||
kfd_iommu_unbind_process(p);
|
||||
|
||||
kfd_process_destroy_pdds(p);
|
||||
dma_fence_put(p->ef);
|
||||
|
||||
kfd_event_free_process(p);
|
||||
|
||||
|
@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
|
|||
mutex_unlock(&kfd_processes_mutex);
|
||||
synchronize_srcu(&kfd_processes_srcu);
|
||||
|
||||
cancel_delayed_work_sync(&p->eviction_work);
|
||||
cancel_delayed_work_sync(&p->restore_work);
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
/* Iterate over all process device data structures and if the
|
||||
|
@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread,
|
|||
if (err != 0)
|
||||
goto err_init_apertures;
|
||||
|
||||
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
|
||||
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
|
||||
process->last_restore_timestamp = get_jiffies_64();
|
||||
|
||||
err = kfd_process_init_cwsr(process, filep);
|
||||
if (err)
|
||||
goto err_init_cwsr;
|
||||
|
@ -402,12 +412,24 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
|||
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
|
||||
pdd->qpd.dqm = dev->dqm;
|
||||
pdd->qpd.pqm = &p->pqm;
|
||||
pdd->qpd.evicted = 0;
|
||||
pdd->process = p;
|
||||
pdd->bound = PDD_UNBOUND;
|
||||
pdd->already_dequeued = false;
|
||||
list_add(&pdd->per_device_list, &p->per_device_data);
|
||||
|
||||
/* Create the GPUVM context for this specific device */
|
||||
if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
|
||||
&p->kgd_process_info, &p->ef)) {
|
||||
pr_err("Failed to create process VM object\n");
|
||||
goto err_create_pdd;
|
||||
}
|
||||
return pdd;
|
||||
|
||||
err_create_pdd:
|
||||
list_del(&pdd->per_device_list);
|
||||
kfree(pdd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -429,133 +451,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
|||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (pdd->bound == PDD_BOUND) {
|
||||
return pdd;
|
||||
} else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
|
||||
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
|
||||
if (err < 0)
|
||||
err = kfd_iommu_bind_process_to_device(pdd);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
pdd->bound = PDD_BOUND;
|
||||
|
||||
return pdd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bind processes do the device that have been temporarily unbound
|
||||
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
|
||||
*/
|
||||
int kfd_bind_processes_to_device(struct kfd_dev *dev)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_process *p;
|
||||
unsigned int temp;
|
||||
int err = 0;
|
||||
|
||||
int idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
mutex_lock(&p->mutex);
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
|
||||
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
|
||||
mutex_unlock(&p->mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
|
||||
p->lead_thread);
|
||||
if (err < 0) {
|
||||
pr_err("Unexpected pasid %d binding failure\n",
|
||||
p->pasid);
|
||||
mutex_unlock(&p->mutex);
|
||||
break;
|
||||
}
|
||||
|
||||
pdd->bound = PDD_BOUND;
|
||||
mutex_unlock(&p->mutex);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kfd_processes_srcu, idx);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
|
||||
* processes will be restored to PDD_BOUND state in
|
||||
* kfd_bind_processes_to_device.
|
||||
*/
|
||||
void kfd_unbind_processes_from_device(struct kfd_dev *dev)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_process *p;
|
||||
unsigned int temp;
|
||||
|
||||
int idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
mutex_lock(&p->mutex);
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
|
||||
if (WARN_ON(!pdd)) {
|
||||
mutex_unlock(&p->mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pdd->bound == PDD_BOUND)
|
||||
pdd->bound = PDD_BOUND_SUSPENDED;
|
||||
mutex_unlock(&p->mutex);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kfd_processes_srcu, idx);
|
||||
}
|
||||
|
||||
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
|
||||
{
|
||||
struct kfd_process *p;
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
/*
|
||||
* Look for the process that matches the pasid. If there is no such
|
||||
* process, we either released it in amdkfd's own notifier, or there
|
||||
* is a bug. Unfortunately, there is no way to tell...
|
||||
*/
|
||||
p = kfd_lookup_process_by_pasid(pasid);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
pr_debug("Unbinding process %d from IOMMU\n", pasid);
|
||||
|
||||
mutex_lock(kfd_get_dbgmgr_mutex());
|
||||
|
||||
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
|
||||
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
|
||||
kfd_dbgmgr_destroy(dev->dbgmgr);
|
||||
dev->dbgmgr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(kfd_get_dbgmgr_mutex());
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
if (pdd)
|
||||
/* For GPU relying on IOMMU, we need to dequeue here
|
||||
* when PASID is still bound.
|
||||
*/
|
||||
kfd_process_dequeue_from_device(pdd);
|
||||
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
|
||||
struct kfd_process_device *kfd_get_first_process_device_data(
|
||||
struct kfd_process *p)
|
||||
{
|
||||
|
@@ -599,6 +501,208 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
        return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *p;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        p = find_process_by_mm(mm);
        if (p)
                kref_get(&p->ref);

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

/* process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
static int process_evict_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r = 0;
        unsigned int n_evicted = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
                                                        &pdd->qpd);
                if (r) {
                        pr_err("Failed to evict process queues\n");
                        goto fail;
                }
                n_evicted++;
        }

        return r;

fail:
        /* To keep state consistent, roll back partial eviction by
         * restoring queues
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                if (n_evicted == 0)
                        break;
                if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                        &pdd->qpd))
                        pr_err("Failed to restore queues\n");

                n_evicted--;
        }

        return r;
}

/* process_restore_queues - Restore all user queues of a process */
static int process_restore_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r, ret = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                        &pdd->qpd);
                if (r) {
                        pr_err("Failed to restore process queues\n");
                        if (!ret)
                                ret = r;
                }
        }

        return ret;
}

static void evict_process_worker(struct work_struct *work)
{
        int ret;
        struct kfd_process *p;
        struct delayed_work *dwork;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, eviction_work);
        WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
                  "Eviction fence mismatch\n");

        /* Narrow window of overlap between restore and evict work
         * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
         * unreserves KFD BOs, it is possible to evicted again. But
         * restore has few more steps of finish. So lets wait for any
         * previous restore work to complete
         */
        flush_delayed_work(&p->restore_work);

        pr_debug("Started evicting pasid %d\n", p->pasid);
        ret = process_evict_queues(p);
        if (!ret) {
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
                schedule_delayed_work(&p->restore_work,
                                msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

                pr_debug("Finished evicting pasid %d\n", p->pasid);
        } else
                pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
        struct delayed_work *dwork;
        struct kfd_process *p;
        struct kfd_process_device *pdd;
        int ret = 0;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, restore_work);

        /* Call restore_process_bos on the first KGD device. This function
         * takes care of restoring the whole process including other devices.
         * Restore can fail if enough memory is not available. If so,
         * reschedule again.
         */
        pdd = list_first_entry(&p->per_device_data,
                               struct kfd_process_device,
                               per_device_list);

        pr_debug("Started restoring pasid %d\n", p->pasid);

        /* Setting last_restore_timestamp before successful restoration.
         * Otherwise this would have to be set by KGD (restore_process_bos)
         * before KFD BOs are unreserved. If not, the process can be evicted
         * again before the timestamp is set.
         * If restore fails, the timestamp will be set again in the next
         * attempt. This would mean that the minimum GPU quanta would be
         * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
         * functions)
         */

        p->last_restore_timestamp = get_jiffies_64();
        ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
                                                     &p->ef);
        if (ret) {
                pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
                         p->pasid, PROCESS_BACK_OFF_TIME_MS);
                ret = schedule_delayed_work(&p->restore_work,
                                msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
                WARN(!ret, "reschedule restore work failed\n");
                return;
        }

        ret = process_restore_queues(p);
        if (!ret)
                pr_debug("Finished restoring pasid %d\n", p->pasid);
        else
                pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                cancel_delayed_work_sync(&p->eviction_work);
                cancel_delayed_work_sync(&p->restore_work);

                if (process_evict_queues(p))
                        pr_err("Failed to suspend process %d\n", p->pasid);
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (!schedule_delayed_work(&p->restore_work, 0)) {
                        pr_err("Restore process %d failed during resume\n",
                               p->pasid);
                        ret = -EFAULT;
                }
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
        return ret;
}

int kfd_reserved_mem_mmap(struct kfd_process *process,
                          struct vm_area_struct *vma)
{

@@ -633,6 +737,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
                        KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;
        const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
                /* Nothing to flush until a VMID is assigned, which
                 * only happens when the first queue is created.
                 */
                if (pdd->qpd.vmid)
                        f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
        } else {
                f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
        }
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,

        case KFD_QUEUE_TYPE_COMPUTE:
                /* check if there is over subscription */
                if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
                if ((dev->dqm->sched_policy ==
                     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
                ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
                (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
                        pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
@@ -35,6 +35,7 @@
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"

/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;

@@ -677,7 +678,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
        }

        /* All hardware blocks have the same number of attributes. */
        num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
        num_attrs = ARRAY_SIZE(perf_attr_iommu);
        list_for_each_entry(perf, &dev->perf_props, list) {
                perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
                        * num_attrs + sizeof(struct attribute_group),

@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm,
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
        struct kfd_perf_properties *props;

        if (amd_iommu_pc_supported()) {
                props = kfd_alloc_struct(props);
                if (!props)
                        return -ENOMEM;
                strcpy(props->block_name, "iommu");
                props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
                        amd_iommu_pc_get_max_counters(0); /* assume one iommu */
                list_add_tail(&props->list, &kdev->perf_props);
        }

        return 0;
        /* These are the only counters supported so far */
        return kfd_iommu_add_perf_counters(kdev);
}

/* kfd_add_non_crat_information - Add information that is not currently
@@ -25,7 +25,7 @@

#include <linux/types.h>
#include <linux/list.h>
#include "kfd_priv.h"
#include "kfd_crat.h"

#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128

@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device(
                struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);

extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);

#endif /* __KFD_TOPOLOGY_H__ */
@@ -30,6 +30,7 @@

#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>

struct pci_dev;

@@ -107,6 +108,12 @@ struct kgd2kfd_shared_resources {

        /* Number of bytes at start of aperture reserved for KGD. */
        size_t doorbell_start_offset;

        /* GPUVM address space size in bytes */
        uint64_t gpuvm_size;

        /* Minor device number of the render node */
        int drm_render_minor;
};

struct tile_config {

@@ -120,6 +127,25 @@ struct tile_config {
        uint32_t num_ranks;
};


/*
 * Allocation flag domains
 */
#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define ALLOC_MEM_FLAGS_GTT (1 << 1)
#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */
#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */

/*
 * Allocation flags attributes/access options.
 */
#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */
#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
#define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */

/**
 * struct kfd2kgd_calls
 *

@@ -179,6 +205,45 @@ struct tile_config {
 *
 * @get_vram_usage: Returns current VRAM usage
 *
 * @create_process_vm: Create a VM address space for a given process and GPU
 *
 * @destroy_process_vm: Destroy a VM
 *
 * @get_process_page_dir: Get physical address of a VM page directory
 *
 * @set_vm_context_page_table_base: Program page table base for a VMID
 *
 * @alloc_memory_of_gpu: Allocate GPUVM memory
 *
 * @free_memory_of_gpu: Free GPUVM memory
 *
 * @map_memory_to_gpu: Map GPUVM memory into a specific VM address
 * space. Allocates and updates page tables and page directories as
 * needed. This function may return before all page table updates have
 * completed. This allows multiple map operations (on multiple GPUs)
 * to happen concurrently. Use sync_memory to synchronize with all
 * pending updates.
 *
 * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space
 *
 * @sync_memory: Wait for pending page table updates to complete
 *
 * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access
 * Pins the BO, maps it to kernel address space. Such BOs are never evicted.
 * The kernel virtual address remains valid until the BO is freed.
 *
 * @restore_process_bos: Restore all BOs that belong to the
 * process. This is intended for restoring memory mappings after a TTM
 * eviction.
 *
 * @invalidate_tlbs: Invalidate TLBs for a specific PASID
 *
 * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID
 *
 * @submit_ib: Submits an IB to the engine specified by inserting the
 * IB to the corresponding ring (ring type). The IB is executed with the
 * specified VMID in a user mode context.
 *
 * This structure contains function pointers to services that the kgd driver
 * provides to amdkfd driver.
 *

@@ -258,8 +323,6 @@ struct kfd2kgd_calls {
        uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
                        struct kgd_dev *kgd,
                        uint8_t vmid);
        void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
                        uint8_t vmid);

        uint16_t (*get_fw_version)(struct kgd_dev *kgd,
                        enum kgd_engine_type type);

@@ -270,6 +333,33 @@ struct kfd2kgd_calls {
        void (*get_cu_info)(struct kgd_dev *kgd,
                        struct kfd_cu_info *cu_info);
        uint64_t (*get_vram_usage)(struct kgd_dev *kgd);

        int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
                        void **process_info, struct dma_fence **ef);
        void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
        uint32_t (*get_process_page_dir)(void *vm);
        void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
                        uint32_t vmid, uint32_t page_table_base);
        int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
                        uint64_t size, void *vm,
                        struct kgd_mem **mem, uint64_t *offset,
                        uint32_t flags);
        int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem);
        int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
                        void *vm);
        int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
                        void *vm);
        int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
        int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem,
                        void **kptr, uint64_t *size);
        int (*restore_process_bos)(void *process_info, struct dma_fence **ef);

        int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
        int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);

        int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
                        uint32_t vmid, uint64_t gpu_addr,
                        uint32_t *ib_cmd, uint32_t ib_len);
};
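
Illustrative sketch (not part of this patch set): one way amdkfd could drive the new GPUVM callbacks in sequence, allocating a range, mapping it, then waiting for pending page table updates with sync_memory as the documentation above requires. The helper name, flag choice, and error handling are assumptions for illustration; it presumes a struct kfd_dev with valid kgd and kfd2kgd pointers and a VM handle obtained from create_process_vm.

/* Hypothetical example, not kernel code from this series */
static int example_gpuvm_alloc_and_map(struct kfd_dev *kfd, void *vm,
                                       uint64_t va, uint64_t size)
{
        const struct kfd2kgd_calls *f2g = kfd->kfd2kgd;
        struct kgd_mem *mem;
        uint64_t offset;
        int r;

        /* Reserve a GPUVM range and back it with GTT memory (flags assumed) */
        r = f2g->alloc_memory_of_gpu(kfd->kgd, va, size, vm, &mem, &offset,
                                     ALLOC_MEM_FLAGS_GTT |
                                     ALLOC_MEM_FLAGS_WRITABLE);
        if (r)
                return r;

        /* May return before all page table updates have completed */
        r = f2g->map_memory_to_gpu(kfd->kgd, mem, vm);
        if (r)
                goto free_mem;

        /* Wait (interruptibly) for the pending page table updates */
        r = f2g->sync_memory(kfd->kgd, mem, true);
        if (r)
                goto unmap;

        return 0;

unmap:
        f2g->unmap_memory_to_gpu(kfd->kgd, mem, vm);
free_mem:
        f2g->free_memory_of_gpu(kfd->kgd, mem);
        return r;
}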

/**

@@ -288,6 +378,9 @@ struct kfd2kgd_calls {
 *
 * @resume: Notifies amdkfd about a resume action done to a kgd device
 *
 * @schedule_evict_and_restore_process: Schedules work queue that will prepare
 * for safe eviction of KFD BOs that belong to the specified process.
 *
 * This structure contains function callback pointers so the kgd driver
 * will notify to the amdkfd about certain status changes.
 *

@@ -302,6 +395,8 @@ struct kgd2kfd_calls {
        void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
        void (*suspend)(struct kfd_dev *kfd);
        int (*resume)(struct kfd_dev *kfd);
        int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
                        struct dma_fence *fence);
};
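
Illustrative sketch (not part of this patch set) of the reverse direction: when amdgpu is about to evict a BO protected by a KFD eviction fence, the kgd side can use the new callback to quiesce the owning process. The helper name and calling context are assumptions; kgd2kfd stands for the kgd driver's pointer to the kgd2kfd_calls table.

/* Hypothetical example: ask amdkfd to evict the queues of the process that
 * owns 'mm' and to signal 'fence' once its queues are off the hardware.
 */
static int example_request_kfd_eviction(struct mm_struct *mm,
                                        struct dma_fence *fence)
{
        int r;

        r = kgd2kfd->schedule_evict_and_restore_process(mm, fence);
        if (r)
                pr_debug("Scheduling KFD eviction failed: %d\n", r);

        return r;
}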

int kgd2kfd_init(unsigned interface_version,
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
};

struct kfd_ioctl_set_trap_handler_args {
        uint64_t tba_addr; /* to KFD */
        uint64_t tma_addr; /* to KFD */
        uint32_t gpu_id; /* to KFD */
        uint32_t pad;
        __u64 tba_addr; /* to KFD */
        __u64 tma_addr; /* to KFD */
        __u32 gpu_id; /* to KFD */
        __u32 pad;
};

#define AMDKFD_IOCTL_BASE 'K'