drm/amdkfd: Add DMABuf import functionality

This is used for interoperability between ROCm compute and graphics
APIs. It allows importing graphics driver BOs into the ROCm SVM
address space for zero-copy GPU access.

The API is split into two steps (query and import) to allow user mode
to manage the virtual address space allocation for the imported
buffer.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3704d56e1a
commit 1dde0ea95b
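
For orientation, here is a minimal user-mode sketch of the two-step flow this patch adds (not part of the patch itself). The ioctl numbers and argument structs are the ones defined in the uapi hunk below; the open /dev/kfd descriptor, the dmabuf fd from the graphics driver, the helper name, and the VA reservation step are assumptions, and error handling is abbreviated:

/*
 * Hypothetical sketch: query the buffer first so user mode can reserve
 * a virtual address range for it, then import it at that address.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int import_graphics_bo(int kfd_fd, int dmabuf_fd, uint64_t va,
			      uint64_t *handle)
{
	struct kfd_ioctl_get_dmabuf_info_args info = { .dmabuf_fd = dmabuf_fd };
	struct kfd_ioctl_import_dmabuf_args import = {0};

	/* Step 1: query size, owning GPU and memory flags of the buffer */
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_DMABUF_INFO, &info))
		return -1;

	/* ... caller reserves info.size bytes of SVM address space at va ... */

	/* Step 2: import the buffer at the chosen virtual address */
	import.va_addr = va;
	import.gpu_id = info.gpu_id;
	import.dmabuf_fd = dmabuf_fd;
	if (ioctl(kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, &import))
		return -1;

	*handle = import.handle;
	return 0;
}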
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
+#include <linux/dma-buf.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
 
@@ -433,6 +434,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
 	cu_info->lds_size = acu_info.lds_size;
 }
 
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dma_buf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct dma_buf *dma_buf;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	uint64_t metadata_flags;
+	int r = -EINVAL;
+
+	dma_buf = dma_buf_get(dma_buf_fd);
+	if (IS_ERR(dma_buf))
+		return PTR_ERR(dma_buf);
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		goto out_put;
+
+	obj = dma_buf->priv;
+	if (obj->dev->driver != adev->ddev->driver)
+		/* Can't handle buffers from different drivers */
+		goto out_put;
+
+	adev = obj->dev->dev_private;
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				       AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		goto out_put;
+
+	r = 0;
+	if (dma_buf_kgd)
+		*dma_buf_kgd = (struct kgd_dev *)adev;
+	if (bo_size)
+		*bo_size = amdgpu_bo_size(bo);
+	if (metadata_size)
+		*metadata_size = bo->metadata_size;
+	if (metadata_buffer)
+		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+					   metadata_size, &metadata_flags);
+	if (flags) {
+		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+			ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
+
+		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+			*flags |= ALLOC_MEM_FLAGS_PUBLIC;
+	}
+
+out_put:
+	dma_buf_put(dma_buf);
+	return r;
+}
+
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -149,6 +149,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dmabuf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags);
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 
@@ -200,6 +205,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					  struct kfd_vm_fault_info *info);
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+				      struct dma_buf *dmabuf,
+				      uint64_t va, void *vm,
+				      struct kgd_mem **mem, uint64_t *size,
+				      uint64_t *mmap_offset);
 
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
 
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
+#include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -1664,6 +1665,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 	return 0;
 }
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+				      struct dma_buf *dma_buf,
+				      uint64_t va, void *vm,
+				      struct kgd_mem **mem, uint64_t *size,
+				      uint64_t *mmap_offset)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		return -EINVAL;
+
+	obj = dma_buf->priv;
+	if (obj->dev->dev_private != adev)
+		/* Can't handle buffers from other devices */
+		return -EINVAL;
+
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				       AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		return -EINVAL;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+
+	if (size)
+		*size = amdgpu_bo_size(bo);
+
+	if (mmap_offset)
+		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	mutex_init(&(*mem)->lock);
+	(*mem)->mapping_flags =
+		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+
+	(*mem)->bo = amdgpu_bo_ref(bo);
+	(*mem)->va = va;
+	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->mapped_to_gpu_memory = 0;
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+	amdgpu_sync_create(&(*mem)->sync);
+
+	return 0;
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
 /*
  * GEM objects.
  */
@@ -39,8 +39,6 @@
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops;
-
 /**
  * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
  * implementation
@@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
 	return ret;
 }
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_gem_map_attach,
 	.detach = amdgpu_gem_map_detach,
 	.map_dma_buf = drm_gem_map_dma_buf,
@@ -33,6 +33,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/dma-buf.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -1550,6 +1551,115 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 	return err;
 }
 
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_dmabuf_info_args *args = data;
+	struct kfd_dev *dev = NULL;
+	struct kgd_dev *dma_buf_kgd;
+	void *metadata_buffer = NULL;
+	uint32_t flags;
+	unsigned int i;
+	int r;
+
+	/* Find a KFD GPU device that supports the get_dmabuf_info query */
+	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
+		if (dev)
+			break;
+	if (!dev)
+		return -EINVAL;
+
+	if (args->metadata_ptr) {
+		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
+		if (!metadata_buffer)
+			return -ENOMEM;
+	}
+
+	/* Get dmabuf info from KGD */
+	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
+					  &dma_buf_kgd, &args->size,
+					  metadata_buffer, args->metadata_size,
+					  &args->metadata_size, &flags);
+	if (r)
+		goto exit;
+
+	/* Reverse-lookup gpu_id from kgd pointer */
+	dev = kfd_device_by_kgd(dma_buf_kgd);
+	if (!dev) {
+		r = -EINVAL;
+		goto exit;
+	}
+	args->gpu_id = dev->id;
+	args->flags = flags;
+
+	/* Copy metadata buffer to user mode */
+	if (metadata_buffer) {
+		r = copy_to_user((void __user *)args->metadata_ptr,
+				 metadata_buffer, args->metadata_size);
+		if (r != 0)
+			r = -EFAULT;
+	}
+
+exit:
+	kfree(metadata_buffer);
+
+	return r;
+}
+
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_import_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	struct kfd_dev *dev;
+	int idr_handle;
+	uint64_t size;
+	void *mem;
+	int r;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (!dmabuf)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		r = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+					      args->va_addr, pdd->vm,
+					      (struct kgd_mem **)&mem, &size,
+					      NULL);
+	if (r)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		r = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1635,7 +1745,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 			kfd_ioctl_set_cu_mask, 0),
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
-			kfd_ioctl_get_queue_wave_state, 0)
+			kfd_ioctl_get_queue_wave_state, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+				kfd_ioctl_get_dmabuf_info, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+				kfd_ioctl_import_dmabuf, 0),
 
 };
 
@@ -793,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
 int kfd_numa_node_to_apic_id(int numa_node_id);
 
@@ -111,6 +111,24 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 	return device;
 }
 
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
 /* Called with write topology_lock acquired */
 static void kfd_release_topology_device(struct kfd_topology_device *dev)
 {
@@ -398,6 +398,24 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 	__u32 n_success;		/* to/from KFD */
 };
 
+struct kfd_ioctl_get_dmabuf_info_args {
+	__u64 size;		/* from KFD */
+	__u64 metadata_ptr;	/* to KFD */
+	__u32 metadata_size;	/* to KFD (space allocated by user)
+				 * from KFD (actual metadata size)
+				 */
+	__u32 gpu_id;		/* from KFD */
+	__u32 flags;		/* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
+	__u32 dmabuf_fd;	/* to KFD */
+};
+
+struct kfd_ioctl_import_dmabuf_args {
+	__u64 va_addr;		/* to KFD */
+	__u64 handle;		/* from KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 dmabuf_fd;	/* to KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -486,7 +504,13 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE		\
 		AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
 
+#define AMDKFD_IOC_GET_DMABUF_INFO		\
+		AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
+
+#define AMDKFD_IOC_IMPORT_DMABUF		\
+		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1C
+#define AMDKFD_COMMAND_END		0x1E
 
 #endif
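
As a usage note, the dmabuf fd consumed by these ioctls would typically come from the graphics side via standard DRM PRIME export. A sketch of that step using libdrm follows; this is an assumption for illustration, not part of this patch, and the render-node fd and GEM handle are presumed to come from the graphics API:

/*
 * Sketch: export a graphics BO as a dmabuf fd that
 * AMDKFD_IOC_GET_DMABUF_INFO / AMDKFD_IOC_IMPORT_DMABUF can consume.
 * drm_fd is an open amdgpu render node; gem_handle is a GEM handle
 * obtained from the graphics API (hypothetical caller).
 */
#include <stdint.h>
#include <xf86drm.h>

static int export_bo_to_dmabuf(int drm_fd, uint32_t gem_handle)
{
	int dmabuf_fd = -1;

	/* DRM PRIME: GEM handle -> dmabuf file descriptor */
	if (drmPrimeHandleToFD(drm_fd, gem_handle,
			       DRM_CLOEXEC | DRM_RDWR, &dmabuf_fd))
		return -1;

	return dmabuf_fd;
}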