diff --git a/MAINTAINERS b/MAINTAINERS index ddb9ac8d32b3..23fa31dc9390 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -624,6 +624,8 @@ L: dri-devel@lists.freedesktop.org T: git git://people.freedesktop.org/~gabbayo/linux.git S: Supported F: drivers/gpu/drm/amd/amdkfd/ +F: drivers/gpu/drm/amd/include/cik_structs.h +F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/radeon/radeon_kfd.c F: drivers/gpu/drm/radeon/radeon_kfd.h F: include/uapi/linux/kfd_ioctl.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index be6246de5091..cd09c05ee7e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -7,8 +7,11 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ - kfd_kernel_queue.o kfd_packet_manager.o \ + kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_kernel_queue.o kfd_kernel_queue_cik.o \ + kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ + kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 607fc5ceadbe..01ff332fabd4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -168,6 +168,8 @@ #define IB_ATC_EN (1U << 23) #define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) +#define AQL_ENABLE 1 + #define CP_HQD_DEQUEUE_REQUEST 0xC974 #define DEQUEUE_REQUEST_DRAIN 1 #define DEQUEUE_REQUEST_RESET 2 @@ -188,6 +190,17 @@ #define MQD_VMID_MASK (0xf << 0) #define MQD_CONTROL_PRIV_STATE_EN (1U << 8) +#define SDMA_RB_VMID(x) (x << 24) +#define SDMA_RB_ENABLE (1 << 0) +#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */ +#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12) +#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ +#define SDMA_OFFSET(x) (x << 0) +#define SDMA_DB_ENABLE (1 << 28) +#define SDMA_ATC (1 << 0) +#define SDMA_VA_PTR32 (1 << 4) +#define SDMA_VA_SHARED_BASE(x) (x << 8) + #define GRBM_GFX_INDEX 0x30800 #define INSTANCE_INDEX(x) ((x) << 0) #define SH_INDEX(x) ((x) << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4c0b1e42e405..38b6150a19ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -145,6 +145,8 @@ static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, static int set_queue_properties_from_user(struct queue_properties *q_properties, struct kfd_ioctl_create_queue_args *args) { + void *tmp; + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; @@ -182,6 +184,20 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return -EFAULT; } + tmp = (void *)(uintptr_t)args->eop_buffer_address; + if (tmp != NULL && + !access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) { + pr_debug("kfd: can't access eop buffer"); + return -EFAULT; + } + + tmp = (void *)(uintptr_t)args->ctx_save_restore_address; + if (tmp != NULL && + !access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) { + pr_debug("kfd: can't access ctx save restore buffer"); + return -EFAULT; + } + q_properties->is_interop = false; q_properties->queue_percent = args->queue_percentage; q_properties->priority = args->queue_priority; @@ -189,6 +205,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->queue_size = args->ring_size; q_properties->read_ptr = (uint32_t *) args->read_pointer_address; q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->eop_ring_buffer_address = args->eop_buffer_address; + q_properties->eop_ring_buffer_size = args->eop_buffer_size; + q_properties->ctx_save_restore_area_address = + args->ctx_save_restore_address; + q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -220,6 +241,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Format (%d)\n", q_properties->format); + pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address); + + pr_debug("Queue CTX save arex (0x%llX)\n", + q_properties->ctx_save_restore_area_address); + return 0; } @@ -244,9 +270,12 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err) return err; + pr_debug("kfd: looking for gpu id 0x%x\n", args.gpu_id); dev = kfd_device_by_id(args.gpu_id); - if (dev == NULL) + if (dev == NULL) { + pr_debug("kfd: gpu id 0x%x was not found\n", args.gpu_id); return -EINVAL; + } mutex_lock(&p->mutex); @@ -410,7 +439,7 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; - if (!dev->dqm->set_cache_memory_policy(dev->dqm, + if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 994a9c1bdd04..a770ec6f22ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -31,6 +31,14 @@ #define MQD_SIZE_ALIGNED 768 static const struct kfd_device_info kaveri_device_info = { + .asic_family = CHIP_KAVERI, + .max_pasid_bits = 16, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .mqd_size_aligned = MQD_SIZE_ALIGNED +}; + +static const struct kfd_device_info carrizo_device_info = { + .asic_family = CHIP_CARRIZO, .max_pasid_bits = 16, .ih_ring_entry_size = 4 * sizeof(uint32_t), .num_of_watch_points = 4, @@ -65,7 +73,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x1318, &kaveri_device_info }, /* Kaveri */ { 0x131B, &kaveri_device_info }, /* Kaveri */ { 0x131C, &kaveri_device_info }, /* Kaveri */ - { 0x131D, &kaveri_device_info }, /* Kaveri */ + { 0x131D, &kaveri_device_info } /* Kaveri */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -245,7 +253,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (kfd->dqm->start(kfd->dqm) != 0) { + if (kfd->dqm->ops.start(kfd->dqm) != 0) { dev_err(kfd_device, "Error starting queuen manager for device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); @@ -299,7 +307,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) BUG_ON(kfd == NULL); if (kfd->init_complete) { - kfd->dqm->stop(kfd->dqm); + kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); amd_iommu_free_device(kfd->pdev); } @@ -320,7 +328,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return -ENXIO; amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); - kfd->dqm->start(kfd->dqm); + kfd->dqm->ops.start(kfd->dqm); } return 0; @@ -503,7 +511,10 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) unsigned int bit; BUG_ON(!kfd); - BUG_ON(!mem_obj); + + /* Act like kfree when trying to free a NULL object */ + if (!mem_obj) + return 0; pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", mem_obj, mem_obj->range_start, mem_obj->range_end); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6806e64c5ffd..e804e871ff82 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -26,20 +26,17 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" #include "kfd_kernel_queue.h" -#include "../../radeon/cik_reg.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) -static bool is_mem_initialized; - -static int init_memory(struct device_queue_manager *dqm); static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid); @@ -61,11 +58,11 @@ static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { if (type == KFD_QUEUE_TYPE_SDMA) - return KFD_MQD_TYPE_CIK_SDMA; - return KFD_MQD_TYPE_CIK_CP; + return KFD_MQD_TYPE_SDMA; + return KFD_MQD_TYPE_CP; } -static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +inline unsigned int get_pipes_num(struct device_queue_manager *dqm) { BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.compute_pipe_count; @@ -82,7 +79,7 @@ static inline unsigned int get_pipes_num_cpsch(void) return PIPE_PER_ME_CP_SCHEDULING; } -static inline unsigned int +inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) { uint32_t nybble; @@ -92,7 +89,7 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) return nybble; } -static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) +inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { unsigned int shared_base; @@ -101,41 +98,7 @@ static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) return shared_base; } -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble); -static void init_process_memory(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - BUG_ON(!dqm || !qpd); - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | - DEFAULT_MTYPE(MTYPE_NONCACHED) | - APE1_MTYPE(MTYPE_NONCACHED); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - - if (qpd->pqm->process->is_32bit_user_mode) { - temp = get_sh_mem_bases_32(pdd); - qpd->sh_mem_bases = SHARED_BASE(temp); - qpd->sh_mem_config |= PTR32; - } else { - temp = get_sh_mem_bases_nybble_64(pdd); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); - } - - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", - qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); -} - -static void program_sh_mem_settings(struct device_queue_manager *dqm, +void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, @@ -229,12 +192,12 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) { bool set; - int pipe, bit; + int pipe, bit, i; set = false; - for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm); - pipe = (pipe + 1) % get_pipes_num(dqm)) { + for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm); + pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) { if (dqm->allocated_queues[pipe] != 0) { bit = find_first_bit( (unsigned long *)&dqm->allocated_queues[pipe], @@ -275,7 +238,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, BUG_ON(!dqm || !q || !qpd); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) return -ENOMEM; @@ -298,7 +261,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd, *mqd_sdma; + struct mqd_manager *mqd; + BUG_ON(!dqm || !q || !q->mqd || !qpd); retval = 0; @@ -306,33 +270,32 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("kfd: In Func %s\n", __func__); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } - mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA); - if (mqd_sdma == NULL) { - mutex_unlock(&dqm->lock); - return -ENOMEM; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (mqd == NULL) { + retval = -ENOMEM; + goto out; + } + deallocate_hqd(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + if (mqd == NULL) { + retval = -ENOMEM; + goto out; + } + dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); } retval = mqd->destroy_mqd(mqd, q->mqd, - KFD_PREEMPT_TYPE_WAVEFRONT, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); if (retval != 0) goto out; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) - deallocate_hqd(dqm, q); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); - } - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); list_del(&q->list); @@ -352,7 +315,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) BUG_ON(!dqm || !q || !q->mqd); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, q->properties.type); + mqd = dqm->ops.get_mqd_manager(dqm, q->properties.type); if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -395,6 +358,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; + int retval; BUG_ON(!dqm || !qpd); @@ -409,12 +373,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); - init_process_memory(dqm, qpd); + retval = dqm->ops_asic_specific.register_process(dqm, qpd); + dqm->processes_count++; mutex_unlock(&dqm->lock); - return 0; + return retval; } static int unregister_process_nocpsch(struct device_queue_manager *dqm, @@ -459,48 +424,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, vmid); } -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) -{ - /* In 64-bit mode, we can only control the top 3 bits of the LDS, - * scratch and GPUVM apertures. - * The hardware fills in the remaining 59 bits according to the - * following pattern: - * LDS: X0000000'00000000 - X0000001'00000000 (4GB) - * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) - * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) - * - * (where X/Y is the configurable nybble with the low-bit 0) - * - * LDS and scratch will have the same top nybble programmed in the - * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. - * GPUVM can have a different top nybble programmed in the - * top 3 bits of SH_MEM_BASES.SHARED_BASE. - * We don't bother to support different top nybbles - * for LDS/Scratch and GPUVM. - */ - - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || - top_address_nybble == 0); - - return PRIVATE_BASE(top_address_nybble << 12) | - SHARED_BASE(top_address_nybble << 12); -} - -static int init_memory(struct device_queue_manager *dqm) -{ - int i, retval; - - for (i = 8; i < 16; i++) - set_pasid_vmid_mapping(dqm, 0, i); - - retval = kfd2kgd->init_memory(dqm->dev->kgd); - if (retval == 0) - is_mem_initialized = true; - return retval; -} - - -static int init_pipelines(struct device_queue_manager *dqm, +int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe) { void *hpdptr; @@ -533,7 +457,7 @@ static int init_pipelines(struct device_queue_manager *dqm, memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) { kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); return -ENOMEM; @@ -560,10 +484,6 @@ static int init_scheduler(struct device_queue_manager *dqm) pr_debug("kfd: In %s\n", __func__); retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE); - if (retval != 0) - return retval; - - retval = init_memory(dqm); return retval; } @@ -668,7 +588,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd; int retval; - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); if (!mqd) return -ENOMEM; @@ -737,7 +657,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - retval = init_pipelines(dqm, get_pipes_num(dqm), 0); + retval = dqm->ops_asic_specific.initialize(dqm); if (retval != 0) goto fail_init_pipelines; @@ -871,7 +791,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) select_sdma_engine_id(q); - mqd = dqm->get_mqd_manager(dqm, + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (mqd == NULL) { @@ -910,7 +830,7 @@ static int fence_wait_timeout(unsigned int *fence_addr, pr_err("kfd: qcm fence wait loop timeout expired\n"); return -ETIME; } - cpu_relax(); + schedule(); } return 0; @@ -1016,7 +936,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; @@ -1057,8 +977,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - uint32_t default_mtype; - uint32_t ape1_mtype; + bool retval; pr_debug("kfd: In func %s\n", __func__); @@ -1095,18 +1014,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; } - default_mtype = (default_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - ape1_mtype = (alternate_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) - | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) - | DEFAULT_MTYPE(default_mtype) - | APE1_MTYPE(ape1_mtype); + retval = dqm->ops_asic_specific.set_cache_memory_policy( + dqm, + qpd, + default_policy, + alternate_policy, + alternate_aperture_base, + alternate_aperture_size); if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); @@ -1116,7 +1030,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit); mutex_unlock(&dqm->lock); - return true; + return retval; out: mutex_unlock(&dqm->lock); @@ -1129,6 +1043,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) BUG_ON(!dev); + pr_debug("kfd: loading device queue manager\n"); + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); if (!dqm) return NULL; @@ -1138,40 +1054,47 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case KFD_SCHED_POLICY_HWS: case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: /* initialize dqm for cp scheduling */ - dqm->create_queue = create_queue_cpsch; - dqm->initialize = initialize_cpsch; - dqm->start = start_cpsch; - dqm->stop = stop_cpsch; - dqm->destroy_queue = destroy_queue_cpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->create_kernel_queue = create_kernel_queue_cpsch; - dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.create_queue = create_queue_cpsch; + dqm->ops.initialize = initialize_cpsch; + dqm->ops.start = start_cpsch; + dqm->ops.stop = stop_cpsch; + dqm->ops.destroy_queue = destroy_queue_cpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; + dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ - dqm->start = start_nocpsch; - dqm->stop = stop_nocpsch; - dqm->create_queue = create_queue_nocpsch; - dqm->destroy_queue = destroy_queue_nocpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->initialize = initialize_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.start = start_nocpsch; + dqm->ops.stop = stop_nocpsch; + dqm->ops.create_queue = create_queue_nocpsch; + dqm->ops.destroy_queue = destroy_queue_nocpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.initialize = initialize_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: BUG(); break; } - if (dqm->initialize(dqm) != 0) { + switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + device_queue_manager_init_vi(&dqm->ops_asic_specific); + case CHIP_KAVERI: + device_queue_manager_init_cik(&dqm->ops_asic_specific); + } + + if (dqm->ops.initialize(dqm) != 0) { kfree(dqm); return NULL; } @@ -1183,7 +1106,6 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) { BUG_ON(!dqm); - dqm->uninitialize(dqm); + dqm->ops.uninitialize(dqm); kfree(dqm); } - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 554c06ee8892..19347956eeb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -46,7 +46,7 @@ struct device_process_node { }; /** - * struct device_queue_manager + * struct device_queue_manager_ops * * @create_queue: Queue creation routine. * @@ -81,15 +81,9 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * - * This struct is a base class for the kfd queues scheduler in the - * device level. The device base class should expose the basic operations - * for queue creation and queue destruction. This base class hides the - * scheduling mode of the driver and the specific implementation of the - * concrete device. This class is the only class in the queues scheduler - * that configures the H/W. */ -struct device_queue_manager { +struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, @@ -124,7 +118,23 @@ struct device_queue_manager { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +}; +/** + * struct device_queue_manager + * + * This struct is a base class for the kfd queues scheduler in the + * device level. The device base class should expose the basic operations + * for queue creation and queue destruction. This base class hides the + * scheduling mode of the driver and the specific implementation of the + * concrete device. This class is the only class in the queues scheduler + * that configures the H/W. + * + */ + +struct device_queue_manager { + struct device_queue_manager_ops ops; + struct device_queue_manager_ops ops_asic_specific; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -146,6 +156,14 @@ struct device_queue_manager { bool active_runlist; }; - +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops); +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops); +void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *qpd); +inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd); +int init_pipelines(struct device_queue_manager *dqm, + unsigned int pipes_num, unsigned int first_pipe); +inline unsigned int get_pipes_num(struct device_queue_manager *dqm); #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c new file mode 100644 index 000000000000..6b072466e2a6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -0,0 +1,135 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "cik_regs.h" + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_cik(struct device_queue_manager *dqm); + +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops) +{ + ops->set_cache_memory_policy = set_cache_memory_policy_cik; + ops->register_process = register_process_cik; + ops->initialize = initialize_cpsch_cik; +} + +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) +{ + /* In 64-bit mode, we can only control the top 3 bits of the LDS, + * scratch and GPUVM apertures. + * The hardware fills in the remaining 59 bits according to the + * following pattern: + * LDS: X0000000'00000000 - X0000001'00000000 (4GB) + * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) + * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) + * + * (where X/Y is the configurable nybble with the low-bit 0) + * + * LDS and scratch will have the same top nybble programmed in the + * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. + * GPUVM can have a different top nybble programmed in the + * top 3 bits of SH_MEM_BASES.SHARED_BASE. + * We don't bother to support different top nybbles + * for LDS/Scratch and GPUVM. + */ + + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return PRIVATE_BASE(top_address_nybble << 12) | + SHARED_BASE(top_address_nybble << 12); +} + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) + | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) + | DEFAULT_MTYPE(default_mtype) + | APE1_MTYPE(ape1_mtype); + + return true; +} + +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + BUG_ON(!dqm || !qpd); + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + if (qpd->pqm->process->is_32bit_user_mode) { + temp = get_sh_mem_bases_32(pdd); + qpd->sh_mem_bases = SHARED_BASE(temp); + qpd->sh_mem_config |= PTR32; + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + } + + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + +static int initialize_cpsch_cik(struct device_queue_manager *dqm) +{ + return init_pipelines(dqm, get_pipes_num(dqm), 0); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c new file mode 100644 index 000000000000..20553dcd257d --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_vi(struct device_queue_manager *dqm); + +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops) +{ + pr_warn("amdkfd: VI DQM is not currently supported\n"); + + ops->set_cache_memory_policy = set_cache_memory_policy_vi; + ops->register_process = register_process_vi; + ops->initialize = initialize_cpsch_vi; +} + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + return false; +} + +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + return -1; +} + +static int initialize_cpsch_vi(struct device_queue_manager *dqm) +{ + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 0fd8bb7c863e..c04b1ac60bd9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, switch (type) { case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_HIQ: - kq->mqd = dev->dqm->get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_CIK_HIQ); + kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, + KFD_MQD_TYPE_HIQ); break; default: BUG(); @@ -73,13 +73,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, goto err_get_kernel_doorbell; retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); - if (retval != 0) goto err_pq_allocate_vidmem; kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; + retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); + if (retval == false) + goto err_eop_allocate_vidmem; + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), &kq->rptr_mem); @@ -111,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.queue_address = kq->pq_gpu_addr; prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr; prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; + prop.eop_ring_buffer_address = kq->eop_gpu_addr; + prop.eop_ring_buffer_size = PAGE_SIZE; if (init_queue(&kq->queue, prop) != 0) goto err_init_queue; @@ -156,6 +161,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, err_wptr_allocate_vidmem: kfd_gtt_sa_free(dev, kq->rptr_mem); err_rptr_allocate_vidmem: + kfd_gtt_sa_free(dev, kq->eop_mem); +err_eop_allocate_vidmem: kfd_gtt_sa_free(dev, kq->pq); err_pq_allocate_vidmem: pr_err("kfd: error init pq\n"); @@ -182,6 +189,7 @@ static void uninitialize(struct kernel_queue *kq) kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); + kq->ops_asic_specific.uninitialize(kq); kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); @@ -254,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq) kq->pending_wptr); } -static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms) -{ - unsigned long org_timeout_ms; - - BUG_ON(!kq); - - org_timeout_ms = timeout_ms; - timeout_ms += jiffies * 1000 / HZ; - while (*kq->wptr_kernel != *kq->rptr_kernel) { - if (time_after(jiffies * 1000 / HZ, timeout_ms)) { - pr_err("kfd: kernel_queue %s timeout expired %lu\n", - __func__, org_timeout_ms); - pr_err("kfd: wptr: %d rptr: %d\n", - *kq->wptr_kernel, *kq->rptr_kernel); - return -ETIME; - } - schedule(); - } - - return 0; -} - static void rollback_packet(struct kernel_queue *kq) { BUG_ON(!kq); @@ -293,14 +279,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, if (!kq) return NULL; - kq->initialize = initialize; - kq->uninitialize = uninitialize; - kq->acquire_packet_buffer = acquire_packet_buffer; - kq->submit_packet = submit_packet; - kq->sync_with_hw = sync_with_hw; - kq->rollback_packet = rollback_packet; + kq->ops.initialize = initialize; + kq->ops.uninitialize = uninitialize; + kq->ops.acquire_packet_buffer = acquire_packet_buffer; + kq->ops.submit_packet = submit_packet; + kq->ops.rollback_packet = rollback_packet; - if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { + switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + kernel_queue_init_vi(&kq->ops_asic_specific); + case CHIP_KAVERI: + kernel_queue_init_cik(&kq->ops_asic_specific); + } + + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { pr_err("kfd: failed to init kernel queue\n"); kfree(kq); return NULL; @@ -312,7 +304,7 @@ void kernel_queue_uninit(struct kernel_queue *kq) { BUG_ON(!kq); - kq->uninitialize(kq); + kq->ops.uninitialize(kq); kfree(kq); } @@ -324,19 +316,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) BUG_ON(!dev); - pr_debug("kfd: starting kernel queue test\n"); + pr_err("kfd: starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); BUG_ON(!kq); - retval = kq->acquire_packet_buffer(kq, 5, &buffer); + retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); BUG_ON(retval != 0); for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; - kq->submit_packet(kq); - kq->sync_with_hw(kq, 1000); + kq->ops.submit_packet(kq); - pr_debug("kfd: ending kernel queue test\n"); + pr_err("kfd: ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index dcd2bdb68d44..594053136ee4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -28,8 +28,31 @@ #include #include "kfd_priv.h" -struct kernel_queue { - /* interface */ +/** + * struct kernel_queue_ops + * + * @initialize: Initialize a kernel queue, including allocations of GART memory + * needed for the queue. + * + * @uninitialize: Uninitialize a kernel queue and free all its memory usages. + * + * @acquire_packet_buffer: Returns a pointer to the location in the kernel + * queue ring buffer where the calling function can write its packet. It is + * Guaranteed that there is enough space for that packet. It also updates the + * pending write pointer to that location so subsequent calls to + * acquire_packet_buffer will get a correct write pointer + * + * @submit_packet: Update the write pointer and doorbell of a kernel queue. + * + * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel + * queue are equal, which means the CP has read all the submitted packets. + * + * @rollback_packet: This routine is called if we failed to build an acquired + * packet for some reason. It just overwrites the pending wptr with the current + * one + * + */ +struct kernel_queue_ops { bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); void (*uninitialize)(struct kernel_queue *kq); @@ -38,9 +61,12 @@ struct kernel_queue { unsigned int **buffer_ptr); void (*submit_packet)(struct kernel_queue *kq); - int (*sync_with_hw)(struct kernel_queue *kq, - unsigned long timeout_ms); void (*rollback_packet)(struct kernel_queue *kq); +}; + +struct kernel_queue { + struct kernel_queue_ops ops; + struct kernel_queue_ops ops_asic_specific; /* data */ struct kfd_dev *dev; @@ -58,6 +84,9 @@ struct kernel_queue { struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; uint32_t *pq_kernel_addr; + struct kfd_mem_obj *eop_mem; + uint64_t eop_gpu_addr; + uint32_t *eop_kernel_addr; struct kfd_mem_obj *fence_mem_obj; uint64_t fence_gpu_addr; @@ -66,4 +95,7 @@ struct kernel_queue { struct list_head list; }; +void kernel_queue_init_cik(struct kernel_queue_ops *ops); +void kernel_queue_init_vi(struct kernel_queue_ops *ops); + #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c new file mode 100644 index 000000000000..a90eb440b1fb --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -0,0 +1,44 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_cik(struct kernel_queue *kq); + +void kernel_queue_init_cik(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_cik; + ops->uninitialize = uninitialize_cik; +} + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + return true; +} + +static void uninitialize_cik(struct kernel_queue *kq) +{ +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c new file mode 100644 index 000000000000..f1d48281e322 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -0,0 +1,56 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_vi(struct kernel_queue *kq); + +void kernel_queue_init_vi(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_vi; + ops->uninitialize = uninitialize_vi; +} + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval != 0) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_vi(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 95d5af138e6e..14c4115c4ae1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -48,7 +48,7 @@ static const struct kgd2kfd_calls kgd2kfd = { int sched_policy = KFD_SCHED_POLICY_HWS; module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, - "Kernel cmdline parameter that defines the amdkfd scheduling policy"); + "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT; module_param(max_num_of_processes, int, 0444); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 678c33f0a1b8..b1ef1368c3bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -21,440 +21,17 @@ * */ -#include -#include #include "kfd_priv.h" -#include "kfd_mqd_manager.h" -#include "cik_regs.h" -#include "../../radeon/cikd.h" -#include "../../radeon/cik_reg.h" - -inline void busy_wait(unsigned long ms) -{ - while (time_before(jiffies, ms)) - cpu_relax(); -} - -static inline struct cik_mqd *get_mqd(void *mqd) -{ - return (struct cik_mqd *)mqd; -} - -static int init_mqd(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) -{ - uint64_t addr; - struct cik_mqd *m; - int retval; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - /* - * Make sure to use the last queue state saved on mqd when the cp - * reassigns the queue, so when queue is switched on/off (e.g over - * subscription or quantum timeout) the context will be consistent - */ - m->cp_hqd_persistent_state = - DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr != NULL) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; -} - -static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) -{ - int retval; - struct cik_sdma_rlc_registers *m; - - BUG_ON(!mm || !mqd || !mqd_mem_obj); - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct cik_sdma_rlc_registers), - mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; - - memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); - - *mqd = m; - if (gart_addr != NULL) - *gart_addr = (*mqd_mem_obj)->gpu_addr; - - retval = mm->update_mqd(mm, m, q); - - return retval; -} - -static void uninit_mqd(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - BUG_ON(!mm || !mqd); - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - BUG_ON(!mm || !mqd); - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - -static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) -{ - return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); -} - -static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t __user *wptr) -{ - return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); -} - -static int update_mqd(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct cik_mqd *m; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - m = get_mqd(mqd); - m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; - - /* - * Calculating queue size which is log base 2 of actual queue size -1 - * dwords and another -1 for ffs - */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; - m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); - - m->cp_hqd_vmid = q->vmid; - - if (q->format == KFD_QUEUE_FORMAT_AQL) { - m->cp_hqd_iq_rptr = AQL_ENABLE; - m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - } - - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } - - return 0; -} - -static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct cik_sdma_rlc_registers *m; - - BUG_ON(!mm || !mqd || !q); - - m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = - SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | - SDMA_RB_VMID(q->vmid) | - SDMA_RPTR_WRITEBACK_ENABLE | - SDMA_RPTR_WRITEBACK_TIMER(6); - - m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); - m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); - m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); - m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; - m->sdma_rlc_virtual_addr = q->sdma_vm_addr; - - m->sdma_engine_id = q->sdma_engine_id; - m->sdma_queue_id = q->sdma_queue_id; - - q->is_active = false; - if (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0) { - m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; - q->is_active = true; - } - - return 0; -} - -static int destroy_mqd(struct mqd_manager *mm, void *mqd, - enum kfd_preempt_type type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id) -{ - return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, - pipe_id, queue_id); -} - -/* - * preempt type here is ignored because there is only one way - * to preempt sdma queue - */ -static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, - enum kfd_preempt_type type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id) -{ - return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); -} - -static bool is_occupied(struct mqd_manager *mm, void *mqd, - uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) -{ - - return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, - pipe_id, queue_id); - -} - -static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, - uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) -{ - return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); -} - -/* - * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. - * The HIQ queue in Kaveri is using the same MQD structure as all the user mode - * queues but with different initial values. - */ - -static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) -{ - uint64_t addr; - struct cik_mqd *m; - int retval; - - BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); - - pr_debug("kfd: In func %s\n", __func__); - - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | - PRELOAD_REQ; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; -} - -static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct cik_mqd *m; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - m = get_mqd(mqd); - m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | - PRIV_STATE | - KMD_QUEUE; - - /* - * Calculating queue size which is log base 2 of actual queue - * size -1 dwords - */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; - m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); - - m->cp_hqd_vmid = q->vmid; - - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } - - return 0; -} - -/* - * SDMA MQD Implementation - */ - -struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) -{ - struct cik_sdma_rlc_registers *m; - - BUG_ON(!mqd); - - m = (struct cik_sdma_rlc_registers *)mqd; - - return m; -} struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { - struct mqd_manager *mqd; - - BUG_ON(!dev); - BUG_ON(type >= KFD_MQD_TYPE_MAX); - - pr_debug("kfd: In func %s\n", __func__); - - mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); - if (!mqd) - return NULL; - - mqd->dev = dev; - - switch (type) { - case KFD_MQD_TYPE_CIK_CP: - case KFD_MQD_TYPE_CIK_COMPUTE: - mqd->init_mqd = init_mqd; - mqd->uninit_mqd = uninit_mqd; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd; - mqd->destroy_mqd = destroy_mqd; - mqd->is_occupied = is_occupied; - break; - case KFD_MQD_TYPE_CIK_HIQ: - mqd->init_mqd = init_mqd_hiq; - mqd->uninit_mqd = uninit_mqd; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; - mqd->destroy_mqd = destroy_mqd; - mqd->is_occupied = is_occupied; - break; - case KFD_MQD_TYPE_CIK_SDMA: - mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; - mqd->load_mqd = load_mqd_sdma; - mqd->update_mqd = update_mqd_sdma; - mqd->destroy_mqd = destroy_mqd_sdma; - mqd->is_occupied = is_occupied_sdma; - break; - default: - kfree(mqd); - return NULL; + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + return mqd_manager_init_cik(type, dev); + case CHIP_CARRIZO: + return mqd_manager_init_vi(type, dev); } - return mqd; + return NULL; } - -/* SDMA queues should be implemented here when the cp will supports them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c new file mode 100644 index 000000000000..4e582debfaa9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -0,0 +1,448 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "cik_regs.h" +#include "cik_structs.h" + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + /* + * Make sure to use the last queue state saved on mqd when the cp + * reassigns the queue, so when queue is switched on/off (e.g over + * subscription or quantum timeout) the context will be consistent + */ + m->cp_hqd_persistent_state = + DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; + /* Although WinKFD writes this, I suspect it should not be necessary */ + m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; + + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !mqd_mem_obj); + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct cik_sdma_rlc_registers), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr) +{ + return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + + /* + * Calculating queue size which is log base 2 of actual queue size -1 + * dwords and another -1 for ffs + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_iq_rptr = AQL_ENABLE; + m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + } + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !q); + + m = get_sdma_mqd(mqd); + m->sdma_rlc_rb_cntl = + SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | + SDMA_RB_VMID(q->vmid) | + SDMA_RPTR_WRITEBACK_ENABLE | + SDMA_RPTR_WRITEBACK_TIMER(6); + + m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; + m->sdma_rlc_virtual_addr = q->sdma_vm_addr; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; + q->is_active = true; + } + + return 0; +} + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, + pipe_id, queue_id); +} + +/* + * preempt type here is ignored because there is only one way + * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + + return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + pipe_id, queue_id); + +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +/* + * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. + * The HIQ queue in Kaveri is using the same MQD structure as all the user mode + * queues but with different initial values. + */ + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | + PRELOAD_REQ; + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | + PRIV_STATE | + KMD_QUEUE; + + /* + * Calculating queue size which is log base 2 of actual queue + * size -1 dwords + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mqd); + + m = (struct cik_sdma_rlc_registers *)mqd; + + return m; +} + +struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); + + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c new file mode 100644 index 000000000000..b3a7e3ba1e38 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -0,0 +1,33 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" + +struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + pr_warn("amdkfd: VI MQD is not currently supported\n"); + return NULL; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 3cda952ac2f8..e2533d875f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -348,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm, pr_debug("kfd: In func %s\n", __func__); mutex_lock(&pm->lock); - pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t), (unsigned int **)&packet); if (packet == NULL) { @@ -375,8 +375,7 @@ int pm_send_set_resources(struct packet_manager *pm, packet->queue_mask_lo = lower_32_bits(res->queue_mask); packet->queue_mask_hi = upper_32_bits(res->queue_mask); - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); @@ -402,7 +401,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); if (retval != 0) goto fail_acquire_packet_buffer; @@ -412,15 +411,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval != 0) goto fail_create_runlist; - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return retval; fail_create_runlist: - pm->priv_queue->rollback_packet(pm->priv_queue); + pm->priv_queue->ops.rollback_packet(pm->priv_queue); fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: @@ -438,7 +436,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, BUG_ON(!pm || !fence_address); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_query_status) / sizeof(uint32_t), (unsigned int **)&packet); @@ -459,8 +457,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, packet->data_hi = upper_32_bits((uint64_t)fence_value); packet->data_lo = lower_32_bits((uint64_t)fence_value); - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return 0; @@ -482,7 +479,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, BUG_ON(!pm); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), &buffer); @@ -537,8 +534,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, break; }; - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a79c21781d3b..bfcf45f30b76 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -104,7 +104,13 @@ enum cache_policy { cache_policy_noncoherent }; +enum asic_family_type { + CHIP_KAVERI = 0, + CHIP_CARRIZO +}; + struct kfd_device_info { + unsigned int asic_family; unsigned int max_pasid_bits; size_t ih_ring_entry_size; uint8_t num_of_watch_points; @@ -299,6 +305,11 @@ struct queue_properties { uint32_t sdma_engine_id; uint32_t sdma_queue_id; uint32_t sdma_vm_addr; + /* Relevant only for VI */ + uint64_t eop_ring_buffer_address; + uint32_t eop_ring_buffer_size; + uint64_t ctx_save_restore_area_address; + uint32_t ctx_save_restore_area_size; }; /** @@ -351,10 +362,10 @@ struct queue { * Please read the kfd_mqd_manager.h description. */ enum KFD_MQD_TYPE { - KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ - KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ - KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ - KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */ + KFD_MQD_TYPE_HIQ, /* for hiq */ + KFD_MQD_TYPE_CP, /* for cp queues and diq */ + KFD_MQD_TYPE_SDMA, /* for sdma queues */ KFD_MQD_TYPE_MAX }; @@ -562,6 +573,10 @@ void print_queue(struct queue *q); struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 948b1ca8e7a2..513eeb6e402a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -178,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (list_empty(&pqm->queues)) { pdd->qpd.pqm = pqm; - dev->dqm->register_process(dev->dqm, &pdd->qpd); + dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); @@ -204,7 +204,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd, + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, &q->properties.vmid); print_queue(q); break; @@ -217,7 +217,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq->queue->properties.queue_id = *qid; pqn->kq = kq; pqn->q = NULL; - retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd); + retval = dev->dqm->ops.create_kernel_queue(dev->dqm, + kq, &pdd->qpd); break; default: BUG(); @@ -285,13 +286,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->kq) { /* destroy kernel queue (DIQ) */ dqm = pqn->kq->dev->dqm; - dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); + dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); kernel_queue_uninit(pqn->kq); } if (pqn->q) { dqm = pqn->q->device->dqm; - retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q); + retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval != 0) return retval; @@ -303,7 +304,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) clear_bit(qid, pqm->queue_slot_bitmap); if (list_empty(&pqm->queues)) - dqm->unregister_process(dqm, &pdd->qpd); + dqm->ops.unregister_process(dqm, &pdd->qpd); return retval; } @@ -324,7 +325,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, pqn->q->properties.queue_percent = p->queue_percent; pqn->q->properties.priority = p->priority; - retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q); + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); if (retval != 0) return retval; diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h new file mode 100644 index 000000000000..749eab94e335 --- /dev/null +++ b/drivers/gpu/drm/amd/include/cik_structs.h @@ -0,0 +1,293 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef CIK_STRUCTS_H_ +#define CIK_STRUCTS_H_ + +struct cik_mqd { + uint32_t header; + uint32_t compute_dispatch_initiator; + uint32_t compute_dim_x; + uint32_t compute_dim_y; + uint32_t compute_dim_z; + uint32_t compute_start_x; + uint32_t compute_start_y; + uint32_t compute_start_z; + uint32_t compute_num_thread_x; + uint32_t compute_num_thread_y; + uint32_t compute_num_thread_z; + uint32_t compute_pipelinestat_enable; + uint32_t compute_perfcount_enable; + uint32_t compute_pgm_lo; + uint32_t compute_pgm_hi; + uint32_t compute_tba_lo; + uint32_t compute_tba_hi; + uint32_t compute_tma_lo; + uint32_t compute_tma_hi; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint32_t compute_vmid; + uint32_t compute_resource_limits; + uint32_t compute_static_thread_mgmt_se0; + uint32_t compute_static_thread_mgmt_se1; + uint32_t compute_tmpring_size; + uint32_t compute_static_thread_mgmt_se2; + uint32_t compute_static_thread_mgmt_se3; + uint32_t compute_restart_x; + uint32_t compute_restart_y; + uint32_t compute_restart_z; + uint32_t compute_thread_trace_enable; + uint32_t compute_misc_reserved; + uint32_t compute_user_data_0; + uint32_t compute_user_data_1; + uint32_t compute_user_data_2; + uint32_t compute_user_data_3; + uint32_t compute_user_data_4; + uint32_t compute_user_data_5; + uint32_t compute_user_data_6; + uint32_t compute_user_data_7; + uint32_t compute_user_data_8; + uint32_t compute_user_data_9; + uint32_t compute_user_data_10; + uint32_t compute_user_data_11; + uint32_t compute_user_data_12; + uint32_t compute_user_data_13; + uint32_t compute_user_data_14; + uint32_t compute_user_data_15; + uint32_t cp_compute_csinvoc_count_lo; + uint32_t cp_compute_csinvoc_count_hi; + uint32_t cp_mqd_base_addr_lo; + uint32_t cp_mqd_base_addr_hi; + uint32_t cp_hqd_active; + uint32_t cp_hqd_vmid; + uint32_t cp_hqd_persistent_state; + uint32_t cp_hqd_pipe_priority; + uint32_t cp_hqd_queue_priority; + uint32_t cp_hqd_quantum; + uint32_t cp_hqd_pq_base_lo; + uint32_t cp_hqd_pq_base_hi; + uint32_t cp_hqd_pq_rptr; + uint32_t cp_hqd_pq_rptr_report_addr_lo; + uint32_t cp_hqd_pq_rptr_report_addr_hi; + uint32_t cp_hqd_pq_wptr_poll_addr_lo; + uint32_t cp_hqd_pq_wptr_poll_addr_hi; + uint32_t cp_hqd_pq_doorbell_control; + uint32_t cp_hqd_pq_wptr; + uint32_t cp_hqd_pq_control; + uint32_t cp_hqd_ib_base_addr_lo; + uint32_t cp_hqd_ib_base_addr_hi; + uint32_t cp_hqd_ib_rptr; + uint32_t cp_hqd_ib_control; + uint32_t cp_hqd_iq_timer; + uint32_t cp_hqd_iq_rptr; + uint32_t cp_hqd_dequeue_request; + uint32_t cp_hqd_dma_offload; + uint32_t cp_hqd_sema_cmd; + uint32_t cp_hqd_msg_type; + uint32_t cp_hqd_atomic0_preop_lo; + uint32_t cp_hqd_atomic0_preop_hi; + uint32_t cp_hqd_atomic1_preop_lo; + uint32_t cp_hqd_atomic1_preop_hi; + uint32_t cp_hqd_hq_status0; + uint32_t cp_hqd_hq_control0; + uint32_t cp_mqd_control; + uint32_t cp_mqd_query_time_lo; + uint32_t cp_mqd_query_time_hi; + uint32_t cp_mqd_connect_start_time_lo; + uint32_t cp_mqd_connect_start_time_hi; + uint32_t cp_mqd_connect_end_time_lo; + uint32_t cp_mqd_connect_end_time_hi; + uint32_t cp_mqd_connect_end_wf_count; + uint32_t cp_mqd_connect_end_pq_rptr; + uint32_t cp_mqd_connect_end_pq_wptr; + uint32_t cp_mqd_connect_end_ib_rptr; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t iqtimer_pkt_header; + uint32_t iqtimer_pkt_dw0; + uint32_t iqtimer_pkt_dw1; + uint32_t iqtimer_pkt_dw2; + uint32_t iqtimer_pkt_dw3; + uint32_t iqtimer_pkt_dw4; + uint32_t iqtimer_pkt_dw5; + uint32_t iqtimer_pkt_dw6; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t queue_doorbell_id0; + uint32_t queue_doorbell_id1; + uint32_t queue_doorbell_id2; + uint32_t queue_doorbell_id3; + uint32_t queue_doorbell_id4; + uint32_t queue_doorbell_id5; + uint32_t queue_doorbell_id6; + uint32_t queue_doorbell_id7; + uint32_t queue_doorbell_id8; + uint32_t queue_doorbell_id9; + uint32_t queue_doorbell_id10; + uint32_t queue_doorbell_id11; + uint32_t queue_doorbell_id12; + uint32_t queue_doorbell_id13; + uint32_t queue_doorbell_id14; + uint32_t queue_doorbell_id15; +}; + +struct cik_sdma_rlc_registers { + uint32_t sdma_rlc_rb_cntl; + uint32_t sdma_rlc_rb_base; + uint32_t sdma_rlc_rb_base_hi; + uint32_t sdma_rlc_rb_rptr; + uint32_t sdma_rlc_rb_wptr; + uint32_t sdma_rlc_rb_wptr_poll_cntl; + uint32_t sdma_rlc_rb_wptr_poll_addr_hi; + uint32_t sdma_rlc_rb_wptr_poll_addr_lo; + uint32_t sdma_rlc_rb_rptr_addr_hi; + uint32_t sdma_rlc_rb_rptr_addr_lo; + uint32_t sdma_rlc_ib_cntl; + uint32_t sdma_rlc_ib_rptr; + uint32_t sdma_rlc_ib_offset; + uint32_t sdma_rlc_ib_base_lo; + uint32_t sdma_rlc_ib_base_hi; + uint32_t sdma_rlc_ib_size; + uint32_t sdma_rlc_skip_cntl; + uint32_t sdma_rlc_context_status; + uint32_t sdma_rlc_doorbell; + uint32_t sdma_rlc_virtual_addr; + uint32_t sdma_rlc_ape1_cntl; + uint32_t sdma_rlc_doorbell_log; + uint32_t reserved_22; + uint32_t reserved_23; + uint32_t reserved_24; + uint32_t reserved_25; + uint32_t reserved_26; + uint32_t reserved_27; + uint32_t reserved_28; + uint32_t reserved_29; + uint32_t reserved_30; + uint32_t reserved_31; + uint32_t reserved_32; + uint32_t reserved_33; + uint32_t reserved_34; + uint32_t reserved_35; + uint32_t reserved_36; + uint32_t reserved_37; + uint32_t reserved_38; + uint32_t reserved_39; + uint32_t reserved_40; + uint32_t reserved_41; + uint32_t reserved_42; + uint32_t reserved_43; + uint32_t reserved_44; + uint32_t reserved_45; + uint32_t reserved_46; + uint32_t reserved_47; + uint32_t reserved_48; + uint32_t reserved_49; + uint32_t reserved_50; + uint32_t reserved_51; + uint32_t reserved_52; + uint32_t reserved_53; + uint32_t reserved_54; + uint32_t reserved_55; + uint32_t reserved_56; + uint32_t reserved_57; + uint32_t reserved_58; + uint32_t reserved_59; + uint32_t reserved_60; + uint32_t reserved_61; + uint32_t reserved_62; + uint32_t reserved_63; + uint32_t reserved_64; + uint32_t reserved_65; + uint32_t reserved_66; + uint32_t reserved_67; + uint32_t reserved_68; + uint32_t reserved_69; + uint32_t reserved_70; + uint32_t reserved_71; + uint32_t reserved_72; + uint32_t reserved_73; + uint32_t reserved_74; + uint32_t reserved_75; + uint32_t reserved_76; + uint32_t reserved_77; + uint32_t reserved_78; + uint32_t reserved_79; + uint32_t reserved_80; + uint32_t reserved_81; + uint32_t reserved_82; + uint32_t reserved_83; + uint32_t reserved_84; + uint32_t reserved_85; + uint32_t reserved_86; + uint32_t reserved_87; + uint32_t reserved_88; + uint32_t reserved_89; + uint32_t reserved_90; + uint32_t reserved_91; + uint32_t reserved_92; + uint32_t reserved_93; + uint32_t reserved_94; + uint32_t reserved_95; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t reserved_100; + uint32_t reserved_101; + uint32_t reserved_102; + uint32_t reserved_103; + uint32_t reserved_104; + uint32_t reserved_105; + uint32_t reserved_106; + uint32_t reserved_107; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t reserved_112; + uint32_t reserved_113; + uint32_t reserved_114; + uint32_t reserved_115; + uint32_t reserved_116; + uint32_t reserved_117; + uint32_t reserved_118; + uint32_t reserved_119; + uint32_t reserved_120; + uint32_t reserved_121; + uint32_t reserved_122; + uint32_t reserved_123; + uint32_t reserved_124; + uint32_t reserved_125; + uint32_t reserved_126; + uint32_t reserved_127; + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; +}; + + + +#endif /* CIK_STRUCTS_H_ */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index cd3878fe6f77..883499740e33 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -129,9 +129,6 @@ struct kgd2kfd_calls { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_memory: Initializes memory apertures to fixed base/limit address - * and non cached memory types. - * * @init_pipeline: Initialized the compute pipelines. * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp @@ -175,7 +172,6 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_memory)(struct kgd_dev *kgd); int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 12bc21219a0e..c58cfd3c3917 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -2,7 +2,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm +ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include hostprogs-y := mkregtable clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dcde3798b45..14d173e2a8f7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5707,6 +5707,28 @@ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) WREG32(VM_INVALIDATE_REQUEST, 0x1); } +static void cik_pcie_init_compute_vmid(struct radeon_device *rdev) +{ + int i; + uint32_t sh_mem_bases, sh_mem_config; + + sh_mem_bases = 0x6000 | 0x6000 << 16; + sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); + sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); + + mutex_lock(&rdev->srbm_mutex); + for (i = 8; i < 16; i++) { + cik_srbm_select(rdev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(SH_MEM_CONFIG, sh_mem_config); + WREG32(SH_MEM_APE1_BASE, 1); + WREG32(SH_MEM_APE1_LIMIT, 0); + WREG32(SH_MEM_BASES, sh_mem_bases); + } + cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); +} + /** * cik_pcie_gart_enable - gart enable * @@ -5820,6 +5842,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); + cik_pcie_init_compute_vmid(rdev); + cik_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(rdev->mc.gtt_size >> 20), diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index bbb8f2e43637..f667347d8157 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -184,268 +184,4 @@ #define SDMA0_CNTL 0xD010 #define SDMA1_CNTL 0xD810 -struct cik_mqd { - uint32_t header; - uint32_t compute_dispatch_initiator; - uint32_t compute_dim_x; - uint32_t compute_dim_y; - uint32_t compute_dim_z; - uint32_t compute_start_x; - uint32_t compute_start_y; - uint32_t compute_start_z; - uint32_t compute_num_thread_x; - uint32_t compute_num_thread_y; - uint32_t compute_num_thread_z; - uint32_t compute_pipelinestat_enable; - uint32_t compute_perfcount_enable; - uint32_t compute_pgm_lo; - uint32_t compute_pgm_hi; - uint32_t compute_tba_lo; - uint32_t compute_tba_hi; - uint32_t compute_tma_lo; - uint32_t compute_tma_hi; - uint32_t compute_pgm_rsrc1; - uint32_t compute_pgm_rsrc2; - uint32_t compute_vmid; - uint32_t compute_resource_limits; - uint32_t compute_static_thread_mgmt_se0; - uint32_t compute_static_thread_mgmt_se1; - uint32_t compute_tmpring_size; - uint32_t compute_static_thread_mgmt_se2; - uint32_t compute_static_thread_mgmt_se3; - uint32_t compute_restart_x; - uint32_t compute_restart_y; - uint32_t compute_restart_z; - uint32_t compute_thread_trace_enable; - uint32_t compute_misc_reserved; - uint32_t compute_user_data_0; - uint32_t compute_user_data_1; - uint32_t compute_user_data_2; - uint32_t compute_user_data_3; - uint32_t compute_user_data_4; - uint32_t compute_user_data_5; - uint32_t compute_user_data_6; - uint32_t compute_user_data_7; - uint32_t compute_user_data_8; - uint32_t compute_user_data_9; - uint32_t compute_user_data_10; - uint32_t compute_user_data_11; - uint32_t compute_user_data_12; - uint32_t compute_user_data_13; - uint32_t compute_user_data_14; - uint32_t compute_user_data_15; - uint32_t cp_compute_csinvoc_count_lo; - uint32_t cp_compute_csinvoc_count_hi; - uint32_t cp_mqd_base_addr_lo; - uint32_t cp_mqd_base_addr_hi; - uint32_t cp_hqd_active; - uint32_t cp_hqd_vmid; - uint32_t cp_hqd_persistent_state; - uint32_t cp_hqd_pipe_priority; - uint32_t cp_hqd_queue_priority; - uint32_t cp_hqd_quantum; - uint32_t cp_hqd_pq_base_lo; - uint32_t cp_hqd_pq_base_hi; - uint32_t cp_hqd_pq_rptr; - uint32_t cp_hqd_pq_rptr_report_addr_lo; - uint32_t cp_hqd_pq_rptr_report_addr_hi; - uint32_t cp_hqd_pq_wptr_poll_addr_lo; - uint32_t cp_hqd_pq_wptr_poll_addr_hi; - uint32_t cp_hqd_pq_doorbell_control; - uint32_t cp_hqd_pq_wptr; - uint32_t cp_hqd_pq_control; - uint32_t cp_hqd_ib_base_addr_lo; - uint32_t cp_hqd_ib_base_addr_hi; - uint32_t cp_hqd_ib_rptr; - uint32_t cp_hqd_ib_control; - uint32_t cp_hqd_iq_timer; - uint32_t cp_hqd_iq_rptr; - uint32_t cp_hqd_dequeue_request; - uint32_t cp_hqd_dma_offload; - uint32_t cp_hqd_sema_cmd; - uint32_t cp_hqd_msg_type; - uint32_t cp_hqd_atomic0_preop_lo; - uint32_t cp_hqd_atomic0_preop_hi; - uint32_t cp_hqd_atomic1_preop_lo; - uint32_t cp_hqd_atomic1_preop_hi; - uint32_t cp_hqd_hq_status0; - uint32_t cp_hqd_hq_control0; - uint32_t cp_mqd_control; - uint32_t cp_mqd_query_time_lo; - uint32_t cp_mqd_query_time_hi; - uint32_t cp_mqd_connect_start_time_lo; - uint32_t cp_mqd_connect_start_time_hi; - uint32_t cp_mqd_connect_end_time_lo; - uint32_t cp_mqd_connect_end_time_hi; - uint32_t cp_mqd_connect_end_wf_count; - uint32_t cp_mqd_connect_end_pq_rptr; - uint32_t cp_mqd_connect_end_pq_wptr; - uint32_t cp_mqd_connect_end_ib_rptr; - uint32_t reserved_96; - uint32_t reserved_97; - uint32_t reserved_98; - uint32_t reserved_99; - uint32_t iqtimer_pkt_header; - uint32_t iqtimer_pkt_dw0; - uint32_t iqtimer_pkt_dw1; - uint32_t iqtimer_pkt_dw2; - uint32_t iqtimer_pkt_dw3; - uint32_t iqtimer_pkt_dw4; - uint32_t iqtimer_pkt_dw5; - uint32_t iqtimer_pkt_dw6; - uint32_t reserved_108; - uint32_t reserved_109; - uint32_t reserved_110; - uint32_t reserved_111; - uint32_t queue_doorbell_id0; - uint32_t queue_doorbell_id1; - uint32_t queue_doorbell_id2; - uint32_t queue_doorbell_id3; - uint32_t queue_doorbell_id4; - uint32_t queue_doorbell_id5; - uint32_t queue_doorbell_id6; - uint32_t queue_doorbell_id7; - uint32_t queue_doorbell_id8; - uint32_t queue_doorbell_id9; - uint32_t queue_doorbell_id10; - uint32_t queue_doorbell_id11; - uint32_t queue_doorbell_id12; - uint32_t queue_doorbell_id13; - uint32_t queue_doorbell_id14; - uint32_t queue_doorbell_id15; -}; - -struct cik_sdma_rlc_registers { - uint32_t sdma_rlc_rb_cntl; - uint32_t sdma_rlc_rb_base; - uint32_t sdma_rlc_rb_base_hi; - uint32_t sdma_rlc_rb_rptr; - uint32_t sdma_rlc_rb_wptr; - uint32_t sdma_rlc_rb_wptr_poll_cntl; - uint32_t sdma_rlc_rb_wptr_poll_addr_hi; - uint32_t sdma_rlc_rb_wptr_poll_addr_lo; - uint32_t sdma_rlc_rb_rptr_addr_hi; - uint32_t sdma_rlc_rb_rptr_addr_lo; - uint32_t sdma_rlc_ib_cntl; - uint32_t sdma_rlc_ib_rptr; - uint32_t sdma_rlc_ib_offset; - uint32_t sdma_rlc_ib_base_lo; - uint32_t sdma_rlc_ib_base_hi; - uint32_t sdma_rlc_ib_size; - uint32_t sdma_rlc_skip_cntl; - uint32_t sdma_rlc_context_status; - uint32_t sdma_rlc_doorbell; - uint32_t sdma_rlc_virtual_addr; - uint32_t sdma_rlc_ape1_cntl; - uint32_t sdma_rlc_doorbell_log; - uint32_t reserved_22; - uint32_t reserved_23; - uint32_t reserved_24; - uint32_t reserved_25; - uint32_t reserved_26; - uint32_t reserved_27; - uint32_t reserved_28; - uint32_t reserved_29; - uint32_t reserved_30; - uint32_t reserved_31; - uint32_t reserved_32; - uint32_t reserved_33; - uint32_t reserved_34; - uint32_t reserved_35; - uint32_t reserved_36; - uint32_t reserved_37; - uint32_t reserved_38; - uint32_t reserved_39; - uint32_t reserved_40; - uint32_t reserved_41; - uint32_t reserved_42; - uint32_t reserved_43; - uint32_t reserved_44; - uint32_t reserved_45; - uint32_t reserved_46; - uint32_t reserved_47; - uint32_t reserved_48; - uint32_t reserved_49; - uint32_t reserved_50; - uint32_t reserved_51; - uint32_t reserved_52; - uint32_t reserved_53; - uint32_t reserved_54; - uint32_t reserved_55; - uint32_t reserved_56; - uint32_t reserved_57; - uint32_t reserved_58; - uint32_t reserved_59; - uint32_t reserved_60; - uint32_t reserved_61; - uint32_t reserved_62; - uint32_t reserved_63; - uint32_t reserved_64; - uint32_t reserved_65; - uint32_t reserved_66; - uint32_t reserved_67; - uint32_t reserved_68; - uint32_t reserved_69; - uint32_t reserved_70; - uint32_t reserved_71; - uint32_t reserved_72; - uint32_t reserved_73; - uint32_t reserved_74; - uint32_t reserved_75; - uint32_t reserved_76; - uint32_t reserved_77; - uint32_t reserved_78; - uint32_t reserved_79; - uint32_t reserved_80; - uint32_t reserved_81; - uint32_t reserved_82; - uint32_t reserved_83; - uint32_t reserved_84; - uint32_t reserved_85; - uint32_t reserved_86; - uint32_t reserved_87; - uint32_t reserved_88; - uint32_t reserved_89; - uint32_t reserved_90; - uint32_t reserved_91; - uint32_t reserved_92; - uint32_t reserved_93; - uint32_t reserved_94; - uint32_t reserved_95; - uint32_t reserved_96; - uint32_t reserved_97; - uint32_t reserved_98; - uint32_t reserved_99; - uint32_t reserved_100; - uint32_t reserved_101; - uint32_t reserved_102; - uint32_t reserved_103; - uint32_t reserved_104; - uint32_t reserved_105; - uint32_t reserved_106; - uint32_t reserved_107; - uint32_t reserved_108; - uint32_t reserved_109; - uint32_t reserved_110; - uint32_t reserved_111; - uint32_t reserved_112; - uint32_t reserved_113; - uint32_t reserved_114; - uint32_t reserved_115; - uint32_t reserved_116; - uint32_t reserved_117; - uint32_t reserved_118; - uint32_t reserved_119; - uint32_t reserved_120; - uint32_t reserved_121; - uint32_t reserved_122; - uint32_t reserved_123; - uint32_t reserved_124; - uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; - uint32_t sdma_engine_id; - uint32_t sdma_queue_id; -}; - #endif diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index cae11eefecf0..ddbabab5d875 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -30,6 +30,7 @@ #include "radeon_kfd.h" #include "radeon_ucode.h" #include +#include "cik_structs.h" #define CIK_PIPE_PER_MEC (4) @@ -63,8 +64,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_memory(struct kgd_dev *kgd); - static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); @@ -89,7 +88,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_memory = kgd_init_memory, .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -375,42 +373,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_memory(struct kgd_dev *kgd) -{ - /* - * Configure apertures: - * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) - * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) - * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) - */ - int i; - uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000); - - for (i = 8; i < 16; i++) { - uint32_t sh_mem_config; - - lock_srbm(kgd, 0, 0, 0, i); - - sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); - sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); - - write_register(kgd, SH_MEM_CONFIG, sh_mem_config); - - write_register(kgd, SH_MEM_BASES, sh_mem_bases); - - /* Scratch aperture is not supported for now. */ - write_register(kgd, SH_STATIC_MEM_CONFIG, 0); - - /* APE1 disabled for now. */ - write_register(kgd, SH_MEM_APE1_BASE, 1); - write_register(kgd, SH_MEM_APE1_LIMIT, 0); - - unlock_srbm(kgd); - } - - return 0; -} - static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h index f90e161ca507..1103f9082f6b 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.h +++ b/drivers/gpu/drm/radeon/radeon_kfd.h @@ -29,7 +29,7 @@ #define RADEON_KFD_H_INCLUDED #include -#include "../amd/include/kgd_kfd_interface.h" +#include "kgd_kfd_interface.h" struct radeon_device;