From 56fc40aba4d9bc22ccbb8ce88a1c41f666d04f05 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 25 Sep 2019 23:57:30 -0400 Subject: [PATCH] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid get_atc_vmid_pasid_mapping_valid() is very similar to get_atc_vmid_pasid_mapping_pasid(), so they can be merged into a new function get_atc_vmid_pasid_mapping_info() to reduce register access times. More importantly, getting the PASID and the valid bit atomically with a single read fixes some potential race conditions where the mapping changes between the two reads. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 6 +-- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 49 +++++++------------ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 28 ++++------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 32 ++++-------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 45 +++++++---------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 6 +-- .../gpu/drm/amd/amdkfd/cik_event_interrupt.c | 8 +-- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 16 +++--- .../gpu/drm/amd/include/kgd_kfd_interface.h | 8 ++- 9 files changed, 76 insertions(+), 122 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 68acb8d5296e..4a49dbee26a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -278,10 +278,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index fe5b702c75ce..72bfd556981a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -98,10 +98,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); @@ -155,10 +153,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, .invalidate_tlbs = invalidate_tlbs, @@ -775,26 +771,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -826,6 +813,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; + uint16_t queried_pasid; + bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; if (amdgpu_emu_mode == 0 && ring->sched.ready) @@ -834,13 +823,13 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } + + ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 8c97be0ff396..f77ddf7dba2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -133,9 +133,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); @@ -186,8 +185,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, @@ -753,24 +751,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static void set_scratch_backing_va(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 240441e15d52..7478caf096ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -89,10 +89,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, @@ -141,10 +139,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, @@ -667,24 +663,16 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int kgd_address_watch_disable(struct kgd_dev *kgd) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df90c2229f1a..50f885576bbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -612,26 +612,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, @@ -666,6 +657,8 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid, i; + uint16_t queried_pasid; + bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; uint32_t flush_type = 0; @@ -681,14 +674,14 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } + + ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); + break; } } @@ -813,10 +806,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 225bf64001e4..d9e9ad22b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -55,10 +55,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 177d1e5329a5..9f59ba93cfe0 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; const struct kfd2kgd_calls *f2g = dev->kfd2kgd; - unsigned int vmid, pasid; + unsigned int vmid; + uint16_t pasid; + bool ret; /* This workaround is due to HW/FW limitation on Hawaii that * VMID and PASID are not written into ih_ring_entry @@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, *tmp_ihre = *ihre; vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); - pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); + ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); tmp_ihre->ring_id &= 0x000000ff; tmp_ihre->ring_id |= vmid << 8; tmp_ihre->ring_id |= pasid << 16; - return (pasid != 0) && + return ret && (pasid != 0) && vmid >= dev->vm_info.first_vmid_kfd && vmid <= dev->vm_info.last_vmid_kfd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index fedf0709b4ae..d59f2cd056c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -761,6 +761,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) { int status = 0; unsigned int vmid; + uint16_t queried_pasid; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; @@ -782,14 +783,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) */ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid - (dev->kgd, vmid)) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid - (dev->kgd, vmid) == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", - vmid, p->pasid); - break; - } + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->kgd, vmid, &queried_pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; } } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index e4f7bba11a3a..2cd217e60125 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -291,12 +291,10 @@ struct kfd2kgd_calls { uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); - bool (*get_atc_vmid_pasid_mapping_valid)( + bool (*get_atc_vmid_pasid_mapping_info)( struct kgd_dev *kgd, - uint8_t vmid); - uint16_t (*get_atc_vmid_pasid_mapping_pasid)( - struct kgd_dev *kgd, - uint8_t vmid); + uint8_t vmid, + uint16_t *p_pasid); /* No longer needed from GFXv9 onward. The scratch base address is * passed to the shader by the CP. It's the user mode driver's